Add ZONE_COUNTY_MAP for all 5 scraped states (42 zone-to-county entries). Scraper now resolves county_id at startup and assigns it to each record. Upsert logic deduplicates by (name, state, county_id) to prevent duplicates when multiple zones map to the same county. Also adds County model for DB lookups and fixes Rhode Island zone count (4, not 5). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
106 lines
3.9 KiB
Python
106 lines
3.9 KiB
Python
"""
|
|
Database operations module for oil price CRUD operations.
|
|
"""
|
|
import logging
|
|
from datetime import datetime
|
|
from sqlalchemy.orm import Session
|
|
|
|
import sys
|
|
import os
|
|
# Add parent directory to path for imports
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
import models
|
|
|
|
|
|
def _find_existing_oil_price(db_session, item_dict, county_id):
    """Return the stored OilPrice row that ``item_dict`` refers to, or None.

    Lookup order:
    1. (name, state, county_id) when ``county_id`` is not None, so several
       zones that map to one county share a single row.
    2. If that misses, (name, state, zone) restricted to rows whose
       county_id is still NULL. Such rows were stored before a county was
       known; matching them lets the caller backfill county_id instead of
       inserting a duplicate.
    3. (name, state, zone) when ``county_id`` is None.
    """
    same_name_and_state = db_session.query(models.OilPrice).filter(
        models.OilPrice.name == item_dict["name"],
        models.OilPrice.state == item_dict["state"],
    )

    if county_id is None:
        return same_name_and_state.filter(
            models.OilPrice.zone == item_dict["zone"]
        ).first()

    by_county = same_name_and_state.filter(
        models.OilPrice.county_id == county_id
    ).first()
    if by_county is not None:
        return by_county

    # No row carries this county yet: look for a row in the same zone that
    # has no county assigned so it can be adopted rather than duplicated.
    return same_name_and_state.filter(
        models.OilPrice.zone == item_dict["zone"],
        models.OilPrice.county_id.is_(None),
    ).first()


def upsert_oil_price(db_session: Session, item_dict: dict) -> bool:
    """
    Insert or update an oil price record.

    Logic:
    - Match by (name, state, county_id) when county_id is available to avoid
      duplicates when multiple zones map to the same county.
    - If that finds nothing, match a row for the same (name, state, zone)
      whose county_id is NULL and backfill its county_id.
    - Fall back to (name, state, zone) when county_id is not available.
    - If record exists with non-null company_id: skip (vendor-managed price)
    - If record exists with null company_id and different price: update
    - If record exists with same price but county_id was backfilled: update
    - If record exists with same price and nothing else changed: skip
    - If no record exists: insert new

    The function only mutates/adds objects on the session; it does not call
    commit itself.

    Args:
        db_session: SQLAlchemy session
        item_dict: Dictionary with the required keys state, zone, name,
            price and date, plus an optional county_id (read with ``.get``).

    Returns:
        True if a record was inserted or updated, False otherwise

    Raises:
        KeyError: if one of the required keys is missing from item_dict.
    """
    county_id = item_dict.get("county_id")
    existing_record = _find_existing_oil_price(db_session, item_dict, county_id)

    if existing_record is None:
        # No record exists - create new
        oil_price_record = models.OilPrice(
            state=item_dict["state"],
            zone=item_dict["zone"],
            name=item_dict["name"],
            price=item_dict["price"],
            date=item_dict["date"],
            county_id=county_id,
            # NOTE(review): utcnow() returns a naive datetime and is
            # deprecated since Python 3.12; kept so stored values keep
            # their current shape.
            scrapetimestamp=datetime.utcnow(),
        )
        db_session.add(oil_price_record)
        logging.info(
            f"Added new record for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            f"(county_id={county_id})"
        )
        return True

    # Rows owned by a company are never overwritten by scraped data.
    if existing_record.company_id is not None:
        logging.debug(
            f"Skipping update for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            "due to non-null company_id"
        )
        return False

    # Backfill county_id when the row was located through the zone fallback.
    county_changed = (
        county_id is not None and existing_record.county_id != county_id
    )
    if county_changed:
        existing_record.county_id = county_id

    if existing_record.price != item_dict["price"]:
        existing_record.price = item_dict["price"]
        existing_record.date = item_dict["date"]
        existing_record.scrapetimestamp = datetime.utcnow()
        logging.info(
            f"Updated price for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            f"to {item_dict['price']}"
        )
        return True

    if county_changed:
        existing_record.scrapetimestamp = datetime.utcnow()
        logging.info(
            f"Updated county_id for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            f"to {county_id}"
        )
        return True

    logging.debug(
        f"Price unchanged for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}"
    )
    return False
|