Files
crawler/fuel_scraper/db_operations.py
Edwin Eames 8f45f4c209 feat(CRIT-010): add zone-to-county mapping and county_id to oil_prices
Add ZONE_COUNTY_MAP for all 5 scraped states (42 zone-to-county entries).
Scraper now resolves county_id at startup and assigns it to each record.
Upsert logic deduplicates by (name, state, county_id) to prevent duplicates
when multiple zones map to the same county. Also adds County model for
DB lookups and fixes Rhode Island zone count (4, not 5).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 18:31:46 -05:00

106 lines
3.9 KiB
Python

"""
Database operations module for oil price CRUD operations.
"""
import logging
from datetime import datetime
from sqlalchemy.orm import Session
import sys
import os
# Prepend the directory one level above this file's own directory to sys.path.
# This has to run before the bare `import models` below, which is why that
# import sits after it rather than with the others; presumably that directory
# is where the project's `models` module lives (TODO confirm).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import models
def _utc_now_naive():
    """Return the current UTC time as a naive datetime.

    Produces the same value ``datetime.utcnow()`` would, without calling that
    API (deprecated since Python 3.12), so stored timestamps keep their
    existing naive-UTC form.
    """
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


def _find_existing_oil_price(db_session, item_dict, county_id):
    """Return the stored OilPrice row that *item_dict* refers to, or None.

    Lookup order:
    1. (name, state, county_id) when county_id is known, so several zones
       that map to the same county share a single row.
    2. (name, state, zone) otherwise, or when step 1 finds nothing. The
       second case catches rows whose county_id is still NULL or out of
       date, so they are updated in place instead of being duplicated.
    """
    same_vendor = db_session.query(models.OilPrice).filter(
        models.OilPrice.name == item_dict["name"],
        models.OilPrice.state == item_dict["state"],
    )
    if county_id is not None:
        by_county = same_vendor.filter(
            models.OilPrice.county_id == county_id
        ).first()
        if by_county is not None:
            return by_county
    return same_vendor.filter(
        models.OilPrice.zone == item_dict["zone"]
    ).first()


def upsert_oil_price(db_session: Session, item_dict: dict) -> bool:
    """
    Insert or update an oil price record.

    Logic:
    - Match by (name, state, county_id) when county_id is available to avoid
      duplicates when multiple zones map to the same county.
    - Fall back to (name, state, zone) when county_id is not available, or
      when no row matches by county_id (e.g. a row stored before county_id
      was populated); such a row then gets its county_id backfilled.
    - If record exists with non-null company_id: skip (vendor-managed price)
    - If record exists with null company_id and different price: update
    - If record exists with same price: skip, unless county_id was backfilled
    - If no record exists: insert new

    The session is only mutated (attribute writes / ``add``); nothing is
    committed or flushed here, so the caller owns the transaction.

    Args:
        db_session: SQLAlchemy session
        item_dict: Dictionary with state, zone, name, price, date and,
            optionally, county_id

    Returns:
        True if a record was inserted or updated, False otherwise
    """
    county_id = item_dict.get("county_id")
    existing_record = _find_existing_oil_price(db_session, item_dict, county_id)
    # Shared "<name> in <state> zone <zone>" fragment used by every log line.
    label = f"{item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}"

    if existing_record is None:
        # No record exists - create new
        db_session.add(
            models.OilPrice(
                state=item_dict["state"],
                zone=item_dict["zone"],
                name=item_dict["name"],
                price=item_dict["price"],
                date=item_dict["date"],
                county_id=county_id,
                scrapetimestamp=_utc_now_naive(),
            )
        )
        logging.info("Added new record for %s (county_id=%s)", label, county_id)
        return True

    # Vendor-managed rows are never touched by the scraper, not even to
    # backfill county_id.
    if existing_record.company_id is not None:
        logging.debug(
            "Skipping update for %s due to non-null company_id", label
        )
        return False

    # Backfill / correct county_id. This can only differ when the row was
    # found through the zone fallback.
    county_changed = (
        county_id is not None and existing_record.county_id != county_id
    )
    if county_changed:
        existing_record.county_id = county_id

    if existing_record.price != item_dict["price"]:
        existing_record.price = item_dict["price"]
        existing_record.date = item_dict["date"]
        existing_record.scrapetimestamp = _utc_now_naive()
        logging.info(
            "Updated price for %s to %s", label, item_dict["price"]
        )
        return True

    if county_changed:
        existing_record.scrapetimestamp = _utc_now_naive()
        logging.info("Updated county_id for %s to %s", label, county_id)
        return True

    logging.debug("Price unchanged for %s", label)
    return False