feat(CRIT-010): add zone-to-county mapping and county_id to oil_prices
Add ZONE_COUNTY_MAP for all 5 scraped states (42 zone-to-county entries). Scraper now resolves county_id at startup and assigns it to each record. Upsert logic deduplicates by (name, state, county_id) to prevent duplicates when multiple zones map to the same county. Also adds County model for DB lookups and fixes Rhode Island zone count (4, not 5). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
105
fuel_scraper/db_operations.py
Normal file
105
fuel_scraper/db_operations.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
Database operations module for oil price CRUD operations.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
import sys
|
||||
import os
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import models
|
||||
|
||||
|
||||
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same value as ``datetime.utcnow()`` (deprecated since
    Python 3.12) so the timestamps written to the database keep their
    existing naive-UTC form.
    """
    # Function-scope import: the file-level import only brings in the
    # ``datetime`` class, not ``timezone``.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


def _find_existing_oil_price(db_session: Session, item_dict: dict, county_id):
    """Return the stored OilPrice row that ``item_dict`` refers to, or None.

    Lookup order:
    1. ``county_id`` is None: match on (name, state, zone).
    2. ``county_id`` is set: match on (name, state, county_id).
    3. ``county_id`` is set but step 2 found nothing: match on
       (name, state, zone) among rows whose ``county_id`` is still NULL, so
       rows stored without a county can be backfilled instead of duplicated.
    """
    # Criteria passed to successive ``filter`` calls are combined with AND.
    same_dealer = db_session.query(models.OilPrice).filter(
        models.OilPrice.name == item_dict["name"],
        models.OilPrice.state == item_dict["state"],
    )

    if county_id is None:
        return same_dealer.filter(
            models.OilPrice.zone == item_dict["zone"]
        ).first()

    by_county = same_dealer.filter(
        models.OilPrice.county_id == county_id
    ).first()
    if by_county is not None:
        return by_county

    # No row carries this county yet: look for a row for the same zone that
    # has no county assigned, so the caller can attach the county to it.
    return same_dealer.filter(
        models.OilPrice.zone == item_dict["zone"],
        models.OilPrice.county_id.is_(None),
    ).first()


def upsert_oil_price(db_session: Session, item_dict: dict) -> bool:
    """
    Insert or update an oil price record.

    Logic:
    - Match by (name, state, county_id) when county_id is available to avoid
      duplicates when multiple zones map to the same county.
    - If county_id is available but no row has it yet, fall back to a row
      matching (name, state, zone) whose county_id is NULL and backfill it.
    - Match by (name, state, zone) when county_id is not available.
    - If record exists with non-null company_id: skip (vendor-managed price)
    - If record exists with null company_id and different price: update
    - If record exists with same price: skip, unless county_id was backfilled
    - If no record exists: insert new

    The session is only modified, never committed; committing is left to
    the caller.

    Args:
        db_session: SQLAlchemy session
        item_dict: Dictionary with state, zone, name, price, date, county_id
            (county_id is optional and treated as None when missing)

    Returns:
        True if a record was inserted or updated, False otherwise
    """
    county_id = item_dict.get("county_id")
    existing_record = _find_existing_oil_price(db_session, item_dict, county_id)

    if existing_record is None:
        # No record exists - create new
        db_session.add(
            models.OilPrice(
                state=item_dict["state"],
                zone=item_dict["zone"],
                name=item_dict["name"],
                price=item_dict["price"],
                date=item_dict["date"],
                county_id=county_id,
                scrapetimestamp=_utc_now_naive(),
            )
        )
        logging.info(
            f"Added new record for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            f"(county_id={county_id})"
        )
        return True

    # Vendor-managed rows are never touched by the scraper.
    if existing_record.company_id is not None:
        logging.debug(
            f"Skipping update for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            "due to non-null company_id"
        )
        return False

    # True only for rows found through the zone fallback, i.e. rows that
    # were stored without a county and can now be given one.
    county_backfilled = (
        county_id is not None and existing_record.county_id != county_id
    )
    if county_backfilled:
        existing_record.county_id = county_id

    if existing_record.price != item_dict["price"]:
        existing_record.price = item_dict["price"]
        existing_record.date = item_dict["date"]
        existing_record.scrapetimestamp = _utc_now_naive()
        logging.info(
            f"Updated price for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            f"to {item_dict['price']}"
        )
        return True

    if county_backfilled:
        existing_record.scrapetimestamp = _utc_now_naive()
        logging.info(
            f"Updated county_id for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
            f"to {county_id}"
        )
        return True

    logging.debug(
        f"Price unchanged for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}"
    )
    return False
|
||||
Reference in New Issue
Block a user