""" Database operations module for oil price CRUD operations. """ import logging import sys import os from datetime import datetime # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from sqlalchemy.orm import Session from sqlalchemy import func import models def upsert_oil_price(db_session: Session, item_dict: dict, force_update_metadata: bool = False) -> bool: """ Insert or update an oil price record. Logic: - Match by (name, state, county_id) - case insensitive on name! - If county_id is None, fall back to (name, state, zone). - If match found: - If company_id is set: SKIP (vendor managed). - Update name to formatted version (e.g. "Leblanc Oil" vs "LEBLANC OIL"). - Update phone/url if missing OR force_update_metadata is True. - Update price/date if changed. - If no match: INSERT. Args: db_session: SQLAlchemy session item_dict: Dictionary with state, zone, name, price, date, county_id force_update_metadata: If True, overwrite existing phone/url """ county_id = item_dict.get("county_id") site_name = item_dict.get("site_name", "NewEnglandOil") name_clean = item_dict["name"].strip() # Query for existing record - Case Insensitive query = db_session.query(models.OilPrice).filter( func.lower(models.OilPrice.name) == name_clean.lower(), models.OilPrice.state == item_dict["state"] ) if county_id is not None: query = query.filter(models.OilPrice.county_id == county_id) else: query = query.filter(models.OilPrice.zone == item_dict["zone"]) existing_record = query.first() new_phone = item_dict.get("phone") new_url = item_dict.get("url") if existing_record: # Record exists if existing_record.company_id is not None: logging.debug( f"[{site_name}] Skipping update for {name_clean} (ID={existing_record.id}) " "due to non-null company_id" ) return False updated = False # 1. Update name casing if different (and new name looks "better" e.g. not all caps) # Simple heuristic: if existing is all caps and new is mixed, take new. if existing_record.name != name_clean: # We trust the scraper's _smart_title() output is generally good existing_record.name = name_clean updated = True # 2. Update county_id if we have one (scraper resolved it) and DB didn't have it if county_id is not None and existing_record.county_id != county_id: existing_record.county_id = county_id updated = True # 3. Backfill or Force Update phone/url if new_phone: if not existing_record.phone or (force_update_metadata and existing_record.phone != new_phone): existing_record.phone = new_phone updated = True if new_url: if not existing_record.url or (force_update_metadata and existing_record.url != new_url): existing_record.url = new_url updated = True # 4. Check Price Change # We compare as float provided logic is sound, but float equality can be tricky. # However, price is usually 2 decimals. if abs(existing_record.price - item_dict["price"]) > 0.001: existing_record.price = item_dict["price"] existing_record.date = item_dict["date"] existing_record.scrapetimestamp = datetime.utcnow() logging.info( f"[{site_name}] Updated price for {name_clean} (ID={existing_record.id}) " f"to {item_dict['price']}" ) return True elif updated: existing_record.scrapetimestamp = datetime.utcnow() logging.info( f"[{site_name}] Updated metadata for {name_clean} (ID={existing_record.id})" ) return True else: # No meaningful change logging.debug( f"[{site_name}] Price unchanged for {name_clean} in {item_dict['state']} zone {item_dict['zone']}" ) return False else: # Create new oil_price_record = models.OilPrice( state=item_dict["state"], zone=item_dict["zone"], name=name_clean, price=item_dict["price"], date=item_dict["date"], county_id=county_id, phone=new_phone, url=new_url, scrapetimestamp=datetime.utcnow() ) db_session.add(oil_price_record) logging.info( f"[{site_name}] Added new record for {name_clean} in {item_dict['state']} zone {item_dict['zone']} " f"(county_id={county_id})" ) return True