import logging
from datetime import datetime, timezone
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.database import get_db
from app.models import CompanyPrice
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import ScraperError, scrape_newengland_oil
from app.schemas import PriceRecord

logger = logging.getLogger(__name__)

# Default zone used when a scraped row carries no explicit "zone" key.
DEFAULT_ZONE = "zone10"

router = APIRouter(
    prefix="/scraper/newenglandoil",
    tags=["New England Oil Scraper"],
)


def _to_price_record(price_data: dict) -> PriceRecord:
    """Convert one scraped price dict into the API response schema.

    Assumes ``price_data["scrape_date"]`` is a date/datetime object
    (it is serialized via ``.isoformat()``) — produced by the scraper.
    """
    return PriceRecord(
        company_name=price_data["company_name"],
        town=price_data.get("town"),
        price_decimal=float(price_data["price_decimal"]),
        scrape_date=price_data["scrape_date"].isoformat(),
        zone=price_data.get("zone", DEFAULT_ZONE),
    )


def _store_if_new(db: Session, price_data: dict) -> bool:
    """Add one scraped price to the session unless it already exists.

    A row is a duplicate when company name, scrape date, and zone all
    match an existing ``CompanyPrice`` record.

    Returns:
        True when a new record was added to the session, False when the
        row was a duplicate or the insert attempt failed.  Failures are
        logged and swallowed deliberately so one bad row cannot abort
        the rest of the batch (best-effort semantics, as in the
        original implementation).
    """
    zone = price_data.get("zone", DEFAULT_ZONE)
    try:
        existing_record = db.query(CompanyPrice).filter(
            CompanyPrice.company_name == price_data["company_name"],
            CompanyPrice.scrape_date == price_data["scrape_date"],
            CompanyPrice.zone == zone,
        ).first()
        if existing_record:
            logger.debug(
                "Skipping duplicate record for %s on %s",
                price_data["company_name"],
                price_data["scrape_date"],
            )
            return False

        db.add(CompanyPrice(
            company_name=price_data["company_name"],
            town=price_data.get("town"),
            price_decimal=price_data["price_decimal"],
            scrape_date=price_data["scrape_date"],
            zone=zone,
        ))
        return True
    except Exception as e:  # noqa: BLE001 — intentional best-effort per row
        logger.error(
            "Failed to store price for %s: %s",
            price_data.get("company_name"),
            e,
        )
        return False


@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all prices in the database (historical tracking)
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Returns:
        LatestPriceResponse with scraped prices and storage statistics

    Raises:
        HTTPException(500): when scraping or the database commit fails.
    """
    logger.info("=" * 60)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info("=" * 60)

    try:
        # Scrape the website
        scraped_prices = scrape_newengland_oil()

        if not scraped_prices:
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                # Timezone-aware now(); utcnow() is deprecated since 3.12.
                scrape_timestamp=datetime.now(timezone.utc).isoformat(),
                prices=[],
            )

        # Response echoes every scraped price, whether new or duplicate;
        # only genuinely new rows are added to the session.
        price_records = [_to_price_record(p) for p in scraped_prices]
        stored_count = sum(1 for p in scraped_prices if _store_if_new(db, p))

        # Commit all new records in a single transaction.
        if stored_count > 0:
            db.commit()
            logger.info("Successfully stored %d new price records", stored_count)
        else:
            logger.info("No new price records to store (all duplicates)")

        return LatestPriceResponse(
            status="success",
            message=(
                f"Successfully scraped {len(scraped_prices)} prices, "
                f"stored {stored_count} new records"
            ),
            prices_scraped=len(scraped_prices),
            prices_stored=stored_count,
            scrape_timestamp=datetime.now(timezone.utc).isoformat(),
            prices=price_records,
        )

    except ScraperError as e:
        logger.error("Scraper error: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}",
        )
    except SQLAlchemyError as e:
        # Commit (or query) failed — undo any pending inserts.
        db.rollback()
        logger.error("Database error during price storage: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}",
        )
    except Exception as e:
        # Top-level boundary: log with traceback and surface as a 500.
        db.rollback()
        logger.error("Unexpected error during scraping: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}",
        )