Files
Edwin Eames af9c2f99e7 feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures)
and competitor oil pricing from NewEnglandOil. Includes cron-driven
scraping, PostgreSQL storage, and REST endpoints for price retrieval.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 17:57:44 -05:00

141 lines
4.8 KiB
Python

import logging
from datetime import datetime, timezone
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.database import get_db
from app.models import CompanyPrice
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import ScraperError, scrape_newengland_oil
from app.schemas import PriceRecord
logger = logging.getLogger(__name__)
# All New England Oil scraper endpoints hang off this router; the app mounts
# them under /scraper/newenglandoil.
router = APIRouter(prefix="/scraper/newenglandoil", tags=["New England Oil Scraper"])
@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all new prices in the database (historical tracking),
       skipping (company_name, scrape_date, zone) duplicates
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Returns:
        LatestPriceResponse with scraped prices and storage statistics.

    Raises:
        HTTPException: 500 when scraping fails (ScraperError), the database
            rejects the writes (SQLAlchemyError, rolled back), or any other
            unexpected error occurs (also rolled back).
    """
    logger.info("=" * 60)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info("=" * 60)
    try:
        # Scrape the website
        scraped_prices = scrape_newengland_oil()
        if not scraped_prices:
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                # Timezone-aware UTC: datetime.utcnow() is naive and
                # deprecated since Python 3.12.
                scrape_timestamp=datetime.now(timezone.utc).isoformat(),
                prices=[],
            )

        # Store prices in database
        stored_count = 0
        price_records = []
        for price_data in scraped_prices:
            zone = price_data.get("zone", "zone10")
            # Add to response list (regardless of whether it's new or existing)
            price_records.append(PriceRecord(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=float(price_data["price_decimal"]),
                scrape_date=price_data["scrape_date"].isoformat(),
                zone=zone,
            ))

            # Check for existing record to prevent duplicates.
            # NOTE: DB errors here are deliberately NOT swallowed per-record;
            # they propagate to the SQLAlchemyError handler below so the
            # transaction is rolled back instead of reporting partial success.
            existing_record = db.query(CompanyPrice).filter(
                CompanyPrice.company_name == price_data["company_name"],
                CompanyPrice.scrape_date == price_data["scrape_date"],
                CompanyPrice.zone == zone,
            ).first()
            if existing_record:
                logger.debug(
                    "Skipping duplicate record for %s on %s",
                    price_data["company_name"],
                    price_data["scrape_date"],
                )
                continue

            # Create database record
            db.add(CompanyPrice(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=price_data["price_decimal"],
                scrape_date=price_data["scrape_date"],
                zone=zone,
            ))
            stored_count += 1

        # Commit all new records in one transaction
        if stored_count > 0:
            db.commit()
            logger.info("Successfully stored %d new price records", stored_count)
        else:
            logger.info("No new price records to store (all duplicates)")

        return LatestPriceResponse(
            status="success",
            message=(
                f"Successfully scraped {len(scraped_prices)} prices, "
                f"stored {stored_count} new records"
            ),
            prices_scraped=len(scraped_prices),
            prices_stored=stored_count,
            scrape_timestamp=datetime.now(timezone.utc).isoformat(),
            prices=price_records,
        )
    except ScraperError as e:
        logger.error("Scraper error: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}",
        )
    except SQLAlchemyError as e:
        db.rollback()
        logger.error("Database error during price storage: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}",
        )
    except Exception as e:
        db.rollback()
        logger.error("Unexpected error during scraping: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}",
        )