feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures) and competitor oil pricing from NewEnglandOil. Includes cron-driven scraping, PostgreSQL storage, and REST endpoints for price retrieval. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
140
app/newenglandoil/router.py
Normal file
140
app/newenglandoil/router.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import logging
from datetime import datetime, timezone
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.database import get_db
from app.models import CompanyPrice
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import ScraperError, scrape_newengland_oil
from app.schemas import PriceRecord
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Every endpoint defined in this file is mounted under this prefix and
# grouped under one tag in the generated OpenAPI docs.
router = APIRouter(tags=["New England Oil Scraper"], prefix="/scraper/newenglandoil")
|
||||
|
||||
@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all prices in the database (historical tracking)
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Args:
        db: SQLAlchemy session injected by FastAPI dependency resolution.

    Returns:
        LatestPriceResponse with scraped prices and storage statistics.

    Raises:
        HTTPException: 500 when scraping fails, the database commit fails,
            or an unexpected error occurs.
    """
    logger.info("=" * 60)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info("=" * 60)

    try:
        # Scrape the website
        scraped_prices = scrape_newengland_oil()

        if not scraped_prices:
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                # Timezone-aware UTC timestamp: datetime.utcnow() is
                # deprecated since Python 3.12 and returns a naive value.
                scrape_timestamp=datetime.now(timezone.utc).isoformat(),
                prices=[]
            )

        # Store prices in database
        stored_count = 0
        price_records = []

        for price_data in scraped_prices:
            # Resolve the zone default once so the response record, the
            # duplicate check, and the stored row all agree.
            zone = price_data.get("zone", "zone10")

            # Add to response list (regardless of whether it's new or existing)
            price_records.append(PriceRecord(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=float(price_data["price_decimal"]),
                scrape_date=price_data["scrape_date"].isoformat(),
                zone=zone,
            ))

            try:
                # Check for existing record to prevent duplicates
                existing_record = db.query(CompanyPrice).filter(
                    CompanyPrice.company_name == price_data["company_name"],
                    CompanyPrice.scrape_date == price_data["scrape_date"],
                    CompanyPrice.zone == zone,
                ).first()

                if existing_record:
                    logger.debug(
                        "Skipping duplicate record for %s on %s",
                        price_data["company_name"],
                        price_data["scrape_date"],
                    )
                    continue

                # Create and stage the database record; committed in bulk
                # after the loop.
                db.add(CompanyPrice(
                    company_name=price_data["company_name"],
                    town=price_data.get("town"),
                    price_decimal=price_data["price_decimal"],
                    scrape_date=price_data["scrape_date"],
                    zone=zone,
                ))
                stored_count += 1

            except SQLAlchemyError as e:
                # Only DB calls live in this try, so catch the DB error type
                # rather than a blanket Exception. One bad row is logged and
                # skipped (best-effort); the remaining rows still store.
                logger.error(
                    "Failed to store price for %s: %s",
                    price_data.get("company_name"),
                    e,
                )

        # Commit all new records in one transaction
        if stored_count > 0:
            db.commit()
            logger.info("Successfully stored %d new price records", stored_count)
        else:
            logger.info("No new price records to store (all duplicates)")

        return LatestPriceResponse(
            status="success",
            message=f"Successfully scraped {len(scraped_prices)} prices, stored {stored_count} new records",
            prices_scraped=len(scraped_prices),
            prices_stored=stored_count,
            scrape_timestamp=datetime.now(timezone.utc).isoformat(),
            prices=price_records,
        )

    except ScraperError as e:
        logger.error("Scraper error: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        ) from e

    except SQLAlchemyError as e:
        # The commit (or a query) failed at the session level; roll back so
        # the pooled session is usable again before surfacing the error.
        db.rollback()
        logger.error("Database error during price storage: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        ) from e

    except Exception as e:
        # Last-resort boundary handler: roll back any staged rows, log with
        # the traceback, and convert to a 500 for the caller.
        db.rollback()
        logger.error("Unexpected error during scraping: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        ) from e
|
||||
Reference in New Issue
Block a user