Files
Edwin Eames af9c2f99e7 feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures)
and competitor oil pricing from NewEnglandOil. Includes cron-driven
scraping, PostgreSQL storage, and REST endpoints for price retrieval.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 17:57:44 -05:00

141 lines
4.8 KiB
Python

import logging
from datetime import datetime, timezone
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.database import get_db
from app.models import CompanyPrice
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import ScraperError, scrape_newengland_oil
from app.schemas import PriceRecord
logger = logging.getLogger(__name__)
# All New England Oil scraper endpoints hang off this router; the app mounts
# them under /scraper/newenglandoil.
router = APIRouter(prefix="/scraper/newenglandoil", tags=["New England Oil Scraper"])
@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all new prices in the database (historical tracking),
       skipping (company_name, scrape_date, zone) duplicates
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Returns:
        LatestPriceResponse with scraped prices and storage statistics.

    Raises:
        HTTPException: 500 when scraping fails (ScraperError), the database
            rejects the writes (SQLAlchemyError, rolled back), or any other
            unexpected error occurs (also rolled back).
    """
    logger.info("=" * 60)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info("=" * 60)
    try:
        # Scrape the website
        scraped_prices = scrape_newengland_oil()
        if not scraped_prices:
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                # Timezone-aware UTC: datetime.utcnow() is naive and
                # deprecated since Python 3.12.
                scrape_timestamp=datetime.now(timezone.utc).isoformat(),
                prices=[],
            )

        # Store prices in database
        stored_count = 0
        price_records = []
        for price_data in scraped_prices:
            zone = price_data.get("zone", "zone10")
            # Add to response list (regardless of whether it's new or existing)
            price_records.append(PriceRecord(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=float(price_data["price_decimal"]),
                scrape_date=price_data["scrape_date"].isoformat(),
                zone=zone,
            ))

            # Check for existing record to prevent duplicates.
            # NOTE: DB errors here are deliberately NOT swallowed per-record;
            # they propagate to the SQLAlchemyError handler below so the
            # transaction is rolled back instead of reporting partial success.
            existing_record = db.query(CompanyPrice).filter(
                CompanyPrice.company_name == price_data["company_name"],
                CompanyPrice.scrape_date == price_data["scrape_date"],
                CompanyPrice.zone == zone,
            ).first()
            if existing_record:
                logger.debug(
                    "Skipping duplicate record for %s on %s",
                    price_data["company_name"],
                    price_data["scrape_date"],
                )
                continue

            # Create database record
            db.add(CompanyPrice(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=price_data["price_decimal"],
                scrape_date=price_data["scrape_date"],
                zone=zone,
            ))
            stored_count += 1

        # Commit all new records in one transaction
        if stored_count > 0:
            db.commit()
            logger.info("Successfully stored %d new price records", stored_count)
        else:
            logger.info("No new price records to store (all duplicates)")

        return LatestPriceResponse(
            status="success",
            message=(
                f"Successfully scraped {len(scraped_prices)} prices, "
                f"stored {stored_count} new records"
            ),
            prices_scraped=len(scraped_prices),
            prices_stored=stored_count,
            scrape_timestamp=datetime.now(timezone.utc).isoformat(),
            prices=price_records,
        )
    except ScraperError as e:
        logger.error("Scraper error: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}",
        )
    except SQLAlchemyError as e:
        db.rollback()
        logger.error("Database error during price storage: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}",
        )
    except Exception as e:
        db.rollback()
        logger.error("Unexpected error during scraping: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}",
        )