FastAPI router for the New England Oil scraper: scrapes competitor heating-oil
pricing (Zone 10) on demand (typically cron-driven), stores results in
PostgreSQL for historical tracking, and exposes a REST endpoint that returns
the scraped prices. Related project scrapers cover commodity futures tickers
(HO, CL, RB). (~141 lines, 4.8 KiB, Python)
import logging
from datetime import datetime, timezone
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.database import get_db
from app.models import CompanyPrice
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import ScraperError, scrape_newengland_oil
from app.schemas import PriceRecord
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(
|
|
prefix="/scraper/newenglandoil",
|
|
tags=["New England Oil Scraper"],
|
|
)
|
|
|
|
@router.get(
|
|
"/latestprice",
|
|
response_model=LatestPriceResponse,
|
|
)
|
|
async def get_latest_prices(db: Session = Depends(get_db)):
|
|
"""
|
|
Scrape latest oil prices from New England Oil Zone 10.
|
|
|
|
This endpoint:
|
|
1. Scrapes the New England Oil website for current prices
|
|
2. Stores all prices in the database (historical tracking)
|
|
3. Returns the scraped prices
|
|
|
|
Designed to be called from cron for automated price tracking.
|
|
|
|
Example:
|
|
curl http://localhost:8000/scraper/newenglandoil/latestprice
|
|
|
|
Returns:
|
|
LatestPriceResponse with scraped prices and storage statistics
|
|
"""
|
|
logger.info("=" * 60)
|
|
logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
|
|
logger.info("=" * 60)
|
|
|
|
try:
|
|
# Scrape the website
|
|
scraped_prices = scrape_newengland_oil()
|
|
|
|
if not scraped_prices:
|
|
logger.warning("No prices were scraped")
|
|
return LatestPriceResponse(
|
|
status="warning",
|
|
message="No prices found on website",
|
|
prices_scraped=0,
|
|
prices_stored=0,
|
|
scrape_timestamp=datetime.utcnow().isoformat(),
|
|
prices=[]
|
|
)
|
|
|
|
# Store prices in database
|
|
stored_count = 0
|
|
price_records = []
|
|
|
|
for price_data in scraped_prices:
|
|
# Add to response list (regardless of whether it's new or existing)
|
|
price_records.append(PriceRecord(
|
|
company_name=price_data["company_name"],
|
|
town=price_data.get("town"),
|
|
price_decimal=float(price_data["price_decimal"]),
|
|
scrape_date=price_data["scrape_date"].isoformat(),
|
|
zone=price_data.get("zone", "zone10"),
|
|
))
|
|
|
|
try:
|
|
# Check for existing record to prevent duplicates
|
|
existing_record = db.query(CompanyPrice).filter(
|
|
CompanyPrice.company_name == price_data["company_name"],
|
|
CompanyPrice.scrape_date == price_data["scrape_date"],
|
|
CompanyPrice.zone == price_data.get("zone", "zone10")
|
|
).first()
|
|
|
|
if existing_record:
|
|
logger.debug(f"Skipping duplicate record for {price_data['company_name']} on {price_data['scrape_date']}")
|
|
continue
|
|
|
|
# Create database record
|
|
price_record = CompanyPrice(
|
|
company_name=price_data["company_name"],
|
|
town=price_data.get("town"),
|
|
price_decimal=price_data["price_decimal"],
|
|
scrape_date=price_data["scrape_date"],
|
|
zone=price_data.get("zone", "zone10"),
|
|
)
|
|
|
|
db.add(price_record)
|
|
stored_count += 1
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to store price for {price_data.get('company_name')}: {e}")
|
|
|
|
# Commit all new records
|
|
if stored_count > 0:
|
|
db.commit()
|
|
logger.info(f"Successfully stored {stored_count} new price records")
|
|
else:
|
|
logger.info("No new price records to store (all duplicates)")
|
|
|
|
return LatestPriceResponse(
|
|
status="success",
|
|
message=f"Successfully scraped {len(scraped_prices)} prices, stored {stored_count} new records",
|
|
prices_scraped=len(scraped_prices),
|
|
prices_stored=stored_count,
|
|
scrape_timestamp=datetime.utcnow().isoformat(),
|
|
prices=price_records,
|
|
)
|
|
|
|
except ScraperError as e:
|
|
logger.error(f"Scraper error: {e}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Scraping failed: {str(e)}"
|
|
)
|
|
|
|
except SQLAlchemyError as e:
|
|
db.rollback()
|
|
logger.error(f"Database error during price storage: {e}", exc_info=True)
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Database error: {str(e)}"
|
|
)
|
|
|
|
except Exception as e:
|
|
db.rollback()
|
|
logger.error(f"Unexpected error during scraping: {e}", exc_info=True)
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"Scraping failed: {str(e)}"
|
|
)
|