feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures) and competitor oil pricing from NewEnglandOil. Includes cron-driven scraping, PostgreSQL storage, and REST endpoints for price retrieval. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
140
app/newenglandoil/router.py
Normal file
140
app/newenglandoil/router.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import logging
from datetime import datetime, timezone
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.database import get_db
from app.models import CompanyPrice
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import ScraperError, scrape_newengland_oil
from app.schemas import PriceRecord
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Every endpoint defined in this file is mounted under this prefix and
# grouped under one tag in the generated OpenAPI docs.
router = APIRouter(tags=["New England Oil Scraper"], prefix="/scraper/newenglandoil")
|
||||
|
||||
@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all prices in the database (historical tracking)
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Args:
        db: SQLAlchemy session injected by FastAPI dependency resolution.

    Returns:
        LatestPriceResponse with scraped prices and storage statistics.

    Raises:
        HTTPException: 500 when scraping fails, the database commit fails,
            or an unexpected error occurs.
    """
    logger.info("=" * 60)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info("=" * 60)

    try:
        # Scrape the website
        scraped_prices = scrape_newengland_oil()

        if not scraped_prices:
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                # Timezone-aware UTC timestamp: datetime.utcnow() is
                # deprecated since Python 3.12 and returns a naive value.
                scrape_timestamp=datetime.now(timezone.utc).isoformat(),
                prices=[]
            )

        # Store prices in database
        stored_count = 0
        price_records = []

        for price_data in scraped_prices:
            # Resolve the zone default once so the response record, the
            # duplicate check, and the stored row all agree.
            zone = price_data.get("zone", "zone10")

            # Add to response list (regardless of whether it's new or existing)
            price_records.append(PriceRecord(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=float(price_data["price_decimal"]),
                scrape_date=price_data["scrape_date"].isoformat(),
                zone=zone,
            ))

            try:
                # Check for existing record to prevent duplicates
                existing_record = db.query(CompanyPrice).filter(
                    CompanyPrice.company_name == price_data["company_name"],
                    CompanyPrice.scrape_date == price_data["scrape_date"],
                    CompanyPrice.zone == zone,
                ).first()

                if existing_record:
                    logger.debug(
                        "Skipping duplicate record for %s on %s",
                        price_data["company_name"],
                        price_data["scrape_date"],
                    )
                    continue

                # Create and stage the database record; committed in bulk
                # after the loop.
                db.add(CompanyPrice(
                    company_name=price_data["company_name"],
                    town=price_data.get("town"),
                    price_decimal=price_data["price_decimal"],
                    scrape_date=price_data["scrape_date"],
                    zone=zone,
                ))
                stored_count += 1

            except SQLAlchemyError as e:
                # Only DB calls live in this try, so catch the DB error type
                # rather than a blanket Exception. One bad row is logged and
                # skipped (best-effort); the remaining rows still store.
                logger.error(
                    "Failed to store price for %s: %s",
                    price_data.get("company_name"),
                    e,
                )

        # Commit all new records in one transaction
        if stored_count > 0:
            db.commit()
            logger.info("Successfully stored %d new price records", stored_count)
        else:
            logger.info("No new price records to store (all duplicates)")

        return LatestPriceResponse(
            status="success",
            message=f"Successfully scraped {len(scraped_prices)} prices, stored {stored_count} new records",
            prices_scraped=len(scraped_prices),
            prices_stored=stored_count,
            scrape_timestamp=datetime.now(timezone.utc).isoformat(),
            prices=price_records,
        )

    except ScraperError as e:
        logger.error("Scraper error: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        ) from e

    except SQLAlchemyError as e:
        # The commit (or a query) failed at the session level; roll back so
        # the pooled session is usable again before surfacing the error.
        db.rollback()
        logger.error("Database error during price storage: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        ) from e

    except Exception as e:
        # Last-resort boundary handler: roll back any staged rows, log with
        # the traceback, and convert to a 500 for the caller.
        db.rollback()
        logger.error("Unexpected error during scraping: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        ) from e
|
||||
Reference in New Issue
Block a user