Files
eamco_scraper/app/main.py
Edwin Eames af9c2f99e7 feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures)
and competitor oil pricing from NewEnglandOil. Includes cron-driven
scraping, PostgreSQL storage, and REST endpoints for price retrieval.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 17:57:44 -05:00

196 lines
5.8 KiB
Python

"""
eamco_scraper - FastAPI Oil Price Scraping Microservice.
This microservice provides endpoints for scraping oil prices from New England Oil
and storing them in the database for historical tracking.
Endpoints:
GET /health - Health check with database connectivity status
GET /scraper/newenglandoil/latestprice - Trigger scrape and return latest prices
Usage:
# Development
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
# Production (Docker)
docker run -p 8000:8000 eamco_scraper
# Trigger from cron
curl http://localhost:8000/scraper/newenglandoil/latestprice
"""
import logging
import os
import sys
from datetime import datetime, timezone
from typing import List

from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session

from app.config import (
    DATABASE_URL,
    CORS_ORIGINS,
    LOG_LEVEL,
    LOG_FORMAT,
)
from app.models import Base, CompanyPrice
from app.database import engine, get_db, check_db_connection
from app.schemas import HealthResponse, PriceRecord
from app.newenglandoil.router import router as newenglandoil_router
from app.priceticker.router import router as priceticker_router
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
# Root-logger setup for the whole service.  The level name comes from
# LOG_LEVEL in app.config (falling back to INFO when the value is not a
# valid logging level name); output goes to stdout so container runtimes
# can collect it.
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format=LOG_FORMAT,
    handlers=[
        logging.StreamHandler(sys.stdout),
    ]
)
# Module-level logger used by the endpoints and lifecycle hooks below.
logger = logging.getLogger(__name__)
# =============================================================================
# FASTAPI APPLICATION
# =============================================================================
app = FastAPI(
    title="eamco_scraper",
    description="Oil price scraping microservice for New England Oil",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)
# =============================================================================
# CORS MIDDLEWARE
# =============================================================================
# Allowed origins come from CORS_ORIGINS in app.config; credentials and all
# methods/headers are permitted for those origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# =============================================================================
# ROUTERS
# =============================================================================
# Mount the feature routers; their paths are defined in the router modules.
app.include_router(newenglandoil_router)
app.include_router(priceticker_router)
# =============================================================================
# ENDPOINTS
# =============================================================================
@app.get("/", include_in_schema=False)
async def root():
    """Landing endpoint, hidden from the OpenAPI schema.

    Returns basic service identification and points callers at the
    interactive documentation.
    """
    service_info = {
        "service": "eamco_scraper",
        "version": "1.0.0",
        "docs": "/docs",
    }
    return service_info
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Health check endpoint.

    Probes database connectivity and reports overall service status:
    "healthy" when the database is reachable, "degraded" otherwise.
    Suitable for container health checks and external monitoring.

    Returns:
        HealthResponse with status and db_connected flag
    """
    db_ok = check_db_connection()
    overall = "healthy" if db_ok else "degraded"
    return HealthResponse(status=overall, db_connected=db_ok)
@app.get(
    "/scraper/prices",
    response_model=List[PriceRecord],
    tags=["Prices"],
)
async def get_stored_prices(
    date: str | None = None,
    db: Session = Depends(get_db)
):
    """
    Get stored oil prices from the database.

    Read-only: does NOT trigger a scrape, it only returns rows already
    persisted by the scraper endpoints.

    Args:
        date: ISO date string (YYYY-MM-DD) to filter on. Defaults to the
            current UTC date when omitted or empty.
        db: SQLAlchemy session injected by FastAPI.

    Returns:
        List of PriceRecord entries for the requested date (possibly empty).

    Raises:
        HTTPException: 500 when the database query fails.
    """
    if not date:
        # datetime.utcnow() is deprecated (Python 3.12+); use an aware UTC
        # datetime instead -- the resulting ISO date string is identical.
        date = datetime.now(timezone.utc).date().isoformat()
    try:
        # Keep the try body minimal: only the query can touch the DB.
        rows = db.query(CompanyPrice).filter(
            CompanyPrice.scrape_date == date
        ).all()
    except Exception as e:
        # Service boundary: log the full traceback, surface a 500.
        logger.exception("Error fetching prices for %s", date)
        raise HTTPException(status_code=500, detail=str(e)) from e
    return [
        PriceRecord(
            company_name=p.company_name,
            town=p.town,
            price_decimal=float(p.price_decimal),
            scrape_date=str(p.scrape_date),
            zone=p.zone,
        )
        for p in rows
    ]
# =============================================================================
# STARTUP/SHUTDOWN EVENTS
# =============================================================================
@app.on_event("startup")
async def startup_event():
    """Application startup hook.

    Logs the runtime configuration, probes database connectivity, and
    ensures the schema tables exist.  A table-creation failure is logged
    but does not abort startup.
    """
    logger.info("🚀 eamco_scraper STARTING")

    run_mode = os.environ.get('MODE', 'DEVELOPMENT').upper()
    if run_mode in ('DEVELOPMENT', 'DEV', 'LOCAL'):
        logger.info("🤖🤖🤖🤖🤖 Mode: Development 🤖🤖🤖🤖🤖")
    elif run_mode in ('PRODUCTION', 'PROD'):
        logger.info("💀💀💀💀💀💀💀💀💀💀 ⚠️ WARNING PRODUCTION 💀💀💀💀💀💀💀💀💀💀")

    # Only a prefix of the DSN is logged, which avoids leaking credentials.
    logger.info(f"DB: {DATABASE_URL[:30]}...")
    logger.info(f"CORS: {len(CORS_ORIGINS)} origins configured")

    # Probe connectivity up front so failures are visible in the startup log.
    if check_db_connection():
        logger.info("DB Connection: ✅ OK")
    else:
        logger.warning("DB Connection: ❌ FAILED")

    # create_all is idempotent: it only creates tables that are missing.
    try:
        Base.metadata.create_all(bind=engine)
    except Exception as e:
        logger.error(f"Failed to create tables: {e}")
    else:
        logger.info("Database tables verified/created")
@app.on_event("shutdown")
async def shutdown_event():
    """Application shutdown hook — currently only logs the shutdown.

    No explicit cleanup is performed here; the SQLAlchemy engine and
    sessions are released by process teardown.
    """
    logger.info("🛑 eamco_scraper SHUTTING DOWN")