Files
eamco_scraper/app/main.py
Edwin Eames fad7c1dc28 feat: add fallback to most recent prices when requested date has no data
- When no prices exist for the requested date, query for the most
  recent available date and return those prices instead
- Log informational message when falling back to alternate date
2026-02-27 18:41:12 -05:00

207 lines
6.3 KiB
Python

"""
eamco_scraper - FastAPI Oil Price Scraping Microservice.
This microservice provides endpoints for scraping oil prices from New England Oil
and storing them in the database for historical tracking.
Endpoints:
GET /health - Health check with database connectivity status
GET /scraper/prices - Return stored prices for a date (does not trigger a scrape)
GET /scraper/newenglandoil/latestprice - Trigger scrape and return latest prices
Usage:
# Development
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
# Production (Docker)
docker run -p 8000:8000 eamco_scraper
# Trigger from cron
curl http://localhost:8000/scraper/newenglandoil/latestprice
"""
import logging
import os
import sys
from typing import List
from datetime import datetime
from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
from app.config import (
DATABASE_URL,
CORS_ORIGINS,
LOG_LEVEL,
LOG_FORMAT,
)
from app.models import Base, CompanyPrice
from app.database import engine, get_db, check_db_connection
from app.schemas import HealthResponse, PriceRecord
from app.newenglandoil.router import router as newenglandoil_router
from app.priceticker.router import router as priceticker_router
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
# Route every log record to stdout. LOG_LEVEL is matched case-insensitively
# against the names on the logging module; anything unrecognised falls back
# to INFO rather than raising.
logging.basicConfig(
    format=LOG_FORMAT,
    handlers=[logging.StreamHandler(stream=sys.stdout)],
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
)
# Module-scoped logger shared by the endpoints and lifecycle hooks in this file.
logger = logging.getLogger(__name__)
# =============================================================================
# FASTAPI APPLICATION
# =============================================================================
# The ASGI application object. Interactive API documentation is exposed at
# /docs and /redoc.
app = FastAPI(
    title="eamco_scraper",
    version="1.0.0",
    description="Oil price scraping microservice for New England Oil",
    redoc_url="/redoc",
    docs_url="/docs",
)
# =============================================================================
# CORS MIDDLEWARE
# =============================================================================
# Cross-origin policy: the permitted origins come from CORS_ORIGINS, requests
# may carry credentials, and HTTP methods and headers are left unrestricted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=CORS_ORIGINS,
)
# =============================================================================
# ROUTERS
# =============================================================================
# Mount the feature routers on the application. No prefix or tags are added
# here, so the exposed paths are whatever each router declares itself.
app.include_router(newenglandoil_router)
app.include_router(priceticker_router)
# =============================================================================
# ENDPOINTS
# =============================================================================
@app.get("/", include_in_schema=False)
async def root():
    """Return basic service metadata and the path of the API docs.

    The route is excluded from the generated OpenAPI schema.
    """
    service_info = dict(
        service="eamco_scraper",
        version="1.0.0",
        docs="/docs",
    )
    return service_info
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Report whether the service can reach its database.

    Intended for container health checks and monitoring probes.

    Returns:
        HealthResponse whose status is "healthy" when check_db_connection()
        reports success and "degraded" otherwise, together with the
        db_connected flag itself.
    """
    is_connected = check_db_connection()
    if is_connected:
        status_label = "healthy"
    else:
        status_label = "degraded"
    return HealthResponse(status=status_label, db_connected=is_connected)
@app.get(
    "/scraper/prices",
    response_model=List[PriceRecord],
    tags=["Prices"],
)
async def get_stored_prices(
    date: str | None = None,
    db: Session = Depends(get_db)
):
    """
    Get stored oil prices from the database.

    Does NOT trigger a scrape.

    Args:
        date: Scrape date to look up; it is compared as-is against
            CompanyPrice.scrape_date. When omitted or empty, the current
            UTC date in ISO format (YYYY-MM-DD) is used.
        db: Database session supplied by the get_db dependency.

    Returns:
        One PriceRecord per stored row for the requested date. If that date
        has no rows, the rows of the most recent stored scrape_date are
        returned instead. The list is empty only when no prices are stored
        at all.

    Raises:
        HTTPException: 500 when the lookup or the row conversion fails. The
            response carries a generic message; the underlying error is
            written to the log with its traceback.
    """
    # Local import: the module header only brings in `datetime` itself.
    from datetime import timezone

    if not date:
        # datetime.utcnow() is deprecated since Python 3.12; an aware "now"
        # in UTC yields the same calendar date.
        date = datetime.now(timezone.utc).date().isoformat()

    try:
        # Query prices for the specific date
        prices = db.query(CompanyPrice).filter(
            CompanyPrice.scrape_date == date
        ).all()

        # Fallback: if no prices for requested date, get most recent ones
        if not prices:
            latest_date_result = db.query(CompanyPrice.scrape_date).order_by(
                CompanyPrice.scrape_date.desc()
            ).first()

            if latest_date_result:
                latest_date = latest_date_result[0]
                prices = db.query(CompanyPrice).filter(
                    CompanyPrice.scrape_date == latest_date
                ).all()
                logger.info(
                    "No prices for %s, returning %d prices from %s",
                    date,
                    len(prices),
                    latest_date,
                )

        return [
            PriceRecord(
                company_name=p.company_name,
                town=p.town,
                price_decimal=float(p.price_decimal),
                scrape_date=str(p.scrape_date),
                zone=p.zone
            ) for p in prices
        ]
    except Exception as exc:
        # Keep driver/SQL details out of the HTTP response; the full
        # exception (with traceback) goes to the log instead.
        logger.exception("Error fetching prices")
        raise HTTPException(
            status_code=500,
            detail="Error fetching prices",
        ) from exc
# =============================================================================
# STARTUP/SHUTDOWN EVENTS
# =============================================================================
def _redact_db_url(url: str) -> str:
    """Return *url* with any password in its credentials part masked as ``***``.

    Works on the plain string so unusual schemes are not re-serialised. The
    text between ``://`` and the last ``@`` is treated as ``user[:password]``;
    in ambiguous cases this errs towards masking more rather than less.
    URLs without a ``://`` prefix, without ``@``, or without a ``:`` in the
    credentials part are returned unchanged.
    """
    scheme, scheme_sep, rest = url.partition("://")
    userinfo, at_sign, remainder = rest.rpartition("@")
    if not scheme_sep or not at_sign:
        return url
    user, colon, _password = userinfo.partition(":")
    if not colon:
        return url
    return f"{scheme}://{user}:***@{remainder}"


@app.on_event("startup")
async def startup_event():
    """Application startup - log configuration, test the DB, ensure tables.

    Logs the run mode taken from the MODE environment variable (defaulting
    to DEVELOPMENT), the database URL with its password masked, and the
    number of configured CORS origins. It then checks database connectivity
    and attempts to create any missing tables. Failures are logged and do
    not abort startup.
    """
    logger.info("🚀 eamco_scraper STARTING")

    mode = os.environ.get('MODE', 'DEVELOPMENT').upper()
    if mode in ('DEVELOPMENT', 'DEV', 'LOCAL'):
        logger.info("🤖🤖🤖🤖🤖 Mode: Development 🤖🤖🤖🤖🤖")
    elif mode in ('PRODUCTION', 'PROD'):
        logger.info("💀💀💀💀💀💀💀💀💀💀 ⚠️ WARNING PRODUCTION 💀💀💀💀💀💀💀💀💀💀")

    # A fixed-length prefix of the URL can still contain "user:password",
    # so the password is masked explicitly before logging.
    logger.info("DB: %s", _redact_db_url(DATABASE_URL))
    logger.info("CORS: %d origins configured", len(CORS_ORIGINS))

    # Test database connection
    if check_db_connection():
        logger.info("DB Connection: ✅ OK")
    else:
        logger.warning("DB Connection: ❌ FAILED")

    # Create tables if they don't exist
    try:
        Base.metadata.create_all(bind=engine)
        logger.info("Database tables verified/created")
    except Exception as exc:
        # Best effort: startup continues, but the traceback is kept in the log.
        logger.exception("Failed to create tables: %s", exc)
@app.on_event("shutdown")
async def shutdown_event():
    """Log a shutdown notice; no other cleanup is performed here."""
    farewell = "🛑 eamco_scraper SHUTTING DOWN"
    logger.info(farewell)