- When no prices exist for the requested date, query for the most recent available date and return those prices instead.
- Log an informational message when falling back to the alternate date.
207 lines
6.3 KiB
Python
207 lines
6.3 KiB
Python
"""
|
|
eamco_scraper - FastAPI Oil Price Scraping Microservice.

This microservice provides endpoints for scraping oil prices from New England Oil
and storing them in the database for historical tracking.

Endpoints:
    GET /health - Health check with database connectivity status
    GET /scraper/newenglandoil/latestprice - Trigger scrape and return latest prices
    GET /scraper/prices - Return stored prices without triggering a scrape

Usage:
    # Development
    uvicorn app.main:app --reload --host 0.0.0.0 --port 8000

    # Production (Docker)
    docker run -p 8000:8000 eamco_scraper

    # Trigger from cron
    curl http://localhost:8000/scraper/newenglandoil/latestprice
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import sys
|
|
from typing import List
|
|
from datetime import datetime
|
|
|
|
from fastapi import FastAPI, Depends, HTTPException
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.config import (
|
|
DATABASE_URL,
|
|
CORS_ORIGINS,
|
|
LOG_LEVEL,
|
|
LOG_FORMAT,
|
|
)
|
|
from app.models import Base, CompanyPrice
|
|
from app.database import engine, get_db, check_db_connection
|
|
from app.schemas import HealthResponse, PriceRecord
|
|
from app.newenglandoil.router import router as newenglandoil_router
|
|
from app.priceticker.router import router as priceticker_router
|
|
|
|
|
|
# =============================================================================
|
|
# LOGGING CONFIGURATION
|
|
# =============================================================================
|
|
|
|
# Resolve the configured level name to the matching numeric constant on the
# logging module; a name the module does not define falls back to INFO.
_log_level = getattr(logging, LOG_LEVEL.upper(), logging.INFO)

# All records are written to stdout.
_stdout_handler = logging.StreamHandler(sys.stdout)

logging.basicConfig(
    level=_log_level,
    format=LOG_FORMAT,
    handlers=[_stdout_handler],
)

# Module-level logger used by the endpoints and lifecycle hooks in this file.
logger = logging.getLogger(__name__)
|
|
|
|
# =============================================================================
|
|
# FASTAPI APPLICATION
|
|
# =============================================================================
|
|
|
|
# The application object; the module docstring's uvicorn command refers to it
# as ``app.main:app``.
app = FastAPI(
    # Service identity shown in the generated API documentation.
    title="eamco_scraper",
    version="1.0.0",
    description="Oil price scraping microservice for New England Oil",
    # Paths at which the interactive documentation pages are served.
    docs_url="/docs",
    redoc_url="/redoc",
)
|
|
|
|
# =============================================================================
|
|
# CORS MIDDLEWARE
|
|
# =============================================================================
|
|
|
|
# Cross-origin policy: only the configured origins are allowed, but for those
# origins every HTTP method and request header is accepted and credentials
# may be sent.
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
|
|
|
# =============================================================================
|
|
# ROUTERS
|
|
# =============================================================================
|
|
|
|
# Mount the feature routers on the application, in declaration order.
for _feature_router in (newenglandoil_router, priceticker_router):
    app.include_router(_feature_router)
|
|
|
|
# =============================================================================
|
|
# ENDPOINTS
|
|
# =============================================================================
|
|
|
|
|
|
@app.get("/", include_in_schema=False)
async def root():
    """Return basic service metadata and the path of the API docs.

    The route is excluded from the OpenAPI schema. No HTTP redirect is
    issued; the response body simply names the docs path.
    """
    service_info = dict(
        service="eamco_scraper",
        version="1.0.0",
        docs="/docs",
    )
    return service_info
|
|
|
|
|
|
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Report service health together with database connectivity.

    Intended for container health checks and monitoring probes.

    Returns:
        HealthResponse whose ``status`` is "healthy" when the database check
        succeeds and "degraded" otherwise; ``db_connected`` carries the raw
        result of that check.
    """
    is_db_up = check_db_connection()

    if is_db_up:
        state = "healthy"
    else:
        state = "degraded"

    return HealthResponse(status=state, db_connected=is_db_up)
|
|
|
|
|
|
@app.get(
    "/scraper/prices",
    response_model=List[PriceRecord],
    tags=["Prices"],
)
async def get_stored_prices(
    date: str | None = None,
    db: Session = Depends(get_db),
):
    """
    Get stored oil prices from the database.

    If no date is provided, prices for the current UTC date are requested.
    When no rows exist for the requested date, the rows belonging to the most
    recent scrape date in the table are returned instead and an informational
    message is logged. Does NOT trigger a scrape.

    Args:
        date: Scrape date to look up. Defaults to today's UTC date in ISO
            format (YYYY-MM-DD) when omitted or empty.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A list of PriceRecord objects; empty when the table holds no prices
        at all.

    Raises:
        HTTPException: 500 when the lookup or record conversion fails. The
            underlying error is logged with its traceback, and the client
            receives only a generic message.
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import block.
    from datetime import timezone

    if not date:
        # datetime.utcnow() is deprecated since Python 3.12; an aware "now"
        # in UTC yields the same calendar date.
        date = datetime.now(timezone.utc).date().isoformat()

    try:
        # Query prices for the specific date
        prices = (
            db.query(CompanyPrice)
            .filter(CompanyPrice.scrape_date == date)
            .all()
        )

        # Fallback: if no prices for requested date, get most recent ones
        if not prices:
            latest_row = (
                db.query(CompanyPrice.scrape_date)
                .order_by(CompanyPrice.scrape_date.desc())
                .first()
            )
            if latest_row:
                latest_date = latest_row[0]
                prices = (
                    db.query(CompanyPrice)
                    .filter(CompanyPrice.scrape_date == latest_date)
                    .all()
                )
                logger.info(
                    "No prices for %s, returning %d prices from %s",
                    date,
                    len(prices),
                    latest_date,
                )

        return [
            PriceRecord(
                company_name=p.company_name,
                town=p.town,
                price_decimal=float(p.price_decimal),
                scrape_date=str(p.scrape_date),
                zone=p.zone,
            )
            for p in prices
        ]
    except Exception as exc:
        # logger.exception records the traceback alongside the message.
        logger.exception("Error fetching prices: %s", exc)
        # Do not echo the raw exception text to the client: database errors
        # can contain SQL statements and connection details.
        raise HTTPException(
            status_code=500,
            detail="Failed to fetch prices",
        ) from exc
|
|
|
|
|
|
# =============================================================================
|
|
# STARTUP/SHUTDOWN EVENTS
|
|
# =============================================================================
|
|
|
|
|
|
def _describe_db_target(url):
    """Return ``url`` reduced to scheme, host, port and path for safe logging.

    The user name, password, query string and fragment are dropped so that
    credentials never reach the log output. Placeholder text is returned when
    the URL is empty or cannot be parsed.
    """
    # Imported locally so this block does not depend on a change to the
    # module-level import block.
    from urllib.parse import urlsplit

    if not url:
        return "<not configured>"
    try:
        parts = urlsplit(url)
        port = parts.port  # raises ValueError on a malformed port
    except ValueError:
        return "<unparseable database URL>"

    host = parts.hostname or ""
    if ":" in host:
        # IPv6 literal: restore the brackets that urlsplit strips.
        host = f"[{host}]"
    if port is not None:
        host = f"{host}:{port}"
    return f"{parts.scheme}://{host}{parts.path}"


# NOTE(review): FastAPI documents on_event as deprecated in favour of lifespan
# handlers; migrating requires changing how `app` is constructed.
@app.on_event("startup")
async def startup_event():
    """Application startup - log configuration, test the DB and create tables.

    Logs the run mode taken from the MODE environment variable (defaulting to
    DEVELOPMENT), the database target with credentials removed, and the number
    of configured CORS origins. It then checks database connectivity and
    creates any missing tables. A failed connection check or table creation is
    logged but does not abort startup.
    """
    logger.info("🚀 eamco_scraper STARTING")

    mode = os.environ.get('MODE', 'DEVELOPMENT').upper()
    if mode in ('DEVELOPMENT', 'DEV', 'LOCAL'):
        logger.info("🤖🤖🤖🤖🤖 Mode: Development 🤖🤖🤖🤖🤖")
    elif mode in ('PRODUCTION', 'PROD'):
        logger.info("💀💀💀💀💀💀💀💀💀💀 ⚠️ WARNING PRODUCTION 💀💀💀💀💀💀💀💀💀💀")

    # Never log a raw prefix of the URL: the leading characters of
    # scheme://user:password@host contain the credentials.
    logger.info("DB: %s", _describe_db_target(DATABASE_URL))
    logger.info("CORS: %d origins configured", len(CORS_ORIGINS))

    # Test database connection
    if check_db_connection():
        logger.info("DB Connection: ✅ OK")
    else:
        logger.warning("DB Connection: ❌ FAILED")

    # Create tables if they don't exist
    try:
        Base.metadata.create_all(bind=engine)
        logger.info("Database tables verified/created")
    except Exception as exc:
        # Best effort: keep the service starting, but record the traceback.
        logger.exception("Failed to create tables: %s", exc)
|
|
|
|
|
|
@app.on_event("shutdown")
async def shutdown_event():
    """Log that the service is stopping; no other cleanup is performed here."""
    farewell = "🛑 eamco_scraper SHUTTING DOWN"
    logger.info(farewell)
|