feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures) and competitor oil pricing from NewEnglandOil. Includes cron-driven scraping, PostgreSQL storage, and REST endpoints for price retrieval. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
195
app/main.py
Normal file
195
app/main.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
eamco_scraper - FastAPI Oil Price Scraping Microservice.
|
||||
|
||||
This microservice provides endpoints for scraping oil prices from New England Oil
|
||||
and storing them in the database for historical tracking.
|
||||
|
||||
Endpoints:
|
||||
GET /health - Health check with database connectivity status
|
||||
GET /scraper/newenglandoil/latestprice - Trigger scrape and return latest prices
GET /scraper/prices - Return stored prices for a date (defaults to today, UTC); does not scrape
|
||||
|
||||
Usage:
|
||||
# Development
|
||||
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||
|
||||
# Production (Docker)
|
||||
docker run -p 8000:8000 eamco_scraper
|
||||
|
||||
# Trigger from cron
|
||||
curl http://localhost:8000/scraper/newenglandoil/latestprice
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import List
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import FastAPI, Depends, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import (
|
||||
DATABASE_URL,
|
||||
CORS_ORIGINS,
|
||||
LOG_LEVEL,
|
||||
LOG_FORMAT,
|
||||
)
|
||||
from app.models import Base, CompanyPrice
|
||||
from app.database import engine, get_db, check_db_connection
|
||||
from app.schemas import HealthResponse, PriceRecord
|
||||
from app.newenglandoil.router import router as newenglandoil_router
|
||||
from app.priceticker.router import router as priceticker_router
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LOGGING CONFIGURATION
|
||||
# =============================================================================
|
||||
|
||||
# Route all log records to stdout. LOG_LEVEL is matched case-insensitively
# against the logging module's level names; an unknown name falls back to INFO.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format=LOG_FORMAT,
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
)

# Module-level logger used by the endpoints and lifecycle hooks in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
# =============================================================================
|
||||
# FASTAPI APPLICATION
|
||||
# =============================================================================
|
||||
|
||||
# The ASGI application object; interactive API docs are served at /docs
# (Swagger UI) and /redoc (ReDoc).
app = FastAPI(
    title="eamco_scraper",
    version="1.0.0",
    description="Oil price scraping microservice for New England Oil",
    docs_url="/docs",
    redoc_url="/redoc",
)
|
||||
|
||||
# =============================================================================
|
||||
# CORS MIDDLEWARE
|
||||
# =============================================================================
|
||||
|
||||
# Allow cross-origin requests from the configured origins, with any method
# and any header, and with credentials (cookies / auth headers) permitted.
# NOTE(review): credentials combined with a wildcard origin is unsafe —
# confirm CORS_ORIGINS never contains "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
||||
|
||||
# =============================================================================
|
||||
# ROUTERS
|
||||
# =============================================================================
|
||||
|
||||
# Mount the feature routers; registration order is kept as-is
# (New England Oil scraper first, then the price ticker).
app.include_router(newenglandoil_router)
app.include_router(priceticker_router)
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@app.get("/", include_in_schema=False)
async def root():
    """Return basic service metadata and the path of the API docs.

    This route is excluded from the generated OpenAPI schema. It responds
    with a small JSON object; it does not issue an HTTP redirect.
    """
    service_info = {
        "service": "eamco_scraper",
        "version": "1.0.0",
        "docs": "/docs",
    }
    return service_info
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Report service health, including database connectivity.

    Probes the database through ``check_db_connection()``. The status is
    "healthy" when the probe succeeds and "degraded" otherwise. Intended
    for container health checks and monitoring.

    Returns:
        HealthResponse carrying the status string and the db_connected flag.
    """
    is_db_up = check_db_connection()

    if is_db_up:
        status_label = "healthy"
    else:
        status_label = "degraded"

    return HealthResponse(status=status_label, db_connected=is_db_up)
|
||||
|
||||
|
||||
@app.get(
    "/scraper/prices",
    response_model=List[PriceRecord],
    tags=["Prices"],
)
async def get_stored_prices(
    date: str | None = None,
    db: Session = Depends(get_db),
):
    """
    Get stored oil prices from the database.

    Does NOT trigger a scrape.

    Args:
        date: Day to look up, formatted as YYYY-MM-DD. When omitted or
            empty, the current date in UTC is used.
        db: SQLAlchemy session supplied by the ``get_db`` dependency.

    Returns:
        One PriceRecord per stored CompanyPrice row whose ``scrape_date``
        matches the requested day (an empty list when there are none).

    Raises:
        HTTPException: 400 when ``date`` is not a valid YYYY-MM-DD date;
            500 when the query or the row conversion fails.
    """
    # Local import: the module header only brings in `datetime` itself.
    from datetime import timezone

    if date:
        # Validate up front so a malformed value is reported as a client
        # error instead of surfacing as a database failure (HTTP 500).
        try:
            lookup_date = datetime.strptime(date, "%Y-%m-%d").date().isoformat()
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail="Invalid date; expected format YYYY-MM-DD",
            ) from None
    else:
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        lookup_date = datetime.now(timezone.utc).date().isoformat()

    try:
        rows = (
            db.query(CompanyPrice)
            .filter(CompanyPrice.scrape_date == lookup_date)
            .all()
        )
        return [
            PriceRecord(
                company_name=row.company_name,
                town=row.town,
                price_decimal=float(row.price_decimal),
                scrape_date=str(row.scrape_date),
                zone=row.zone,
            )
            for row in rows
        ]
    except Exception as exc:
        # Keep the full traceback in the server log, but do not echo
        # internal error text (SQL, connection details) back to the client.
        logger.exception("Error fetching prices")
        raise HTTPException(
            status_code=500,
            detail="Failed to fetch stored prices",
        ) from exc
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# STARTUP/SHUTDOWN EVENTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _redacted_db_url(url):
    """Return a loggable form of a database URL with credentials removed.

    Only the scheme, host, port and path are kept; the user name, password
    and query string are dropped so they never reach the logs.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import urlsplit

    if not url:
        return "<not configured>"
    try:
        parts = urlsplit(url)
        host = parts.hostname or ""
        # Accessing .port raises ValueError for a malformed port.
        port = f":{parts.port}" if parts.port else ""
    except ValueError:
        return "<unparseable database URL>"
    return f"{parts.scheme}://{host}{port}{parts.path}"


@app.on_event("startup")
async def startup_event():
    """Application startup - log configuration, test the DB, ensure tables.

    Logs the run mode (from the MODE environment variable, defaulting to
    DEVELOPMENT), a credential-free form of the database URL and the number
    of CORS origins. It then probes the database and creates any missing
    tables. Failures are logged and do not abort startup.
    """
    logger.info("🚀 eamco_scraper STARTING")

    mode = os.environ.get('MODE', 'DEVELOPMENT').upper()
    if mode in ('DEVELOPMENT', 'DEV', 'LOCAL'):
        logger.info("🤖🤖🤖🤖🤖 Mode: Development 🤖🤖🤖🤖🤖")
    elif mode in ('PRODUCTION', 'PROD'):
        logger.info("💀💀💀💀💀💀💀💀💀💀 ⚠️ WARNING PRODUCTION 💀💀💀💀💀💀💀💀💀💀")
    else:
        # Previously an unknown value produced no log line at all.
        logger.warning("Unrecognized MODE value: %r", mode)

    # Never log the raw URL: its leading characters normally contain the
    # database user name and password.
    logger.info(f"DB: {_redacted_db_url(DATABASE_URL)}")
    logger.info(f"CORS: {len(CORS_ORIGINS)} origins configured")

    # Test database connection
    if check_db_connection():
        logger.info("DB Connection: ✅ OK")
    else:
        logger.warning("DB Connection: ❌ FAILED")

    # Create tables if they don't exist
    try:
        Base.metadata.create_all(bind=engine)
    except Exception:
        # Best-effort: keep the service starting, but record the traceback.
        logger.exception("Failed to create tables")
    else:
        logger.info("Database tables verified/created")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown_event():
    """Log that the service is stopping; no other cleanup happens here."""
    logger.info("🛑 eamco_scraper SHUTTING DOWN")
|
||||
Reference in New Issue
Block a user