""" FastAPI web server for the crawler. Provides HTTP endpoints to trigger scrapes on demand. """ import logging from fastapi import FastAPI, HTTPException import models from database import SessionLocal from cheapestoil import scrape_state from cheapestoil.config import STATE_API_NAMES from newenglandoil.scraper import main as run_newenglandoil_scraper logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s", ) app = FastAPI(title="Crawler API", version="1.0.0") def _build_county_lookup(db_session): """Build a (state_abbr, county_name) -> county_id lookup from the DB.""" counties = db_session.query(models.County).all() return {(c.state.strip(), c.name.strip()): c.id for c in counties} @app.get("/health") def health(): return {"status": "ok"} @app.get("/scrape/{state_abbr}") def scrape_endpoint(state_abbr: str, refresh_metadata: bool = False): """Trigger a CheapestOil scrape for a single state.""" state_abbr = state_abbr.upper() if state_abbr not in STATE_API_NAMES: raise HTTPException( status_code=400, detail=f"Unknown state: {state_abbr}. Valid: {list(STATE_API_NAMES.keys())}", ) db_session = SessionLocal() try: county_lookup = _build_county_lookup(db_session) result = scrape_state(state_abbr, db_session, county_lookup, refresh_metadata=refresh_metadata) return result except Exception as e: db_session.rollback() logging.error(f"Scrape failed for {state_abbr}: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) finally: db_session.close() @app.get("/scrape-newenglandoil") def scrape_newenglandoil_endpoint(state: str = None, refresh_metadata: bool = False): """Trigger the NewEnglandOil scraper (runs synchronously).""" try: # This will run the scraper and log to stdout (inherited from app's logging setup) run_newenglandoil_scraper(refresh_metadata=refresh_metadata, target_state_abbr=state) return {"status": "ok", "message": "NewEnglandOil scrape completed"} except Exception as e: logging.error(f"NewEnglandOil scrape failed: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e))