refactor: replace fuel_scraper with newenglandoil + cheapestoil scrapers
- Add newenglandoil/ package as the primary scraper (replaces fuel_scraper)
- Add cheapestoil/ package as a secondary market price scraper
- Add app.py entry point for direct execution
- Update run.py: new scrape_cheapest(), migrate command, --state filter,
  --refresh-metadata flag for overwriting existing phone/URL data
- Update models.py with latest schema fields
- Update requirements.txt dependencies
- Update Dockerfile and docker-compose.yml for new structure
- Remove deprecated fuel_scraper module, test.py, and log file

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
65
app.py
Normal file
65
app.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
FastAPI web server for the crawler.
|
||||
Provides HTTP endpoints to trigger scrapes on demand.
|
||||
"""
|
||||
import logging
from typing import Optional

from fastapi import FastAPI, HTTPException

import models
from database import SessionLocal
from cheapestoil import scrape_state
from cheapestoil.config import STATE_API_NAMES
from newenglandoil.scraper import main as run_newenglandoil_scraper
|
||||
|
||||
# Root-logger configuration shared by everything this process imports:
# timestamped records that name the emitting file and line number.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
)

# ASGI application object picked up by the server (e.g. `app:app`).
app = FastAPI(title="Crawler API", version="1.0.0")
|
||||
|
||||
|
||||
def _build_county_lookup(db_session):
    """Map (state_abbr, county_name) pairs to county ids.

    Loads every County row through *db_session* and keys the result on
    whitespace-stripped state and county names.
    """
    lookup = {}
    for county in db_session.query(models.County).all():
        key = (county.state.strip(), county.name.strip())
        lookup[key] = county.id
    return lookup
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.get("/scrape/{state_abbr}")
|
||||
def scrape_endpoint(state_abbr: str, refresh_metadata: bool = False):
|
||||
"""Trigger a CheapestOil scrape for a single state."""
|
||||
state_abbr = state_abbr.upper()
|
||||
if state_abbr not in STATE_API_NAMES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unknown state: {state_abbr}. Valid: {list(STATE_API_NAMES.keys())}",
|
||||
)
|
||||
|
||||
db_session = SessionLocal()
|
||||
try:
|
||||
county_lookup = _build_county_lookup(db_session)
|
||||
result = scrape_state(state_abbr, db_session, county_lookup, refresh_metadata=refresh_metadata)
|
||||
return result
|
||||
except Exception as e:
|
||||
db_session.rollback()
|
||||
logging.error(f"Scrape failed for {state_abbr}: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
db_session.close()
|
||||
|
||||
|
||||
@app.get("/scrape-newenglandoil")
|
||||
def scrape_newenglandoil_endpoint(state: str = None, refresh_metadata: bool = False):
|
||||
"""Trigger the NewEnglandOil scraper (runs synchronously)."""
|
||||
try:
|
||||
# This will run the scraper and log to stdout (inherited from app's logging setup)
|
||||
run_newenglandoil_scraper(refresh_metadata=refresh_metadata, target_state_abbr=state)
|
||||
return {"status": "ok", "message": "NewEnglandOil scrape completed"}
|
||||
except Exception as e:
|
||||
logging.error(f"NewEnglandOil scrape failed: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user