# run.py
"""Control script for the fuel price scraper.

Provides a CLI to initialize the database, run the NewEnglandOil or
CheapestOil scrapers, run the data-normalization migration, or start
the FastAPI server.
"""

import argparse
import logging

import models
from database import init_db, SessionLocal
from newenglandoil import main as run_scraper_main

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)


def initialize_database():
    """Initializes the database by creating tables based on models."""
    logger.info("Attempting to initialize database...")
    try:
        init_db()
        logger.info("Database initialization process completed.")
    except Exception as e:
        # Broad catch is intentional: this is a top-level CLI boundary.
        logger.error("Error during database initialization: %s", e, exc_info=True)


def scrape_data(state_abbr: str | None = None, refresh_metadata: bool = False):
    """Runs the NewEnglandOil scraper.

    Args:
        state_abbr: Optional state abbreviation to restrict scraping to.
        refresh_metadata: When True, existing phone/URL data may be overwritten.
    """
    logger.info("Starting the NewEnglandOil scraper...")
    if refresh_metadata:
        logger.info("Metadata refresh enabled: Existing phone/URL data may be overwritten.")
    if state_abbr:
        logger.info("Scraping restricted to state: %s", state_abbr)
    try:
        run_scraper_main(refresh_metadata=refresh_metadata, target_state_abbr=state_abbr)
        logger.info("NewEnglandOil scraper finished.")
    except Exception as e:
        logger.error("Error during scraping process: %s", e, exc_info=True)


def scrape_cheapest(state_abbr: str, refresh_metadata: bool = False):
    """Runs the CheapestOil scraper for a single state.

    Args:
        state_abbr: State abbreviation to scrape (required).
        refresh_metadata: When True, existing phone/URL data may be overwritten.
    """
    # Imported lazily so other actions don't require the cheapestoil module.
    from cheapestoil import scrape_state

    logger.info("Starting CheapestOil scrape for %s...", state_abbr)
    if refresh_metadata:
        logger.info("Metadata refresh enabled: Existing phone/URL data may be overwritten.")
    db_session = SessionLocal()
    try:
        # Pre-build a (state, county-name) -> id lookup so the scraper can
        # resolve counties without a query per row.
        counties = db_session.query(models.County).all()
        county_lookup = {(c.state.strip(), c.name.strip()): c.id for c in counties}
        result = scrape_state(
            state_abbr, db_session, county_lookup, refresh_metadata=refresh_metadata
        )
        logger.info("CheapestOil result: %s", result)
    except Exception as e:
        db_session.rollback()
        logger.error("Error during CheapestOil scrape: %s", e, exc_info=True)
    finally:
        # Always release the session, even on failure.
        db_session.close()


def run_migration():
    """Runs the data normalization migration."""
    # Imported lazily so other actions don't require the migration module.
    from migrate_normalize import main as migrate_main

    logger.info("Running data normalization migration...")
    try:
        migrate_main()
        logger.info("Migration completed.")
    except Exception as e:
        logger.error("Error during migration: %s", e, exc_info=True)


def start_server():
    """Starts the FastAPI server."""
    import uvicorn

    logger.info("Starting FastAPI crawler server on port 9553...")
    uvicorn.run("app:app", host="0.0.0.0", port=9553)


def main():
    """Parse CLI arguments and dispatch to the requested action."""
    parser = argparse.ArgumentParser(description="Fuel Price Scraper Control Script")
    parser.add_argument(
        "action",
        choices=["initdb", "scrape", "scrape-cheapest", "migrate", "server"],
        help=(
            "'initdb' to initialize the database, "
            "'scrape' to run NewEnglandOil scraper, "
            "'scrape-cheapest' to run CheapestOil scraper, "
            "'migrate' to run data normalization migration, "
            "'server' to start the FastAPI server."
        ),
    )
    parser.add_argument(
        "--state",
        default=None,
        help="State abbreviation (MA, CT, ME, NH, RI, VT).",
    )
    parser.add_argument(
        "--refresh-metadata",
        action="store_true",
        help="Force refresh phone numbers and URLs, overwriting existing data.",
    )
    args = parser.parse_args()

    if args.action == "initdb":
        initialize_database()
    elif args.action == "scrape":
        scrape_data(state_abbr=args.state, refresh_metadata=args.refresh_metadata)
    elif args.action == "scrape-cheapest":
        # --state is mandatory only for this action.
        if not args.state:
            logger.error("--state is required for scrape-cheapest action")
            parser.print_help()
            return
        scrape_cheapest(args.state.upper(), refresh_metadata=args.refresh_metadata)
    elif args.action == "migrate":
        run_migration()
    elif args.action == "server":
        start_server()


if __name__ == "__main__":
    main()