#!/usr/bin/env python3
"""
Main scraper orchestrator module.

Coordinates fetching, parsing, and storing oil price data.
"""

import logging
import os
import sys

# Add parent directory to path so sibling top-level modules
# (database, models) are importable when run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from sqlalchemy.orm import Session

from database import SessionLocal, init_db
import models
from .config import SITES_CONFIG, ZONE_COUNTY_MAP, setup_logging
from .http_client import make_request
from .parsers import parse_price_table, parse_zone_slug_to_int
from .db_operations import upsert_oil_price


def _build_county_lookup(db_session: Session) -> dict:
    """
    Build a lookup dict from (state_abbrev, county_name) -> county_id
    by querying the county table.

    Args:
        db_session: Active SQLAlchemy session.

    Returns:
        Dict mapping (state, name) tuples to county ids.
    """
    counties = db_session.query(models.County).all()
    lookup = {(c.state, c.name): c.id for c in counties}
    logging.info("Built county lookup with %d entries", len(lookup))
    return lookup


def _resolve_county_id(state_key: str, zone_number: int, county_lookup: dict) -> int | None:
    """
    Resolve a county_id from ZONE_COUNTY_MAP and the county lookup.

    Args:
        state_key: State key used by the site configuration.
        zone_number: Integer zone number parsed from the zone slug.
        county_lookup: Output of _build_county_lookup.

    Returns:
        The county id, or None if no zone mapping or DB county exists.
    """
    mapping = ZONE_COUNTY_MAP.get((state_key, zone_number))
    if not mapping:
        logging.debug("No zone-to-county mapping for (%s, %s)", state_key, zone_number)
        return None
    state_abbrev, county_name = mapping
    county_id = county_lookup.get((state_abbrev, county_name))
    if county_id is None:
        # Mapping exists in config but the county row is absent from the DB.
        logging.warning(
            "County not found in DB: (%s, %s) for zone (%s, %s)",
            state_abbrev, county_name, state_key, zone_number,
        )
    return county_id


def _scrape_zone(
    db_session: Session,
    site_name: str,
    url_template: str,
    base_url: str,
    oil_type: int,
    state_key: str,
    zone_slug: str,
    county_lookup: dict,
) -> int:
    """
    Scrape a single zone and store records.

    Returns:
        Number of records parsed from the page (each one is attempted
        as an upsert; the count of successful upserts is logged only).
    """
    format_params = {
        "base_url": base_url,
        "state_slug": state_key,
        "zone_slug": zone_slug,
        "oil_type": oil_type,
    }
    target_url = url_template.format(**format_params)
    logging.info("Scraping: %s (State: %s, Zone Slug: %s)", target_url, state_key, zone_slug)

    soup = make_request(target_url)
    if not soup:
        logging.warning("Failed to retrieve or parse %s. Skipping.", target_url)
        return 0

    parsed_items = parse_price_table(soup, state_key, zone_slug)
    if not parsed_items:
        logging.info("No data extracted from %s", target_url)
        return 0

    # Resolve county_id for this zone (None when no mapping/DB row exists).
    zone_number = parse_zone_slug_to_int(zone_slug)
    county_id = None
    if zone_number is not None:
        county_id = _resolve_county_id(state_key, zone_number, county_lookup)

    records_processed = 0
    for item_dict in parsed_items:
        item_dict["county_id"] = county_id
        if upsert_oil_price(db_session, item_dict):
            records_processed += 1

    logging.info(
        "Processed %d records from %s - %s/%s (%d inserted/updated, county_id=%s)",
        len(parsed_items), site_name, state_key, zone_slug, records_processed, county_id,
    )
    return len(parsed_items)


def _scrape_site(db_session: Session, site_config: dict, county_lookup: dict) -> int:
    """
    Scrape all zones for a single site.

    Returns:
        Total number of records parsed across all zones of the site.
    """
    site_name = site_config["site_name"]
    base_url = site_config["base_url"]
    url_template = site_config["url_template"]
    oil_type = site_config["oil_type"]

    logging.info("--- Processing site: %s ---", site_name)

    total_records = 0
    for state_key, zone_slugs in site_config["locations"].items():
        for zone_slug in zone_slugs:
            records = _scrape_zone(
                db_session=db_session,
                site_name=site_name,
                url_template=url_template,
                base_url=base_url,
                oil_type=oil_type,
                state_key=state_key,
                zone_slug=zone_slug,
                county_lookup=county_lookup,
            )
            total_records += records
    return total_records


def main():
    """
    Main entry point for the oil price scraper.

    Initializes database, iterates through all configured sites and zones,
    scrapes price data, and stores it in the database.
    """
    setup_logging()
    logging.info("Starting oil price scraper job.")

    # Initialize database; abort the run entirely if this fails.
    try:
        init_db()
        logging.info("Database initialized/checked successfully.")
    except Exception as e:
        logging.error(f"Failed to initialize database: {e}", exc_info=True)
        return

    db_session: Session = SessionLocal()
    total_records = 0

    try:
        # Build county lookup once at startup to avoid per-record queries.
        county_lookup = _build_county_lookup(db_session)

        # Process each configured site.
        for site_config in SITES_CONFIG:
            records = _scrape_site(db_session, site_config, county_lookup)
            total_records += records

        # Commit all changes in a single transaction.
        if total_records > 0:
            db_session.commit()
            logging.info("Successfully committed records to the database.")
        else:
            logging.info("No new records were queued for database insertion in this run.")
    except Exception as e:
        logging.error(f"An error occurred during scraping or DB operation: {e}", exc_info=True)
        db_session.rollback()
        logging.info("Database transaction rolled back due to error.")
    finally:
        db_session.close()
        logging.info("Database session closed.")

    logging.info("Oil price scraper job finished.")


if __name__ == "__main__":
    main()