diff --git a/Dockerfile b/Dockerfile index 19f7c0b..ffa4cf7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Use an official Python runtime as a parent image -FROM python:3.9-slim-buster +FROM python:3.11-slim-bookworm # Set environment variables ENV PYTHONDONTWRITEBYTECODE 1 @@ -24,5 +24,7 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy the rest of the application code into the container # This will be overridden by the volume mount in docker-compose for development COPY . . -#CMD ["python3", "run.py", "initdb"] -CMD ["python3", "run.py", "scrape"] \ No newline at end of file + +EXPOSE 9553 + +CMD ["python3", "run.py", "server"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..c0160ac --- /dev/null +++ b/README.md @@ -0,0 +1,203 @@ +# NewEnglandBio Fuel Price Crawler + +Python scraper that collects heating oil prices from NewEnglandOil.com and MaineOil.com and stores them in PostgreSQL. Runs as a batch job from the CLI; a FastAPI app (`app.py`) additionally exposes HTTP endpoints to trigger scrapes on demand. + +## Tech Stack + +- **Language:** Python 3.10+ (the Docker image uses Python 3.11) +- **HTTP:** requests + BeautifulSoup4 +- **Database:** SQLAlchemy + psycopg2 (PostgreSQL) +- **Deployment:** Docker + +## Project Structure + +``` +crawler/ +├── run.py # CLI entry point (initdb / scrape / server) +├── database.py # SQLAlchemy engine and session config +├── models.py # ORM models (OilPrice, County, Company) +├── fuel_scraper.py # Legacy monolithic scraper (deprecated) +├── fuel_scraper/ # Modular package (use this) +│ ├── __init__.py # Exports main() +│ ├── config.py # Site configs, zone-to-county mappings, logging +│ ├── http_client.py # HTTP requests with browser User-Agent +│ ├── parsers.py # HTML table parsing for price extraction +│ ├── scraper.py # Main orchestrator +│ └── db_operations.py # Upsert logic for oil_prices table +├── test.py # HTML parsing validation +├── requirements.txt +├── Dockerfile +├── docker-compose.yml +└── .env +``` + +## URLs Scraped + +The crawler hits these external websites to collect price data: + +### 
NewEnglandOil.com (5 states) + +**URL pattern:** `https://www.newenglandoil.com/{state}/{zone}.asp?type=0` + +| State | Zones | Example URL | +|-------|-------|-------------| +| Connecticut | zone1–zone10 | `https://www.newenglandoil.com/connecticut/zone1.asp?type=0` | +| Massachusetts | zone1–zone15 | `https://www.newenglandoil.com/massachusetts/zone1.asp?type=0` | +| New Hampshire | zone1–zone6 | `https://www.newenglandoil.com/newhampshire/zone1.asp?type=0` | +| Rhode Island | zone1–zone4 | `https://www.newenglandoil.com/rhodeisland/zone1.asp?type=0` | +| Vermont | zone1–zone4 | `https://www.newenglandoil.com/vermont/zone1.asp?type=0` | + +### MaineOil.com (1 state) + +**URL pattern:** `https://www.maineoil.com/{zone}.asp?type=0` + +| State | Zones | Example URL | +|-------|-------|-------------| +| Maine | zone1–zone7 | `https://www.maineoil.com/zone1.asp?type=0` | + +**Total: ~46 pages scraped per run.** + +Each page contains an HTML table with columns: Company Name, Price, Date. The parser extracts these and maps zones to counties using the config. + +## How to Run + +### CLI Usage + +```bash +# Initialize database tables +python3 run.py initdb + +# Run the scraper +python3 run.py scrape +``` + +### Docker + +```bash +# Build +docker-compose build + +# Run scraper (default command) +docker-compose run app + +# Initialize database via Docker +docker-compose run app python3 run.py initdb + +# Both in sequence +docker-compose run app python3 run.py initdb && docker-compose run app +``` + +### Curl the Scraped Data + +The crawler itself does **not** serve HTTP endpoints. 
After scraping, the data is available through the **Rust API** (port 9552): + +```bash +# Get oil prices for a specific county +curl http://localhost:9552/oil-prices/county/1 + +# Get oil prices for Suffolk County (MA) — find county_id first +curl http://localhost:9552/state/MA +# Then use the county_id from the response +curl http://localhost:9552/oil-prices/county/5 +``` + +**Response format:** +```json +[ + { + "id": 1234, + "state": "Massachusetts", + "zone": 1, + "name": "ABC Fuel Co", + "price": 3.29, + "date": "01/15/2026", + "scrapetimestamp": "2026-01-15T14:30:00Z", + "county_id": 5 + } +] +``` + +### Query the Database Directly + +```bash +# All prices for Massachusetts +psql postgresql://postgres:password@192.168.1.204:5432/fuelprices \ + -c "SELECT name, price, date, county_id FROM oil_prices WHERE state='Massachusetts' ORDER BY price;" + +# Latest scrape timestamp +psql postgresql://postgres:password@192.168.1.204:5432/fuelprices \ + -c "SELECT MAX(scrapetimestamp) FROM oil_prices;" + +# Prices by county with county name +psql postgresql://postgres:password@192.168.1.204:5432/fuelprices \ + -c "SELECT c.name AS county, o.name AS company, o.price + FROM oil_prices o JOIN county c ON o.county_id = c.id + WHERE c.state='MA' ORDER BY o.price;" +``` + +## Environment + +Create `.env`: + +``` +DATABASE_URL=postgresql://postgres:password@192.168.1.204:5432/fuelprices +``` + +## Zone-to-County Mapping + +Each scraping zone maps to one or more counties: + +**Connecticut (10 zones):** +- zone1 → Fairfield | zone2 → New Haven | zone3 → Middlesex +- zone4 → New London | zone5 → Hartford | zone6 → Hartford +- zone7 → Litchfield | zone8 → Tolland | zone9 → Windham +- zone10 → New Haven + +**Massachusetts (15 zones):** +- zone1 → Berkshire | zone2 → Franklin | zone3 → Hampshire +- zone4 → Hampden | zone5 → Worcester | zone6 → Worcester +- zone7 → Middlesex | zone8 → Essex | zone9 → Suffolk +- zone10 → Norfolk | zone11 → Plymouth | zone12 → Bristol +- zone13 → 
Barnstable | zone14 → Dukes | zone15 → Nantucket + +**New Hampshire (6 zones):** +- zone1 → Coos, Grafton | zone2 → Carroll, Belknap +- zone3 → Sullivan, Merrimack | zone4 → Strafford, Cheshire +- zone5 → Hillsborough | zone6 → Rockingham + +**Rhode Island (4 zones):** +- zone1 → Providence | zone2 → Kent, Bristol +- zone3 → Washington | zone4 → Newport + +**Maine (7 zones):** +- zone1 → Cumberland | zone2 → York | zone3 → Sagadahoc, Lincoln, Knox +- zone4 → Androscoggin, Oxford, Franklin +- zone5 → Kennebec, Somerset | zone6 → Penobscot, Piscataquis +- zone7 → Hancock, Washington, Waldo, Aroostook + +## Upsert Logic + +When storing scraped data, the crawler: + +1. Matches existing records by `(name, state, county_id)` or `(name, state, zone)` +2. **Skips** records where `company_id IS NOT NULL` (vendor-managed prices take priority) +3. **Updates** if the price or county_id has changed +4. **Inserts** a new record if no match exists + +## Scheduling + +The crawler has no built-in scheduler. Run it via cron or Unraid's User Scripts: + +```bash +# Cron: run daily at 2 AM +0 2 * * * cd /mnt/code/tradewar/crawler && docker-compose run app +``` + +## Logging + +Logs to `oil_scraper.log` in the working directory. Level: INFO. + +``` +2026-01-15 14:30:00 - INFO - [scraper.py:42] - Scraping Massachusetts zone1... +2026-01-15 14:30:01 - INFO - [db_operations.py:28] - Upserted 15 records for Massachusetts zone1 +``` diff --git a/app.py b/app.py new file mode 100644 index 0000000..5e0bbd5 --- /dev/null +++ b/app.py @@ -0,0 +1,65 @@ +""" +FastAPI web server for the crawler. +Provides HTTP endpoints to trigger scrapes on demand. 
def _build_county_lookup(db_session):
    """Map every county row to its id, keyed by ``(state, name)``.

    Both parts of the key are whitespace-stripped copies of the ``state``
    and ``name`` attributes of each ``models.County`` row.
    """
    lookup = {}
    for county in db_session.query(models.County).all():
        lookup[(county.state.strip(), county.name.strip())] = county.id
    return lookup


@app.get("/health")
def health():
    """Liveness probe; always answers ``{"status": "ok"}``."""
    return {"status": "ok"}


@app.get("/scrape/{state_abbr}")
def scrape_endpoint(state_abbr: str, refresh_metadata: bool = False):
    """Run a CheapestOil scrape for one state and return its summary.

    The abbreviation is upper-cased and must be a key of
    ``STATE_API_NAMES``; otherwise the request is rejected with HTTP 400.
    Any exception raised while scraping rolls the session back, is logged
    with its traceback, and is reported as HTTP 500. The session is always
    closed.
    """
    state_abbr = state_abbr.upper()
    if state_abbr not in STATE_API_NAMES:
        valid_states = list(STATE_API_NAMES.keys())
        raise HTTPException(
            status_code=400,
            detail=f"Unknown state: {state_abbr}. Valid: {valid_states}",
        )

    session = SessionLocal()
    try:
        lookup = _build_county_lookup(session)
        return scrape_state(
            state_abbr, session, lookup, refresh_metadata=refresh_metadata
        )
    except Exception as e:
        session.rollback()
        logging.error(f"Scrape failed for {state_abbr}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        session.close()
@app.get("/scrape-newenglandoil")
def scrape_newenglandoil_endpoint(state: str | None = None, refresh_metadata: bool = False):
    """Trigger the NewEnglandOil scraper (runs synchronously).

    Args:
        state: Optional state abbreviation, forwarded unchanged to the
            scraper as ``target_state_abbr``. ``None`` (the default) is
            forwarded as-is.
        refresh_metadata: Forwarded unchanged to the scraper.

    Returns:
        ``{"status": "ok", "message": ...}`` once the scraper has returned.

    Raises:
        HTTPException: 500 carrying the error text if the scraper raises.
    """
    try:
        # The scraper logs through the logging setup configured by this app.
        run_newenglandoil_scraper(refresh_metadata=refresh_metadata, target_state_abbr=state)
    except Exception as e:
        logging.error(f"NewEnglandOil scrape failed: {e}", exc_info=True)
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "ok", "message": "NewEnglandOil scrape completed"}
def fetch_company_details(slug: str) -> dict:
    """
    Fetch company details (real URL, phone) from their CheapestOil profile page.

    This is best-effort: any failure (network error, HTTP error status,
    parsing problem) is logged as a warning and reported as "nothing found"
    instead of propagating to the caller.

    Args:
        slug: The company slug/path (e.g. "Abc-Oil-Company"). A value that
            starts with "http" is requested as-is; anything else is appended
            to https://www.cheapestoil.com/.

    Returns:
        Dict with keys: "url" (str|None), "phone" (str|None)
    """
    # Imported locally because api_client.py has no module-level
    # `import logging`; without it the except-branch below raised NameError
    # instead of returning the empty result.
    import logging

    if not slug:
        return {"url": None, "phone": None}

    # A full URL is used verbatim; a bare slug is resolved against the site root.
    url = slug if slug.startswith("http") else f"https://www.cheapestoil.com/{slug}"

    try:
        resp = requests.get(url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, 'html.parser')

        real_url = None
        phone = None

        # 1. Real URL: prefer an anchor labelled "Visit Website" /
        #    "Company Website" that points off-site.
        visit_link = soup.find('a', string=re.compile(r"Visit Website|Company Website", re.IGNORECASE))
        if visit_link and visit_link.get('href'):
            href = visit_link.get('href')
            if 'cheapestoil.com' not in href and href.startswith('http'):
                real_url = href

        # Fallback (heuristic): first external link inside the 'col-md-8' div.
        if not real_url:
            content_div = soup.find('div', class_='col-md-8')
            if content_div:
                for a in content_div.find_all('a', href=True):
                    href = a['href']
                    if href.startswith('http') and 'cheapestoil.com' not in href:
                        real_url = href
                        break

        # 2. Phone: first look for a number following a "Phone"/"Tel"/"Call"
        #    label, then fall back to the first phone-shaped token on the page.
        page_text = soup.get_text(" ", strip=True)
        phone_match = re.search(r'(?:Phone|Tel|Call).*?(\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4})', page_text, re.IGNORECASE)
        if phone_match:
            phone_candidate = phone_match.group(1)
        else:
            phone_match = re.search(r'(?:\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4})', page_text)
            phone_candidate = phone_match.group(0) if phone_match else None

        if phone_candidate:
            # Normalise to "(AAA) BBB-CCCC" when exactly ten digits were captured.
            digits = re.sub(r'\D', '', phone_candidate)
            if len(digits) == 10:
                phone = f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
            else:
                phone = phone_candidate

        return {"url": real_url, "phone": phone}

    except Exception as e:
        # Deliberately broad: a broken detail page must not abort a scrape.
        logging.warning(f"Failed to fetch details for {slug}: {e}")
        return {"url": None, "phone": None}
def fetch_county_prices(state_api_name: str, county_name: str | None = None) -> list:
    """
    Fetch price data from the CheapestOil API.

    Args:
        state_api_name: State name as used by the API (e.g. "Massachusetts", "NewHampshire")
        county_name: County name filter, or None for state-level results
            (sent to the API as an empty "county" parameter)

    Returns:
        List of raw JSON arrays from the API, or empty list on failure
        (request error, HTTP error status, invalid JSON, or a JSON payload
        that is not a list). Failures are logged, never raised.
    """
    # Imported locally because api_client.py has no module-level
    # `import logging`; every logging call below used to raise NameError.
    import logging

    params = {
        "sort": 0,
        "state": state_api_name,
        "county": county_name or "",
        "zip": "",
    }

    try:
        resp = requests.get(
            API_URL, params=params, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT
        )
        resp.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error(f"Error fetching CheapestOil API for {state_api_name}/{county_name}: {e}")
        return []

    # Decode separately: recent requests versions raise a JSON error that is
    # both a RequestException and a ValueError, so a shared try-block would
    # report bad JSON as a fetch error.
    try:
        data = resp.json()
    except ValueError as e:
        logging.error(f"Invalid JSON from CheapestOil API: {e}")
        return []

    if isinstance(data, list):
        return data
    logging.warning(f"Unexpected response type from API: {type(data)}")
    return []
# Trailing business-entity words stripped during normalization. Each entry is
# matched only as a whole trailing word. The compound entries "oil co" and
# "fuel co" were dropped: they removed the trade word as well, so
# "Fireman's Fuel Co." normalized to "firemans" and no longer equalled
# "Firemans Fuel" ("firemans fuel").
_STRIP_SUFFIXES = [
    "enterprises", "company", "corp", "inc", "llc", "co",
]


def normalize_company_name(name: str) -> str:
    """
    Normalize a company name for fuzzy matching.

    Steps:
      1. Strip whitespace, lowercase
      2. Replace '&' with 'and'
      3. Remove punctuation (apostrophes, periods, commas, dollar signs)
      4. Collapse multiple spaces
      5. Remove at most one trailing suffix from _STRIP_SUFFIXES, and only
         when it is a separate word ("Sunoco" keeps its "co"; a name that
         consists solely of a suffix word is left untouched so the result
         is never emptied by this step)

    Args:
        name: Raw company name (None or "" yields "")

    Returns:
        Normalized string for comparison.
    """
    s = (name or "").strip().lower()
    s = s.replace("&", "and")
    s = re.sub(r"['.,$]", "", s)
    # Collapse first so the " <suffix>" test below sees exactly one space.
    s = re.sub(r"\s+", " ", s).strip()
    for suffix in _STRIP_SUFFIXES:
        if s.endswith(" " + suffix):
            s = s[: -(len(suffix) + 1)]
            break
    return s
def find_existing_record(
    db_session: Session,
    raw_name: str,
    state_abbr: str,
    county_id: int | None,
) -> "models.OilPrice | None":
    """
    Look up an oil_prices row whose normalized company name equals the
    normalized form of ``raw_name``.

    Candidate rows are restricted to ``state_abbr`` and, additionally, to
    ``county_id`` when one is given or to ``zone == 0`` when it is None.
    Name comparison happens in Python via ``normalize_company_name``.

    Args:
        db_session: SQLAlchemy session
        raw_name: Raw company name from CheapestOil
        state_abbr: Two-letter state abbreviation
        county_id: County ID or None

    Returns:
        The first matching OilPrice record, or None (also when the
        normalized name is empty).
    """
    wanted = normalize_company_name(raw_name)
    if not wanted:
        return None

    # Pick the second filter up front instead of branching on the query.
    if county_id is None:
        scope = models.OilPrice.zone == 0
    else:
        scope = models.OilPrice.county_id == county_id

    candidates = (
        db_session.query(models.OilPrice)
        .filter(models.OilPrice.state == state_abbr)
        .filter(scope)
        .all()
    )
    return next(
        (row for row in candidates if normalize_company_name(row.name) == wanted),
        None,
    )
# Endpoint queried for price rows; the client sends the state/county as
# query parameters.
API_URL = "https://www.cheapestoil.com/heating-oil-prices/api"

# Seconds between requests to be polite
SCRAPE_DELAY = 2

# State abbreviation -> list of county names on cheapestoil.com
# None means state-level only (no county filter)
# NOTE(review): every state below currently maps to a list of county names;
# no entry is None or contains None - confirm whether the state-level mode
# described above is still intended for any state.
STATE_COUNTIES = {
    "MA": [
        "Barnstable", "Berkshire", "Bristol", "Essex", "Franklin",
        "Hampden", "Hampshire", "Middlesex", "Norfolk", "Plymouth",
        "Suffolk", "Worcester",
    ],
    "CT": [
        "Fairfield", "Hartford", "Litchfield", "Middlesex",
        "New Haven", "New London", "Tolland", "Windham",
    ],
    "ME": [
        "Cumberland", "York", "Penobscot", "Kennebec", "Androscoggin",
        "Aroostook", "Oxford", "Hancock", "Somerset", "Knox",
        "Waldo", "Sagadahoc", "Lincoln", "Washington", "Franklin",
        "Piscataquis",
    ],
    "NH": [
        "Belknap", "Carroll", "Cheshire", "Coos", "Grafton",
        "Hillsborough", "Merrimack", "Rockingham", "Strafford", "Sullivan",
    ],
    "RI": [
        "Bristol", "Kent", "Newport", "Providence", "Washington",
    ],
    "VT": [
        "Addison", "Bennington", "Caledonia", "Chittenden", "Essex",
        "Franklin", "Grand Isle", "Lamoille", "Orange", "Orleans",
        "Rutland", "Washington", "Windham", "Windsor",
    ],
}

# State abbreviation -> API state name (as used in cheapestoil.com params)
# Multi-word state names are written without a space.
STATE_API_NAMES = {
    "MA": "Massachusetts",
    "CT": "Connecticut",
    "ME": "Maine",
    "NH": "NewHampshire",
    "RI": "RhodeIsland",
    "VT": "Vermont",
}
(Total $553.50*)" +""" +import re +import logging + +# Common abbreviations that should stay uppercase after title-casing +_KEEP_UPPER = {"LLC", "INC", "LP", "HVAC", "II", "III", "IV", "USA"} + + +def _smart_title(name: str) -> str: + """Convert a company name to title case, preserving common abbreviations.""" + words = name.title().split() + return " ".join(w.upper() if w.upper() in _KEEP_UPPER else w for w in words) + + +def parse_price_150(price_html: str) -> float | None: + """ + Extract the per-gallon price from a CheapestOil price field. + + Examples: + "$3.69
(Total $553.50*)" -> 3.69 + "$4.199" -> 4.199 + "" -> None + + Args: + price_html: Raw price string from the API + + Returns: + Float price or None if unparseable. + """ + if not price_html or not isinstance(price_html, str): + return None + # The per-gallon price is the first dollar amount before any
tag + match = re.search(r'\$(\d+\.\d+)', price_html) + if match: + try: + return float(match.group(1)) + except ValueError: + pass + logging.warning(f"Could not parse price from: {price_html!r}") + return None + + +def parse_company_record(row: list, county_name: str | None) -> dict | None: + """ + Convert an API row array to a structured dict. + + Expected row format: + [0] name + [1] 150gal price (HTML) + [2] 300gal price (HTML) + [3] 500gal price (HTML) + [4] service area text + [5] last updated date string + [6] company link/slug + [7] flag/badge + + Args: + row: Raw array from the API + county_name: County name this row came from (None for state-level) + + Returns: + Dict with {name, price, service_area, county_name, date} or None. + """ + if not isinstance(row, list) or len(row) < 6: + logging.warning(f"Skipping malformed row: {row!r}") + return None + + name = str(row[0]).strip() if row[0] else "" + if not name: + return None + + # Apply title case normalization + name = _smart_title(name) + + price = parse_price_150(str(row[1]) if row[1] else "") + service_area = str(row[4]).strip() if row[4] else "" + date_str = str(row[5]).strip() if row[5] else "" + # DB column is VARCHAR(20), truncate to fit + if len(date_str) > 20: + date_str = date_str[:20] + + # Extract company URL from row[6] (link/slug) + # Only accept if it looks like a real external URL, not a slug + url = None + slug = None + if len(row) > 6 and row[6]: + raw_link = str(row[6]).strip() + if raw_link: + if raw_link.startswith("http"): + url = raw_link + else: + # It's a slug for the cheapestoil detail page + slug = raw_link + + return { + "slug": slug, # Return slug so scraper can use it to fetch details + "name": name, + "price": price, + "service_area": service_area, + "county_name": county_name, + "date": date_str, + "url": url, + "slug": slug, + } diff --git a/cheapestoil/scraper.py b/cheapestoil/scraper.py new file mode 100644 index 0000000..6455311 --- /dev/null +++ b/cheapestoil/scraper.py 
@@ -0,0 +1,217 @@ +""" +Main orchestrator for the CheapestOil scraper. +""" +import logging +import time +from datetime import datetime + +import sys +import os +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from sqlalchemy.orm import Session +import models + +from .config import STATE_COUNTIES, STATE_API_NAMES, SCRAPE_DELAY +from .api_client import fetch_company_details, fetch_county_prices +from .parsers import parse_company_record +from .company_matcher import find_existing_record +from .town_lookup import resolve_county_from_service_area + + +def _resolve_county_id( + county_name: str | None, + service_area: str, + state_abbr: str, + county_lookup: dict, +) -> int | None: + """ + Resolve a county_id from either a direct county name or service area text. + + For MA/CT/ME: county_name comes directly from the API query parameter. + For NH/RI/VT: parse service_area text to find a town -> county mapping. + """ + # Direct county match (MA/CT/ME) + if county_name: + county_id = county_lookup.get((state_abbr, county_name)) + if county_id is None: + logging.warning(f"County not in DB: ({state_abbr}, {county_name})") + return county_id + + # Service area parsing (NH/RI/VT) + if service_area: + resolved = resolve_county_from_service_area(service_area, state_abbr) + if resolved: + county_id = county_lookup.get((state_abbr, resolved)) + if county_id is not None: + return county_id + logging.warning(f"Resolved county '{resolved}' not in DB for {state_abbr}") + + return None + + +def scrape_state(state_abbr: str, db_session: Session, county_lookup: dict, refresh_metadata: bool = False) -> dict: + """ + Scrape all CheapestOil data for a single state. + + Args: + state_abbr: Two-letter state code (MA, CT, ME, NH, RI, VT) + db_session: SQLAlchemy session + county_lookup: Dict of (state_abbr, county_name) -> county_id + refresh_metadata: If True, force re-fetch details (phone/url) and overwrite DB. 
def scrape_state(state_abbr: str, db_session: Session, county_lookup: dict, refresh_metadata: bool = False) -> dict:
    """
    Scrape all CheapestOil data for a single state.

    For every county configured in STATE_COUNTIES the price rows are
    fetched, parsed and upserted into oil_prices: existing rows (matched by
    normalized company name) get their price/url/phone/county_id updated,
    unknown companies are inserted with zone=0. Rows linked to a company
    (company_id set) are vendor-managed and never touched. Everything is
    committed once, after the last county.

    Args:
        state_abbr: Two-letter state code (MA, CT, ME, NH, RI, VT)
        db_session: SQLAlchemy session
        county_lookup: Dict of (state_abbr, county_name) -> county_id
        refresh_metadata: If True, force re-fetch details (phone/url) and overwrite DB.

    Returns:
        Summary dict with {state, counties_scraped, records_added, records_updated, records_skipped}.
        records_skipped counts unparseable rows, vendor-managed rows and
        rows without any change.

    Raises:
        ValueError: If state_abbr is not a key of STATE_API_NAMES.
    """
    state_abbr = state_abbr.upper()
    if state_abbr not in STATE_API_NAMES:
        raise ValueError(f"Unknown state: {state_abbr}. Must be one of {list(STATE_API_NAMES.keys())}")

    api_name = STATE_API_NAMES[state_abbr]
    counties = STATE_COUNTIES[state_abbr]

    summary = {
        "state": state_abbr,
        "counties_scraped": 0,
        "records_added": 0,
        "records_updated": 0,
        "records_skipped": 0,
    }

    details_cache = {}  # cache for detail pages: slug -> {url, phone}

    for i, county_name in enumerate(counties):
        if i > 0:
            time.sleep(SCRAPE_DELAY)  # polite gap between API requests

        label = county_name or "(state-level)"
        logging.info(f"[CheapestOil] Fetching: {state_abbr} / {label}")

        rows = fetch_county_prices(api_name, county_name)
        if not rows:
            logging.info(f"No results for {state_abbr} / {label}")
            continue

        logging.info(f"[CheapestOil] Processing {len(rows)} records from {state_abbr} / {label} (Size: {len(rows)})")

        summary["counties_scraped"] += 1

        for row in rows:
            record = parse_company_record(row, county_name)
            if not record or record["price"] is None:
                summary["records_skipped"] += 1
                continue

            county_id = _resolve_county_id(
                record["county_name"],
                record["service_area"],
                state_abbr,
                county_lookup,
            )

            # Check for existing record (cross-source dedup)
            existing = find_existing_record(
                db_session, record["name"], state_abbr, county_id
            )

            # Vendor-managed rows are never modified, so bail out before
            # spending an HTTP request (and a 1 s delay) on their details.
            if existing is not None and existing.company_id is not None:
                logging.debug(f"Skipping vendor-managed: {record['name']}")
                summary["records_skipped"] += 1
                continue

            slug = record.get("slug")
            real_url = record.get("url")
            phone = None

            # Fetch the detail page when we have a slug and either a refresh
            # was requested, the company is new, or url/phone is still missing.
            needs_details = bool(slug) and (
                refresh_metadata
                or existing is None
                or not existing.url
                or not existing.phone
            )
            if needs_details:
                if slug not in details_cache:
                    details_cache[slug] = fetch_company_details(slug)
                    time.sleep(1.0)  # Polite delay between detail pages
                cached = details_cache[slug]
                real_url = cached["url"]
                phone = cached["phone"]

            if existing is None:
                # Insert new record (zone=0 for cheapestoil)
                oil_price = models.OilPrice(
                    state=state_abbr,
                    zone=0,
                    name=record["name"],
                    price=record["price"],
                    date=record["date"],
                    county_id=county_id,
                    url=real_url,
                    phone=phone,
                    scrapetimestamp=datetime.utcnow(),
                )
                db_session.add(oil_price)
                summary["records_added"] += 1
                logging.info(f"Added: {record['name']} in {state_abbr} (county_id={county_id}, phone={phone})")
                continue

            updated = False

            # Backfill or Force Update url
            if real_url:
                if not existing.url or (refresh_metadata and existing.url != real_url):
                    existing.url = real_url
                    updated = True
                    logging.info(f"Updated/Backfilled URL for {record['name']}")

            # Backfill or Force Update phone
            if phone:
                if not existing.phone or (refresh_metadata and existing.phone != phone):
                    existing.phone = phone
                    updated = True
                    logging.info(f"Updated/Backfilled Phone for {record['name']}")

            # Backfill county_id if we have it now
            if county_id is not None and existing.county_id != county_id:
                existing.county_id = county_id
                updated = True
                logging.info(f"Updated county_id for {record['name']}")

            # The timestamp is touched in every case; only the counters and
            # the price/date fields depend on what changed.
            existing.scrapetimestamp = datetime.utcnow()

            # NOTE(review): if the price column is a Decimal type, comparing
            # it with a float may always report a change - confirm in models.
            if existing.price != record["price"]:
                # Remember the old value before overwriting it; the log line
                # previously printed the new price on both sides of the arrow.
                old_price = existing.price
                existing.price = record["price"]
                existing.date = record["date"]
                summary["records_updated"] += 1
                old_label = "n/a" if old_price is None else f"{old_price:.2f}"
                logging.info(f"Updated price: {record['name']} ${old_label} → ${record['price']:.2f}")
            elif updated:
                summary["records_updated"] += 1
            else:
                summary["records_skipped"] += 1
                logging.debug(f"No changes for {record['name']} (${record['price']:.2f})")

    db_session.commit()
    logging.info(
        f"[CheapestOil] State {state_abbr} complete: "
        f"{summary['records_added']} added, {summary['records_updated']} updated, "
        f"{summary['records_skipped']} skipped (no changes)"
    )
    return summary
"Throughout Rockingham County") +_NE_COUNTIES = { + "CT": ["Fairfield", "Hartford", "Litchfield", "Middlesex", "New Haven", + "New London", "Tolland", "Windham"], + "MA": ["Barnstable", "Berkshire", "Bristol", "Dukes", "Essex", "Franklin", + "Hampden", "Hampshire", "Middlesex", "Nantucket", "Norfolk", + "Plymouth", "Suffolk", "Worcester"], + "ME": ["Androscoggin", "Aroostook", "Cumberland", "Franklin", "Hancock", + "Kennebec", "Knox", "Lincoln", "Oxford", "Penobscot", "Piscataquis", + "Sagadahoc", "Somerset", "Waldo", "Washington", "York"], + "NH": ["Belknap", "Carroll", "Cheshire", "Coos", "Grafton", "Hillsborough", + "Merrimack", "Rockingham", "Strafford", "Sullivan"], + "RI": ["Bristol", "Kent", "Newport", "Providence", "Washington"], + "VT": ["Addison", "Bennington", "Caledonia", "Chittenden", "Essex", + "Franklin", "Grand Isle", "Lamoille", "Orange", "Orleans", + "Rutland", "Washington", "Windham", "Windsor"], +} + +# Town name (lowercase) -> County name, organized by state +TOWN_COUNTY_MAP = { + "CT": { + "andover": "Tolland", + "ansonia": "New Haven", + "ashford": "Windham", + "avon": "Hartford", + "barkhamsted": "Litchfield", + "beacon falls": "New Haven", + "berlin": "Hartford", + "bethany": "New Haven", + "bethel": "Fairfield", + "bethlehem": "Litchfield", + "bloomfield": "Hartford", + "bolton": "Tolland", + "bozrah": "New London", + "branford": "New Haven", + "bridgeport": "Fairfield", + "bridgewater": "Litchfield", + "bristol": "Hartford", + "brookfield": "Fairfield", + "brooklyn": "Windham", + "burlington": "Hartford", + "canaan": "Litchfield", + "canterbury": "Windham", + "canton": "Hartford", + "chaplin": "Windham", + "cheshire": "New Haven", + "chester": "Middlesex", + "clinton": "Middlesex", + "colchester": "New London", + "colebrook": "Litchfield", + "columbia": "Tolland", + "cornwall": "Litchfield", + "coventry": "Tolland", + "cromwell": "Middlesex", + "danbury": "Fairfield", + "darien": "Fairfield", + "deep river": "Middlesex", + "derby": "New 
Haven", + "durham": "Middlesex", + "east granby": "Hartford", + "east haddam": "Middlesex", + "east hampton": "Middlesex", + "east hartford": "Hartford", + "east haven": "New Haven", + "east lyme": "New London", + "east windsor": "Hartford", + "eastford": "Windham", + "easton": "Fairfield", + "ellington": "Tolland", + "enfield": "Hartford", + "essex": "Middlesex", + "fairfield": "Fairfield", + "farmington": "Hartford", + "franklin": "New London", + "glastonbury": "Hartford", + "goshen": "Litchfield", + "granby": "Hartford", + "greenwich": "Fairfield", + "griswold": "New London", + "groton": "New London", + "guilford": "New Haven", + "haddam": "Middlesex", + "hamden": "New Haven", + "hampton": "Windham", + "hartford": "Hartford", + "hartland": "Hartford", + "harwinton": "Litchfield", + "hebron": "Tolland", + "kent": "Litchfield", + "killingly": "Windham", + "killingworth": "Middlesex", + "lebanon": "New London", + "ledyard": "New London", + "lisbon": "New London", + "litchfield": "Litchfield", + "lyme": "New London", + "madison": "New Haven", + "manchester": "Hartford", + "mansfield": "Tolland", + "marlborough": "Hartford", + "meriden": "New Haven", + "middlebury": "New Haven", + "middlefield": "Middlesex", + "middletown": "Middlesex", + "milford": "New Haven", + "monroe": "Fairfield", + "montville": "New London", + "morris": "Litchfield", + "naugatuck": "New Haven", + "new britain": "Hartford", + "new canaan": "Fairfield", + "new fairfield": "Fairfield", + "new hartford": "Litchfield", + "new haven": "New Haven", + "new london": "New London", + "new milford": "Litchfield", + "newington": "Hartford", + "newtown": "Fairfield", + "norfolk": "Litchfield", + "north branford": "New Haven", + "north canaan": "Litchfield", + "north haven": "New Haven", + "north stonington": "New London", + "norwalk": "Fairfield", + "norwich": "New London", + "old lyme": "New London", + "old saybrook": "Middlesex", + "orange": "New Haven", + "oxford": "New Haven", + "plainfield": "Windham", 
+ "plainville": "Hartford", + "plymouth": "Litchfield", + "pomfret": "Windham", + "portland": "Middlesex", + "preston": "New London", + "prospect": "New Haven", + "putnam": "Windham", + "redding": "Fairfield", + "ridgefield": "Fairfield", + "rocky hill": "Hartford", + "roxbury": "Litchfield", + "salem": "New London", + "salisbury": "Litchfield", + "scotland": "Windham", + "seymour": "New Haven", + "sharon": "Litchfield", + "shelton": "Fairfield", + "sherman": "Fairfield", + "simsbury": "Hartford", + "somers": "Tolland", + "south windsor": "Hartford", + "southbury": "New Haven", + "southington": "Hartford", + "sprague": "New London", + "stafford": "Tolland", + "stamford": "Fairfield", + "sterling": "Windham", + "stonington": "New London", + "stratford": "Fairfield", + "suffield": "Hartford", + "thomaston": "Litchfield", + "thompson": "Windham", + "tolland": "Tolland", + "torrington": "Litchfield", + "trumbull": "Fairfield", + "union": "Tolland", + "vernon": "Tolland", + "voluntown": "New London", + "wallingford": "New Haven", + "warren": "Litchfield", + "washington": "Litchfield", + "waterbury": "New Haven", + "waterford": "New London", + "watertown": "Litchfield", + "west hartford": "Hartford", + "west haven": "New Haven", + "westbrook": "Middlesex", + "weston": "Fairfield", + "westport": "Fairfield", + "wethersfield": "Hartford", + "willington": "Tolland", + "wilton": "Fairfield", + "winchester": "Litchfield", + "windham": "Windham", + "windsor": "Hartford", + "windsor locks": "Hartford", + "wolcott": "New Haven", + "woodbridge": "New Haven", + "woodbury": "Litchfield", + "woodstock": "Windham", + }, + "MA": { + "abington": "Plymouth", + "acton": "Middlesex", + "acushnet": "Bristol", + "adams": "Berkshire", + "agawam": "Hampden", + "alford": "Berkshire", + "amesbury": "Essex", + "amherst": "Hampshire", + "andover": "Essex", + "arlington": "Middlesex", + "ashburnham": "Worcester", + "ashby": "Middlesex", + "ashfield": "Franklin", + "ashland": "Middlesex", + 
"athol": "Worcester", + "attleboro": "Bristol", + "auburn": "Worcester", + "avon": "Norfolk", + "ayer": "Middlesex", + "barnstable": "Barnstable", + "barre": "Worcester", + "becket": "Berkshire", + "bedford": "Middlesex", + "belchertown": "Hampshire", + "bellingham": "Norfolk", + "belmont": "Middlesex", + "berkley": "Bristol", + "berlin": "Worcester", + "bernardston": "Franklin", + "beverly": "Essex", + "billerica": "Middlesex", + "blackstone": "Worcester", + "blandford": "Hampden", + "bolton": "Worcester", + "boston": "Suffolk", + "bourne": "Barnstable", + "boxborough": "Middlesex", + "boxford": "Essex", + "boylston": "Worcester", + "braintree": "Norfolk", + "brewster": "Barnstable", + "bridgewater": "Plymouth", + "brimfield": "Hampden", + "brockton": "Plymouth", + "brookfield": "Worcester", + "brookline": "Norfolk", + "buckland": "Franklin", + "burlington": "Middlesex", + "cambridge": "Middlesex", + "canton": "Norfolk", + "carlisle": "Middlesex", + "carver": "Plymouth", + "charlemont": "Franklin", + "charlton": "Worcester", + "chatham": "Barnstable", + "chelmsford": "Middlesex", + "chelsea": "Suffolk", + "cheshire": "Berkshire", + "chester": "Hampden", + "chesterfield": "Hampshire", + "chicopee": "Hampden", + "chilmark": "Dukes", + "clarksburg": "Berkshire", + "clinton": "Worcester", + "cohasset": "Norfolk", + "colrain": "Franklin", + "concord": "Middlesex", + "conway": "Franklin", + "cummington": "Hampshire", + "dalton": "Berkshire", + "danvers": "Essex", + "dartmouth": "Bristol", + "dedham": "Norfolk", + "deerfield": "Franklin", + "dennis": "Barnstable", + "dighton": "Bristol", + "douglas": "Worcester", + "dover": "Norfolk", + "dracut": "Middlesex", + "dudley": "Worcester", + "dunstable": "Middlesex", + "duxbury": "Plymouth", + "east bridgewater": "Plymouth", + "east brookfield": "Worcester", + "east longmeadow": "Hampden", + "eastham": "Barnstable", + "easthampton": "Hampshire", + "easton": "Bristol", + "edgartown": "Dukes", + "egremont": "Berkshire", + 
"erving": "Franklin", + "essex": "Essex", + "everett": "Middlesex", + "fairhaven": "Bristol", + "fall river": "Bristol", + "falmouth": "Barnstable", + "fitchburg": "Worcester", + "florida": "Berkshire", + "foxborough": "Norfolk", + "framingham": "Middlesex", + "franklin": "Norfolk", + "freetown": "Bristol", + "gardner": "Worcester", + "georgetown": "Essex", + "gill": "Franklin", + "gloucester": "Essex", + "goshen": "Hampshire", + "gosnold": "Dukes", + "grafton": "Worcester", + "granby": "Hampshire", + "granville": "Hampden", + "great barrington": "Berkshire", + "greenfield": "Franklin", + "groton": "Middlesex", + "groveland": "Essex", + "hadley": "Hampshire", + "halifax": "Plymouth", + "hamilton": "Essex", + "hampden": "Hampden", + "hancock": "Berkshire", + "hanover": "Plymouth", + "hanson": "Plymouth", + "hardwick": "Worcester", + "harvard": "Worcester", + "harwich": "Barnstable", + "hatfield": "Hampshire", + "haverhill": "Essex", + "hawley": "Franklin", + "heath": "Franklin", + "hingham": "Plymouth", + "hinsdale": "Berkshire", + "holbrook": "Norfolk", + "holden": "Worcester", + "holland": "Hampden", + "holliston": "Middlesex", + "holyoke": "Hampden", + "hopedale": "Worcester", + "hopkinton": "Middlesex", + "hubbardston": "Worcester", + "hudson": "Middlesex", + "hull": "Plymouth", + "huntington": "Hampshire", + "ipswich": "Essex", + "kingston": "Plymouth", + "lakeville": "Plymouth", + "lancaster": "Worcester", + "lanesborough": "Berkshire", + "lawrence": "Essex", + "lee": "Berkshire", + "leicester": "Worcester", + "lenox": "Berkshire", + "leominster": "Worcester", + "leverett": "Franklin", + "lexington": "Middlesex", + "leyden": "Franklin", + "lincoln": "Middlesex", + "littleton": "Middlesex", + "longmeadow": "Hampden", + "lowell": "Middlesex", + "ludlow": "Hampden", + "lunenburg": "Worcester", + "lynn": "Essex", + "lynnfield": "Essex", + "malden": "Middlesex", + "manchester-by-the-sea": "Essex", + "manchester": "Essex", + "mansfield": "Bristol", + "marblehead": 
"Essex", + "marion": "Plymouth", + "marlborough": "Middlesex", + "marshfield": "Plymouth", + "mashpee": "Barnstable", + "mattapoisett": "Plymouth", + "maynard": "Middlesex", + "medfield": "Norfolk", + "medford": "Middlesex", + "medway": "Norfolk", + "melrose": "Middlesex", + "mendon": "Worcester", + "merrimac": "Essex", + "methuen": "Essex", + "middleborough": "Plymouth", + "middlefield": "Hampshire", + "middleton": "Essex", + "milford": "Worcester", + "millbury": "Worcester", + "millis": "Norfolk", + "millville": "Worcester", + "milton": "Norfolk", + "monroe": "Franklin", + "monson": "Hampden", + "montague": "Franklin", + "monterey": "Berkshire", + "montgomery": "Hampden", + "mount washington": "Berkshire", + "nahant": "Essex", + "nantucket": "Nantucket", + "natick": "Middlesex", + "needham": "Norfolk", + "new ashford": "Berkshire", + "new bedford": "Bristol", + "new braintree": "Worcester", + "new marlborough": "Berkshire", + "new salem": "Franklin", + "newbury": "Essex", + "newburyport": "Essex", + "newton": "Middlesex", + "norfolk": "Norfolk", + "north adams": "Berkshire", + "north andover": "Essex", + "north attleborough": "Bristol", + "north brookfield": "Worcester", + "north reading": "Middlesex", + "northampton": "Hampshire", + "northborough": "Worcester", + "northbridge": "Worcester", + "northfield": "Franklin", + "norton": "Bristol", + "norwell": "Plymouth", + "norwood": "Norfolk", + "oak bluffs": "Dukes", + "oakham": "Worcester", + "orange": "Franklin", + "orleans": "Barnstable", + "otis": "Berkshire", + "oxford": "Worcester", + "palmer": "Hampden", + "paxton": "Worcester", + "peabody": "Essex", + "pelham": "Hampshire", + "pembroke": "Plymouth", + "pepperell": "Middlesex", + "peru": "Berkshire", + "petersham": "Worcester", + "phillipston": "Worcester", + "pittsfield": "Berkshire", + "plainfield": "Hampshire", + "plainville": "Norfolk", + "plymouth": "Plymouth", + "plympton": "Plymouth", + "princeton": "Worcester", + "provincetown": "Barnstable", + 
"quincy": "Norfolk", + "randolph": "Norfolk", + "raynham": "Bristol", + "reading": "Middlesex", + "rehoboth": "Bristol", + "revere": "Suffolk", + "richmond": "Berkshire", + "rochester": "Plymouth", + "rockland": "Plymouth", + "rockport": "Essex", + "rowe": "Franklin", + "rowley": "Essex", + "royalston": "Worcester", + "russell": "Hampden", + "rutland": "Worcester", + "salem": "Essex", + "salisbury": "Essex", + "sandisfield": "Berkshire", + "sandwich": "Barnstable", + "saugus": "Essex", + "savoy": "Berkshire", + "scituate": "Plymouth", + "seekonk": "Bristol", + "sharon": "Norfolk", + "sheffield": "Berkshire", + "shelburne": "Franklin", + "sherborn": "Middlesex", + "shirley": "Middlesex", + "shrewsbury": "Worcester", + "shutesbury": "Franklin", + "somerset": "Bristol", + "somerville": "Middlesex", + "south hadley": "Hampshire", + "southampton": "Hampshire", + "southborough": "Worcester", + "southbridge": "Worcester", + "southwick": "Hampden", + "spencer": "Worcester", + "springfield": "Hampden", + "sterling": "Worcester", + "stockbridge": "Berkshire", + "stoneham": "Middlesex", + "stoughton": "Norfolk", + "stow": "Middlesex", + "sturbridge": "Worcester", + "sudbury": "Middlesex", + "sunderland": "Franklin", + "sutton": "Worcester", + "swampscott": "Essex", + "swansea": "Bristol", + "taunton": "Bristol", + "templeton": "Worcester", + "tewksbury": "Middlesex", + "tisbury": "Dukes", + "tolland": "Hampden", + "topsfield": "Essex", + "townsend": "Middlesex", + "truro": "Barnstable", + "tyngsborough": "Middlesex", + "tyringham": "Berkshire", + "upton": "Worcester", + "uxbridge": "Worcester", + "wakefield": "Middlesex", + "wales": "Hampden", + "walpole": "Norfolk", + "waltham": "Middlesex", + "ware": "Hampshire", + "wareham": "Plymouth", + "warren": "Worcester", + "warwick": "Franklin", + "washington": "Berkshire", + "watertown": "Middlesex", + "wayland": "Middlesex", + "webster": "Worcester", + "wellesley": "Norfolk", + "wellfleet": "Barnstable", + "wendell": "Franklin", + 
"wenham": "Essex", + "west boylston": "Worcester", + "west bridgewater": "Plymouth", + "west brookfield": "Worcester", + "west newbury": "Essex", + "west springfield": "Hampden", + "west stockbridge": "Berkshire", + "west tisbury": "Dukes", + "westborough": "Worcester", + "westfield": "Hampden", + "westford": "Middlesex", + "westhampton": "Hampshire", + "westminster": "Worcester", + "weston": "Middlesex", + "westport": "Bristol", + "westwood": "Norfolk", + "weymouth": "Norfolk", + "whately": "Franklin", + "whitman": "Plymouth", + "wilbraham": "Hampden", + "williamsburg": "Hampshire", + "williamstown": "Berkshire", + "wilmington": "Middlesex", + "winchendon": "Worcester", + "winchester": "Middlesex", + "windsor": "Berkshire", + "winthrop": "Suffolk", + "woburn": "Middlesex", + "worcester": "Worcester", + "worthington": "Hampshire", + "wrentham": "Norfolk", + "yarmouth": "Barnstable", + }, + "ME": { + "auburn": "Androscoggin", + "durham": "Androscoggin", + "greene": "Androscoggin", + "leeds": "Androscoggin", + "lewiston": "Androscoggin", + "lisbon": "Androscoggin", + "livermore": "Androscoggin", + "livermore falls": "Androscoggin", + "mechanic falls": "Androscoggin", + "minot": "Androscoggin", + "poland": "Androscoggin", + "sabattus": "Androscoggin", + "turner": "Androscoggin", + "wales": "Androscoggin", + "allagash": "Aroostook", + "amity": "Aroostook", + "ashland": "Aroostook", + "bancroft": "Aroostook", + "blaine": "Aroostook", + "bridgewater": "Aroostook", + "caribou": "Aroostook", + "castle hill": "Aroostook", + "caswell": "Aroostook", + "chapman": "Aroostook", + "connor": "Aroostook", + "crystal": "Aroostook", + "dyer brook": "Aroostook", + "eagle lake": "Aroostook", + "easton": "Aroostook", + "fort fairfield": "Aroostook", + "fort kent": "Aroostook", + "frenchville": "Aroostook", + "grand isle": "Aroostook", + "hamlin": "Aroostook", + "haynesville": "Aroostook", + "hersey": "Aroostook", + "hodgdon": "Aroostook", + "houlton": "Aroostook", + "island falls": 
"Aroostook", + "limestone": "Aroostook", + "linneus": "Aroostook", + "littleton": "Aroostook", + "ludlow": "Aroostook", + "madawaska": "Aroostook", + "mapleton": "Aroostook", + "mars hill": "Aroostook", + "masardis": "Aroostook", + "merrill": "Aroostook", + "monticello": "Aroostook", + "new canada": "Aroostook", + "new limerick": "Aroostook", + "new sweden": "Aroostook", + "oakfield": "Aroostook", + "orient": "Aroostook", + "perham": "Aroostook", + "portage lake": "Aroostook", + "presque isle": "Aroostook", + "saint agatha": "Aroostook", + "saint francis": "Aroostook", + "sherman": "Aroostook", + "smyrna": "Aroostook", + "stockholm": "Aroostook", + "van buren": "Aroostook", + "wade": "Aroostook", + "wallagrass": "Aroostook", + "washburn": "Aroostook", + "westfield": "Aroostook", + "weston": "Aroostook", + "woodland": "Aroostook", + "baldwin": "Cumberland", + "bridgton": "Cumberland", + "brunswick": "Cumberland", + "cape elizabeth": "Cumberland", + "casco": "Cumberland", + "chebeague island": "Cumberland", + "cumberland": "Cumberland", + "falmouth": "Cumberland", + "freeport": "Cumberland", + "frye island": "Cumberland", + "gorham": "Cumberland", + "gray": "Cumberland", + "harpswell": "Cumberland", + "harrison": "Cumberland", + "long island": "Cumberland", + "naples": "Cumberland", + "new gloucester": "Cumberland", + "north yarmouth": "Cumberland", + "portland": "Cumberland", + "pownal": "Cumberland", + "raymond": "Cumberland", + "scarborough": "Cumberland", + "sebago": "Cumberland", + "south portland": "Cumberland", + "standish": "Cumberland", + "westbrook": "Cumberland", + "windham": "Cumberland", + "yarmouth": "Cumberland", + "avon": "Franklin", + "carrabassett valley": "Franklin", + "carthage": "Franklin", + "chesterville": "Franklin", + "eustis": "Franklin", + "farmington": "Franklin", + "industry": "Franklin", + "jay": "Franklin", + "kingfield": "Franklin", + "new sharon": "Franklin", + "new vineyard": "Franklin", + "phillips": "Franklin", + "rangeley": 
"Franklin", + "strong": "Franklin", + "temple": "Franklin", + "weld": "Franklin", + "wilton": "Franklin", + "amherst": "Hancock", + "aurora": "Hancock", + "bar harbor": "Hancock", + "blue hill": "Hancock", + "brooklin": "Hancock", + "brooksville": "Hancock", + "bucksport": "Hancock", + "castine": "Hancock", + "cranberry isles": "Hancock", + "dedham": "Hancock", + "deer isle": "Hancock", + "eastbrook": "Hancock", + "ellsworth": "Hancock", + "franklin": "Hancock", + "frenchboro": "Hancock", + "gouldsboro": "Hancock", + "hancock": "Hancock", + "lamoine": "Hancock", + "mariaville": "Hancock", + "mount desert": "Hancock", + "orland": "Hancock", + "otis": "Hancock", + "penobscot": "Hancock", + "sedgwick": "Hancock", + "sorrento": "Hancock", + "southwest harbor": "Hancock", + "stonington": "Hancock", + "sullivan": "Hancock", + "surry": "Hancock", + "swans island": "Hancock", + "tremont": "Hancock", + "trenton": "Hancock", + "verona island": "Hancock", + "waltham": "Hancock", + "winter harbor": "Hancock", + "albion": "Kennebec", + "augusta": "Kennebec", + "belgrade": "Kennebec", + "benton": "Kennebec", + "chelsea": "Kennebec", + "china": "Kennebec", + "clinton": "Kennebec", + "farmingdale": "Kennebec", + "fayette": "Kennebec", + "gardiner": "Kennebec", + "hallowell": "Kennebec", + "litchfield": "Kennebec", + "manchester": "Kennebec", + "monmouth": "Kennebec", + "mount vernon": "Kennebec", + "oakland": "Kennebec", + "pittston": "Kennebec", + "randolph": "Kennebec", + "readfield": "Kennebec", + "rome": "Kennebec", + "sidney": "Kennebec", + "vassalboro": "Kennebec", + "vienna": "Kennebec", + "waterville": "Kennebec", + "wayne": "Kennebec", + "west gardiner": "Kennebec", + "windsor": "Kennebec", + "winslow": "Kennebec", + "winthrop": "Kennebec", + "appleton": "Knox", + "camden": "Knox", + "cushing": "Knox", + "friendship": "Knox", + "hope": "Knox", + "isle au haut": "Knox", + "north haven": "Knox", + "owls head": "Knox", + "rockland": "Knox", + "rockport": "Knox", + "saint 
george": "Knox", + "south thomaston": "Knox", + "thomaston": "Knox", + "union": "Knox", + "vinalhaven": "Knox", + "warren": "Knox", + "washington": "Knox", + "alna": "Lincoln", + "boothbay": "Lincoln", + "boothbay harbor": "Lincoln", + "bremen": "Lincoln", + "bristol": "Lincoln", + "damariscotta": "Lincoln", + "dresden": "Lincoln", + "edgecomb": "Lincoln", + "jefferson": "Lincoln", + "newcastle": "Lincoln", + "nobleboro": "Lincoln", + "somerville": "Lincoln", + "south bristol": "Lincoln", + "southport": "Lincoln", + "waldoboro": "Lincoln", + "westport island": "Lincoln", + "whitefield": "Lincoln", + "wiscasset": "Lincoln", + "albany": "Oxford", + "andover": "Oxford", + "bethel": "Oxford", + "brownfield": "Oxford", + "buckfield": "Oxford", + "byron": "Oxford", + "canton": "Oxford", + "denmark": "Oxford", + "dixfield": "Oxford", + "fryeburg": "Oxford", + "gilead": "Oxford", + "greenwood": "Oxford", + "hanover": "Oxford", + "hartford": "Oxford", + "hebron": "Oxford", + "hiram": "Oxford", + "lovell": "Oxford", + "mexico": "Oxford", + "newry": "Oxford", + "norway": "Oxford", + "oxford": "Oxford", + "paris": "Oxford", + "peru": "Oxford", + "porter": "Oxford", + "roxbury": "Oxford", + "rumford": "Oxford", + "stoneham": "Oxford", + "stow": "Oxford", + "sumner": "Oxford", + "sweden": "Oxford", + "upton": "Oxford", + "waterford": "Oxford", + "west paris": "Oxford", + "woodstock": "Oxford", + "milton": "Oxford", + "alton": "Penobscot", + "bangor": "Penobscot", + "bradford": "Penobscot", + "bradley": "Penobscot", + "brewer": "Penobscot", + "burlington": "Penobscot", + "carmel": "Penobscot", + "charleston": "Penobscot", + "chester": "Penobscot", + "clifton": "Penobscot", + "corinna": "Penobscot", + "corinth": "Penobscot", + "dexter": "Penobscot", + "dixmont": "Penobscot", + "east millinocket": "Penobscot", + "eddington": "Penobscot", + "edinburg": "Penobscot", + "enfield": "Penobscot", + "etna": "Penobscot", + "exeter": "Penobscot", + "garland": "Penobscot", + "glenburn": 
"Penobscot", + "greenbush": "Penobscot", + "greenfield": "Penobscot", + "hampden": "Penobscot", + "holden": "Penobscot", + "howland": "Penobscot", + "hudson": "Penobscot", + "kenduskeag": "Penobscot", + "lagrange": "Penobscot", + "lee": "Penobscot", + "levant": "Penobscot", + "lincoln": "Penobscot", + "lowell": "Penobscot", + "mattawamkeag": "Penobscot", + "maxfield": "Penobscot", + "medway": "Penobscot", + "milford": "Penobscot", + "millinocket": "Penobscot", + "newburgh": "Penobscot", + "newport": "Penobscot", + "old town": "Penobscot", + "orono": "Penobscot", + "orrington": "Penobscot", + "passadumkeag": "Penobscot", + "patten": "Penobscot", + "plymouth": "Penobscot", + "prentiss": "Penobscot", + "stetson": "Penobscot", + "springfield": "Penobscot", + "stacyville": "Penobscot", + "veazie": "Penobscot", + "winn": "Penobscot", + "woodville": "Penobscot", + "mount chase": "Penobscot", + "abbot": "Piscataquis", + "atkinson": "Piscataquis", + "beaver cove": "Piscataquis", + "bowerbank": "Piscataquis", + "brownville": "Piscataquis", + "dover-foxcroft": "Piscataquis", + "greenville": "Piscataquis", + "guilford": "Piscataquis", + "medford": "Piscataquis", + "milo": "Piscataquis", + "monson": "Piscataquis", + "parkman": "Piscataquis", + "sangerville": "Piscataquis", + "sebec": "Piscataquis", + "shirley": "Piscataquis", + "wellington": "Piscataquis", + "willimantic": "Piscataquis", + "arrowsic": "Sagadahoc", + "bath": "Sagadahoc", + "bowdoin": "Sagadahoc", + "bowdoinham": "Sagadahoc", + "georgetown": "Sagadahoc", + "phippsburg": "Sagadahoc", + "richmond": "Sagadahoc", + "topsham": "Sagadahoc", + "west bath": "Sagadahoc", + "woolwich": "Sagadahoc", + "anson": "Somerset", + "athens": "Somerset", + "bingham": "Somerset", + "cambridge": "Somerset", + "canaan": "Somerset", + "caratunk": "Somerset", + "cornville": "Somerset", + "detroit": "Somerset", + "embden": "Somerset", + "fairfield": "Somerset", + "harmony": "Somerset", + "hartland": "Somerset", + "jackman": "Somerset", + 
"madison": "Somerset", + "mercer": "Somerset", + "moscow": "Somerset", + "new portland": "Somerset", + "norridgewock": "Somerset", + "palmyra": "Somerset", + "pittsfield": "Somerset", + "ripley": "Somerset", + "saint albans": "Somerset", + "skowhegan": "Somerset", + "smithfield": "Somerset", + "solon": "Somerset", + "starks": "Somerset", + "belfast": "Waldo", + "belmont": "Waldo", + "brooks": "Waldo", + "burnham": "Waldo", + "frankfort": "Waldo", + "freedom": "Waldo", + "islesboro": "Waldo", + "jackson": "Waldo", + "knox": "Waldo", + "liberty": "Waldo", + "lincolnville": "Waldo", + "monroe": "Waldo", + "montville": "Waldo", + "morrill": "Waldo", + "northport": "Waldo", + "palermo": "Waldo", + "prospect": "Waldo", + "searsmont": "Waldo", + "searsport": "Waldo", + "stockton springs": "Waldo", + "swanville": "Waldo", + "thorndike": "Waldo", + "troy": "Waldo", + "unity": "Waldo", + "waldo": "Waldo", + "winterport": "Waldo", + "addison": "Washington", + "alexander": "Washington", + "baileyville": "Washington", + "beals": "Washington", + "beddington": "Washington", + "calais": "Washington", + "centerville": "Washington", + "charlotte": "Washington", + "cherryfield": "Washington", + "columbia": "Washington", + "columbia falls": "Washington", + "cooper": "Washington", + "crawford": "Washington", + "cutler": "Washington", + "danforth": "Washington", + "deblois": "Washington", + "dennysville": "Washington", + "east machias": "Washington", + "eastport": "Washington", + "harrington": "Washington", + "jonesboro": "Washington", + "jonesport": "Washington", + "lubec": "Washington", + "machias": "Washington", + "machiasport": "Washington", + "marion": "Washington", + "marshfield": "Washington", + "meddybemps": "Washington", + "milbridge": "Washington", + "northfield": "Washington", + "pembroke": "Washington", + "perry": "Washington", + "princeton": "Washington", + "robbinston": "Washington", + "roque bluffs": "Washington", + "steuben": "Washington", + "talmadge": "Washington", + 
"topsfield": "Washington", + "vanceboro": "Washington", + "waite": "Washington", + "wesley": "Washington", + "whiting": "Washington", + "whitneyville": "Washington", + "acton": "York", + "alfred": "York", + "arundel": "York", + "berwick": "York", + "biddeford": "York", + "buxton": "York", + "cornish": "York", + "dayton": "York", + "eliot": "York", + "hollis": "York", + "kennebunk": "York", + "kennebunkport": "York", + "kittery": "York", + "lebanon": "York", + "limerick": "York", + "limington": "York", + "lyman": "York", + "newfield": "York", + "north berwick": "York", + "ogunquit": "York", + "old orchard beach": "York", + "parsonsfield": "York", + "saco": "York", + "sanford": "York", + "shapleigh": "York", + "south berwick": "York", + "waterboro": "York", + "wells": "York", + "york": "York", + }, + "NH": { + "acworth": "Sullivan", + "albany": "Carroll", + "alexandria": "Grafton", + "allenstown": "Merrimack", + "alstead": "Cheshire", + "alton": "Belknap", + "amherst": "Hillsborough", + "andover": "Merrimack", + "antrim": "Hillsborough", + "ashland": "Grafton", + "atkinson": "Rockingham", + "auburn": "Rockingham", + "barnstead": "Belknap", + "barrington": "Strafford", + "bartlett": "Carroll", + "bath": "Grafton", + "bedford": "Hillsborough", + "belmont": "Belknap", + "bennington": "Hillsborough", + "benton": "Grafton", + "berlin": "Coos", + "bethlehem": "Grafton", + "boscawen": "Merrimack", + "bow": "Merrimack", + "bradford": "Merrimack", + "brentwood": "Rockingham", + "bridgewater": "Grafton", + "bristol": "Grafton", + "brookfield": "Carroll", + "brookline": "Hillsborough", + "campton": "Grafton", + "canaan": "Grafton", + "candia": "Rockingham", + "canterbury": "Merrimack", + "carroll": "Coos", + "center harbor": "Belknap", + "charlestown": "Sullivan", + "chatham": "Carroll", + "chester": "Rockingham", + "chesterfield": "Cheshire", + "chichester": "Merrimack", + "claremont": "Sullivan", + "clarksville": "Coos", + "colebrook": "Coos", + "columbia": "Coos", + 
"concord": "Merrimack", + "conway": "Carroll", + "cornish": "Sullivan", + "croydon": "Sullivan", + "dalton": "Coos", + "danbury": "Merrimack", + "danville": "Rockingham", + "deerfield": "Rockingham", + "deering": "Hillsborough", + "derry": "Rockingham", + "dorchester": "Grafton", + "dover": "Strafford", + "dublin": "Cheshire", + "dummer": "Coos", + "dunbarton": "Merrimack", + "durham": "Strafford", + "east kingston": "Rockingham", + "easton": "Grafton", + "eaton": "Carroll", + "effingham": "Carroll", + "ellsworth": "Grafton", + "enfield": "Grafton", + "epping": "Rockingham", + "epsom": "Merrimack", + "errol": "Coos", + "exeter": "Rockingham", + "farmington": "Strafford", + "fitzwilliam": "Cheshire", + "francestown": "Hillsborough", + "franconia": "Grafton", + "franklin": "Merrimack", + "freedom": "Carroll", + "fremont": "Rockingham", + "gilford": "Belknap", + "gilmanton": "Belknap", + "gilsum": "Cheshire", + "goffstown": "Hillsborough", + "gorham": "Coos", + "goshen": "Sullivan", + "grafton": "Grafton", + "grantham": "Sullivan", + "greenfield": "Hillsborough", + "greenland": "Rockingham", + "greenville": "Hillsborough", + "groton": "Grafton", + "hampstead": "Rockingham", + "hampton": "Rockingham", + "hampton falls": "Rockingham", + "hancock": "Hillsborough", + "hanover": "Grafton", + "harrisville": "Cheshire", + "hart's location": "Carroll", + "haverhill": "Grafton", + "hebron": "Grafton", + "henniker": "Merrimack", + "hill": "Merrimack", + "hillsborough": "Hillsborough", + "hinsdale": "Cheshire", + "holderness": "Grafton", + "hollis": "Hillsborough", + "hooksett": "Merrimack", + "hopkinton": "Merrimack", + "hudson": "Hillsborough", + "jackson": "Carroll", + "jaffrey": "Cheshire", + "jefferson": "Coos", + "keene": "Cheshire", + "kensington": "Rockingham", + "kingston": "Rockingham", + "laconia": "Belknap", + "lancaster": "Coos", + "landaff": "Grafton", + "langdon": "Sullivan", + "lee": "Strafford", + "lempster": "Sullivan", + "lincoln": "Grafton", + "lisbon": 
"Grafton", + "litchfield": "Hillsborough", + "littleton": "Grafton", + "londonderry": "Rockingham", + "loudon": "Merrimack", + "lyme": "Grafton", + "lyndeborough": "Hillsborough", + "madbury": "Strafford", + "madison": "Carroll", + "manchester": "Hillsborough", + "marlborough": "Cheshire", + "marlow": "Cheshire", + "mason": "Hillsborough", + "meredith": "Belknap", + "merrimack": "Hillsborough", + "middleton": "Strafford", + "milan": "Coos", + "milford": "Hillsborough", + "millsfield": "Coos", + "milton": "Strafford", + "monroe": "Grafton", + "mont vernon": "Hillsborough", + "moultonborough": "Carroll", + "nashua": "Hillsborough", + "nelson": "Cheshire", + "new boston": "Hillsborough", + "new castle": "Rockingham", + "new durham": "Strafford", + "new hampton": "Belknap", + "new ipswich": "Hillsborough", + "new london": "Merrimack", + "newbury": "Merrimack", + "newfields": "Rockingham", + "newington": "Rockingham", + "newmarket": "Rockingham", + "newport": "Sullivan", + "newton": "Rockingham", + "north hampton": "Rockingham", + "northfield": "Merrimack", + "northumberland": "Coos", + "northwood": "Rockingham", + "nottingham": "Rockingham", + "orange": "Grafton", + "orford": "Grafton", + "ossipee": "Carroll", + "pelham": "Hillsborough", + "pembroke": "Merrimack", + "peterborough": "Hillsborough", + "piermont": "Grafton", + "pittsburg": "Coos", + "pittsfield": "Merrimack", + "plainfield": "Sullivan", + "plaistow": "Rockingham", + "plymouth": "Grafton", + "portsmouth": "Rockingham", + "randolph": "Coos", + "raymond": "Rockingham", + "richmond": "Cheshire", + "rindge": "Cheshire", + "rochester": "Strafford", + "rollinsford": "Strafford", + "roxbury": "Cheshire", + "rumney": "Grafton", + "rye": "Rockingham", + "salem": "Rockingham", + "salisbury": "Merrimack", + "sanbornton": "Belknap", + "sandown": "Rockingham", + "sandwich": "Carroll", + "seabrook": "Rockingham", + "sharon": "Hillsborough", + "shelburne": "Coos", + "somersworth": "Strafford", + "south hampton": 
"Rockingham", + "springfield": "Sullivan", + "stark": "Coos", + "stewartstown": "Coos", + "stoddard": "Cheshire", + "strafford": "Strafford", + "stratford": "Coos", + "stratham": "Rockingham", + "sugar hill": "Grafton", + "sunapee": "Sullivan", + "surry": "Cheshire", + "sutton": "Merrimack", + "swanzey": "Cheshire", + "tamworth": "Carroll", + "temple": "Hillsborough", + "thornton": "Grafton", + "tilton": "Belknap", + "troy": "Cheshire", + "tuftonboro": "Carroll", + "unity": "Sullivan", + "wakefield": "Carroll", + "walpole": "Cheshire", + "warner": "Merrimack", + "warren": "Grafton", + "washington": "Sullivan", + "waterville valley": "Grafton", + "weare": "Hillsborough", + "webster": "Merrimack", + "wentworth": "Grafton", + "westmoreland": "Cheshire", + "whitefield": "Coos", + "wilmot": "Merrimack", + "wilton": "Hillsborough", + "winchester": "Cheshire", + "windham": "Rockingham", + "windsor": "Hillsborough", + "wolfeboro": "Carroll", + "woodstock": "Grafton", + }, + "RI": { + "barrington": "Bristol", + "bristol": "Bristol", + "warren": "Bristol", + "coventry": "Kent", + "east greenwich": "Kent", + "warwick": "Kent", + "west greenwich": "Kent", + "west warwick": "Kent", + "jamestown": "Newport", + "little compton": "Newport", + "middletown": "Newport", + "newport": "Newport", + "portsmouth": "Newport", + "tiverton": "Newport", + "burrillville": "Providence", + "central falls": "Providence", + "cranston": "Providence", + "cumberland": "Providence", + "east providence": "Providence", + "foster": "Providence", + "glocester": "Providence", + "johnston": "Providence", + "lincoln": "Providence", + "north providence": "Providence", + "north smithfield": "Providence", + "pawtucket": "Providence", + "providence": "Providence", + "scituate": "Providence", + "smithfield": "Providence", + "woonsocket": "Providence", + "charlestown": "Washington", + "exeter": "Washington", + "hopkinton": "Washington", + "narragansett": "Washington", + "new shoreham": "Washington", + "north 
kingstown": "Washington", + "richmond": "Washington", + "south kingstown": "Washington", + "westerly": "Washington", + }, + "VT": { + "addison": "Addison", + "bridport": "Addison", + "bristol": "Addison", + "cornwall": "Addison", + "ferrisburgh": "Addison", + "goshen": "Addison", + "granville": "Addison", + "hancock": "Addison", + "leicester": "Addison", + "lincoln": "Addison", + "middlebury": "Addison", + "monkton": "Addison", + "new haven": "Addison", + "orwell": "Addison", + "panton": "Addison", + "ripton": "Addison", + "salisbury": "Addison", + "shoreham": "Addison", + "starksboro": "Addison", + "vergennes": "Addison", + "waltham": "Addison", + "weybridge": "Addison", + "whiting": "Addison", + "arlington": "Bennington", + "bennington": "Bennington", + "dorset": "Bennington", + "glastenbury": "Bennington", + "landgrove": "Bennington", + "manchester": "Bennington", + "peru": "Bennington", + "pownal": "Bennington", + "readsboro": "Bennington", + "rupert": "Bennington", + "sandgate": "Bennington", + "searsburg": "Bennington", + "shaftsbury": "Bennington", + "stamford": "Bennington", + "sunderland": "Bennington", + "winhall": "Bennington", + "woodford": "Bennington", + "barnet": "Caledonia", + "burke": "Caledonia", + "danville": "Caledonia", + "groton": "Caledonia", + "hardwick": "Caledonia", + "kirby": "Caledonia", + "lyndon": "Caledonia", + "newark": "Caledonia", + "peacham": "Caledonia", + "ryegate": "Caledonia", + "sheffield": "Caledonia", + "st. 
johnsbury": "Caledonia", + "st johnsbury": "Caledonia", + "stannard": "Caledonia", + "sutton": "Caledonia", + "walden": "Caledonia", + "waterford": "Caledonia", + "wheelock": "Caledonia", + "bolton": "Chittenden", + "burlington": "Chittenden", + "charlotte": "Chittenden", + "colchester": "Chittenden", + "essex": "Chittenden", + "essex junction": "Chittenden", + "hinesburg": "Chittenden", + "huntington": "Chittenden", + "jericho": "Chittenden", + "milton": "Chittenden", + "richmond": "Chittenden", + "shelburne": "Chittenden", + "south burlington": "Chittenden", + "underhill": "Chittenden", + "westford": "Chittenden", + "williston": "Chittenden", + "winooski": "Chittenden", + "averill": "Essex", + "bloomfield": "Essex", + "brighton": "Essex", + "brunswick": "Essex", + "canaan": "Essex", + "concord": "Essex", + "east haven": "Essex", + "ferdinand": "Essex", + "granby": "Essex", + "guildhall": "Essex", + "lemington": "Essex", + "lunenburg": "Essex", + "maidstone": "Essex", + "norton": "Essex", + "victory": "Essex", + "bakersfield": "Franklin", + "berkshire": "Franklin", + "enosburg": "Franklin", + "enosburg falls": "Franklin", + "fairfax": "Franklin", + "fairfield": "Franklin", + "fletcher": "Franklin", + "franklin": "Franklin", + "georgia": "Franklin", + "highgate": "Franklin", + "montgomery": "Franklin", + "richford": "Franklin", + "sheldon": "Franklin", + "st. 
albans": "Franklin", + "st albans": "Franklin", + "swanton": "Franklin", + "alburgh": "Grand Isle", + "grand isle": "Grand Isle", + "isle la motte": "Grand Isle", + "north hero": "Grand Isle", + "south hero": "Grand Isle", + "belvidere": "Lamoille", + "cambridge": "Lamoille", + "eden": "Lamoille", + "elmore": "Lamoille", + "hyde park": "Lamoille", + "johnson": "Lamoille", + "morristown": "Lamoille", + "morrisville": "Lamoille", + "stowe": "Lamoille", + "waterville": "Lamoille", + "wolcott": "Lamoille", + "bradford": "Orange", + "braintree": "Orange", + "brookfield": "Orange", + "chelsea": "Orange", + "corinth": "Orange", + "fairlee": "Orange", + "newbury": "Orange", + "orange": "Orange", + "randolph": "Orange", + "strafford": "Orange", + "thetford": "Orange", + "topsham": "Orange", + "tunbridge": "Orange", + "vershire": "Orange", + "washington": "Orange", + "west fairlee": "Orange", + "williamstown": "Orange", + "albany": "Orleans", + "barton": "Orleans", + "brownington": "Orleans", + "charleston": "Orleans", + "coventry": "Orleans", + "craftsbury": "Orleans", + "derby": "Orleans", + "glover": "Orleans", + "greensboro": "Orleans", + "holland": "Orleans", + "irasburgh": "Orleans", + "jay": "Orleans", + "lowell": "Orleans", + "morgan": "Orleans", + "newport": "Orleans", + "troy": "Orleans", + "westfield": "Orleans", + "westmore": "Orleans", + "benson": "Rutland", + "brandon": "Rutland", + "castleton": "Rutland", + "chittenden": "Rutland", + "clarendon": "Rutland", + "danby": "Rutland", + "fair haven": "Rutland", + "hubbardton": "Rutland", + "ira": "Rutland", + "killington": "Rutland", + "mendon": "Rutland", + "middletown springs": "Rutland", + "mount holly": "Rutland", + "mount tabor": "Rutland", + "pawlet": "Rutland", + "pittsfield": "Rutland", + "pittsford": "Rutland", + "poultney": "Rutland", + "proctor": "Rutland", + "rutland": "Rutland", + "shrewsbury": "Rutland", + "sudbury": "Rutland", + "tinmouth": "Rutland", + "wallingford": "Rutland", + "west haven": 
"Rutland", + "west rutland": "Rutland", + "barre": "Washington", + "berlin": "Washington", + "cabot": "Washington", + "calais": "Washington", + "duxbury": "Washington", + "east montpelier": "Washington", + "fayston": "Washington", + "marshfield": "Washington", + "middlesex": "Washington", + "montpelier": "Washington", + "moretown": "Washington", + "northfield": "Washington", + "plainfield": "Washington", + "roxbury": "Washington", + "waitsfield": "Washington", + "warren": "Washington", + "waterbury": "Washington", + "woodbury": "Washington", + "worcester": "Washington", + "athens": "Windham", + "brattleboro": "Windham", + "brookline": "Windham", + "dover": "Windham", + "dummerston": "Windham", + "grafton": "Windham", + "guilford": "Windham", + "halifax": "Windham", + "jamaica": "Windham", + "londonderry": "Windham", + "marlboro": "Windham", + "newfane": "Windham", + "putney": "Windham", + "rockingham": "Windham", + "somerset": "Windham", + "stratton": "Windham", + "townshend": "Windham", + "vernon": "Windham", + "wardsboro": "Windham", + "westminster": "Windham", + "whitingham": "Windham", + "wilmington": "Windham", + "windham": "Windham", + "andover": "Windsor", + "baltimore": "Windsor", + "barnard": "Windsor", + "bethel": "Windsor", + "bridgewater": "Windsor", + "cavendish": "Windsor", + "chester": "Windsor", + "hartford": "Windsor", + "hartland": "Windsor", + "ludlow": "Windsor", + "norwich": "Windsor", + "plymouth": "Windsor", + "pomfret": "Windsor", + "reading": "Windsor", + "rochester": "Windsor", + "royalton": "Windsor", + "sharon": "Windsor", + "springfield": "Windsor", + "stockbridge": "Windsor", + "weathersfield": "Windsor", + "weston": "Windsor", + "west windsor": "Windsor", + "windsor": "Windsor", + "woodstock": "Windsor", + }, +} + + +def resolve_county_from_service_area(service_area: str, state_abbr: str) -> str | None: + """ + Try to resolve a county name from a service area text string. + + Strategy: + 1. Check for direct county name mentions (e.g. 
"Throughout Rockingham County") + 2. Tokenize and check each token against TOWN_COUNTY_MAP + + Args: + service_area: Free-text service area from CheapestOil + state_abbr: Two-letter state code + + Returns: + County name string or None if no match found. + """ + if not service_area or state_abbr not in TOWN_COUNTY_MAP: + return None + + text = service_area.strip() + text_lower = text.lower() + + # 1. Check for direct county name mentions + counties = _NE_COUNTIES.get(state_abbr, []) + for county in counties: + if county.lower() in text_lower: + return county + + # 2. Tokenize and check against town map + town_map = TOWN_COUNTY_MAP[state_abbr] + # Split on common delimiters: commas, "and", semicolons, slashes + tokens = re.split(r'[,;/&]+|\band\b', text_lower) + for token in tokens: + token = token.strip().rstrip('.') + if not token: + continue + # Direct match + if token in town_map: + return town_map[token] + # Try without common prefixes/suffixes like "greater", "area", "surrounding" + for prefix in ("greater ", "the ", "town of ", "city of "): + if token.startswith(prefix): + cleaned = token[len(prefix):] + if cleaned in town_map: + return town_map[cleaned] + + return None diff --git a/docker-compose.yml b/docker-compose.yml index 83f9af3..1f526dc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,22 +1,11 @@ services: app: - build: . # Build the image from the Dockerfile in the current directory + build: . container_name: fuel_scraper_app_service + ports: + - "9553:9553" volumes: - # Mount current directory for live code updates during development - .:/app - # If your app needs to connect to a DB on the host, and host.docker.internal - # isn't working, you might need to add it to the host network (less secure, platform-dependent) - # or use 'extra_hosts' on Linux. 
- # For host.docker.internal to work on Linux, you might need: extra_hosts: - "host.docker.internal:host-gateway" - #environment: - # You can pass DATABASE_URL here to override database.py if needed - # DATABASE_URL: "postgresql://your_user:your_password@host.docker.internal:5432/fuelprices" - # PYTHONUNBUFFERED: 1 # Already in Dockerfile, but good practice - # The default command comes from the Dockerfile's CMD - # To keep the container running after the script finishes (for debugging or exec): - # tty: true - # stdin_open: true \ No newline at end of file diff --git a/fuel_scraper.py b/fuel_scraper.py deleted file mode 100644 index 1193362..0000000 --- a/fuel_scraper.py +++ /dev/null @@ -1,360 +0,0 @@ -#!/usr/bin/env python3 -import requests -from bs4 import BeautifulSoup -from datetime import datetime -import logging -import os -import re # For parsing zone number from slug - -from sqlalchemy.orm import Session -from database import SessionLocal, init_db -import models - -# --- SITES CONFIGURATION --- -SITES_CONFIG = [ - { - "site_name": "NewEnglandOil", - "base_url": "https://www.newenglandoil.com", - "url_template": "{base_url}/{state_slug}/{zone_slug}.asp?type={oil_type}", - "oil_type": 0, - "locations": { - "connecticut": [ - "zone1", "zone2", "zone3", "zone4", "zone5", "zone6", "zone7", - "zone8", "zone9", "zone10" - ], - "massachusetts": [ - "zone1", "zone2", "zone3", "zone4", "zone5", "zone6", - "zone7", "zone8", "zone9", "zone10", "zone11", "zone12", - "zone13","zone14","zone15" - ], - "newhampshire": [ - "zone1", "zone2", "zone3", "zone4", "zone5", "zone6" - ], - "rhodeisland": [ - "zone1", "zone2", "zone3", "zone4" - ], - - - } - }, - { - "site_name": "MaineOil", - "base_url": "https://www.maineoil.com", - "url_template": "{base_url}/{zone_slug}.asp?type={oil_type}", - "oil_type": 0, - "locations": { - "maine": [ - "zone1", "zone2", "zone3", "zone4", "zone5", - "zone6", "zone7" - ] - } - } -] - -# --- ZONE-TO-COUNTY MAPPING --- -# Maps (state_key, 
zone_number) -> (state_abbrev, county_name) -ZONE_COUNTY_MAP = { - ("connecticut", 1): ("CT", "New London"), - ("connecticut", 2): ("CT", "Windham"), - ("connecticut", 3): ("CT", "New Haven"), - ("connecticut", 4): ("CT", "Middlesex"), - ("connecticut", 5): ("CT", "New Haven"), - ("connecticut", 6): ("CT", "Hartford"), - ("connecticut", 7): ("CT", "Litchfield"), - ("connecticut", 8): ("CT", "Fairfield"), - ("connecticut", 9): ("CT", "Tolland"), - ("connecticut", 10): ("CT", "Litchfield"), - ("massachusetts", 1): ("MA", "Suffolk"), - ("massachusetts", 2): ("MA", "Middlesex"), - ("massachusetts", 3): ("MA", "Norfolk"), - ("massachusetts", 4): ("MA", "Plymouth"), - ("massachusetts", 5): ("MA", "Middlesex"), - ("massachusetts", 6): ("MA", "Bristol"), - ("massachusetts", 7): ("MA", "Barnstable"), - ("massachusetts", 8): ("MA", "Essex"), - ("massachusetts", 9): ("MA", "Essex"), - ("massachusetts", 10): ("MA", "Worcester"), - ("massachusetts", 11): ("MA", "Worcester"), - ("massachusetts", 12): ("MA", "Hampshire"), - ("massachusetts", 13): ("MA", "Hampden"), - ("massachusetts", 14): ("MA", "Franklin"), - ("massachusetts", 15): ("MA", "Berkshire"), - ("newhampshire", 1): ("NH", "Coos"), - ("newhampshire", 2): ("NH", "Strafford"), - ("newhampshire", 3): ("NH", "Merrimack"), - ("newhampshire", 4): ("NH", "Grafton"), - ("newhampshire", 5): ("NH", "Cheshire"), - ("newhampshire", 6): ("NH", "Hillsborough"), - ("rhodeisland", 1): ("RI", "Newport"), - ("rhodeisland", 2): ("RI", "Providence"), - ("rhodeisland", 3): ("RI", "Washington"), - ("rhodeisland", 4): ("RI", "Kent"), - ("maine", 1): ("ME", "Cumberland"), - ("maine", 2): ("ME", "Kennebec"), - ("maine", 3): ("ME", "Androscoggin"), - ("maine", 4): ("ME", "York"), - ("maine", 5): ("ME", "Knox"), - ("maine", 6): ("ME", "Penobscot"), - ("maine", 7): ("ME", "Washington"), -} - -LOG_FILE = "oil_scraper.log" -logging.basicConfig( - filename=LOG_FILE, - level=logging.INFO, - format='%(asctime)s - %(levelname)s - 
[%(filename)s:%(lineno)d] - %(message)s' -) - -# --- Helper Functions --- -def make_request(url): - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - try: - response = requests.get(url, headers=headers, timeout=20) - response.raise_for_status() - return BeautifulSoup(response.content, 'html.parser') - except requests.exceptions.RequestException as e: - logging.error(f"Error fetching {url}: {e}") - return None - -def parse_zone_slug_to_int(zone_slug_str): - """Extracts the numeric part of a zone slug (e.g., "zone1" -> 1, "zonema5" -> 5).""" - if not zone_slug_str: return None - match = re.search(r'\d+$', zone_slug_str) - if match: - return int(match.group(0)) - logging.warning(f"Could not parse numeric zone from slug: '{zone_slug_str}'") - return None - -def parse_price_table(soup, state_name_key, zone_slug_str): - """Parses price tables. state_name_key is "connecticut", "maine", etc. zone_slug_str is "zone1", "zonema5", etc.""" - data_dicts = [] - all_tables_on_page = soup.find_all('table') - logging.info(f"Found {len(all_tables_on_page)} table(s) on page for {state_name_key} - {zone_slug_str}.") - - if not all_tables_on_page: - logging.warning(f"No HTML tables found at all for {state_name_key} - {zone_slug_str}.") - return data_dicts - - # --- Convert zone_slug_str to integer --- - zone_int = parse_zone_slug_to_int(zone_slug_str) - if zone_int is None: - logging.error(f"Cannot parse zone number for {state_name_key} - {zone_slug_str}. 
Skipping.") - return data_dicts - - candidate_tables_found = 0 - for table_index, table in enumerate(all_tables_on_page): - thead = table.find('thead') - is_price_table = False - actual_column_indices = {} - - if thead: - headers_lower = [th.get_text(strip=True).lower() for th in thead.find_all('th')] - logging.debug(f"Table {table_index} on {state_name_key}/{zone_slug_str} - headers: {headers_lower}") - try: - actual_column_indices['company'] = headers_lower.index('company name') - price_col_name_part = 'price' - actual_column_indices['price'] = next(i for i, header in enumerate(headers_lower) if price_col_name_part in header) - actual_column_indices['date'] = headers_lower.index('date') - is_price_table = True - logging.debug(f"Table {table_index} identified as price table. Indices: {actual_column_indices}") - except (ValueError, StopIteration): - logging.debug(f"Table {table_index} headers do not contain all key columns.") - else: - logging.debug(f"Table {table_index} has no thead.") - - if not is_price_table: - continue - - candidate_tables_found += 1 - tbody = table.find('tbody') - if not tbody: - logging.warning(f"Price table identified by headers has no tbody. Skipping. 
State: {state_name_key}, Zone: {zone_slug_str}") - continue - rows = tbody.find_all('tr') - if not rows: - logging.debug(f"No rows found in tbody for price table in {state_name_key}/{zone_slug_str}") - continue - - for row_index, row in enumerate(rows): - cells = row.find_all('td') - max_required_index = max(actual_column_indices.values()) if actual_column_indices else -1 - - if max_required_index == -1: - logging.error(f"Logic error: is_price_table true but no column indices for {state_name_key}/{zone_slug_str}") - continue - - if len(cells) > max_required_index: - company_name_scraped = cells[actual_column_indices['company']].get_text(strip=True) - price_str = cells[actual_column_indices['price']].get_text(strip=True) - date_posted_str = cells[actual_column_indices['date']].get_text(strip=True) - - company_link = cells[actual_column_indices['company']].find('a') - if company_link: - company_name_scraped = company_link.get_text(strip=True) - - price_float = None - try: - cleaned_price_str = ''.join(filter(lambda x: x.isdigit() or x == '.', price_str)) - if cleaned_price_str: - price_float = float(cleaned_price_str) - except ValueError: - logging.warning(f"Could not parse price: '{price_str}' for {company_name_scraped} in {state_name_key}/{zone_slug_str}.") - except Exception as e: - logging.error(f"Unexpected error parsing price: '{price_str}' for {company_name_scraped}. 
Error: {e}") - - data_dicts.append({ - "state": state_name_key.capitalize(), # Use the passed state_name_key - "zone": zone_int, # Use the parsed integer zone - "name": company_name_scraped, - "price": price_float, - "date": date_posted_str, - }) - elif len(cells) > 0: - logging.warning(f"Skipping row {row_index+1} with insufficient cells ({len(cells)}, need {max_required_index+1}) in {state_name_key}/{zone_slug_str}") - - if candidate_tables_found == 0: - logging.warning(f"No tables matching expected price table structure found for {state_name_key} - {zone_slug_str}.") - return data_dicts - -# --- Helper: Build county lookup --- -def build_county_lookup(db_session): - """Build (state_abbrev, county_name) -> county_id lookup from DB.""" - counties = db_session.query(models.County).all() - lookup = {} - for c in counties: - lookup[(c.state, c.name)] = c.id - logging.info(f"Built county lookup with {len(lookup)} entries") - return lookup - - -def resolve_county_id(state_key, zone_number, county_lookup): - """Resolve county_id from ZONE_COUNTY_MAP and county lookup.""" - mapping = ZONE_COUNTY_MAP.get((state_key, zone_number)) - if not mapping: - return None - state_abbrev, county_name = mapping - return county_lookup.get((state_abbrev, county_name)) - - -# --- Main Script --- -def main(): - logging.info("Starting oil price scraper job.") - try: - init_db() - logging.info("Database initialized/checked successfully.") - except Exception as e: - logging.error(f"Failed to initialize database: {e}", exc_info=True) - return - - db_session: Session = SessionLocal() - total_records_added_this_run = 0 - - try: - # Build county lookup at startup - county_lookup = build_county_lookup(db_session) - - for site_config in SITES_CONFIG: - site_name = site_config["site_name"] - base_url = site_config["base_url"] - url_template = site_config["url_template"] - oil_type = site_config["oil_type"] - - logging.info(f"--- Processing site: {site_name} ---") - - for state_key_in_config, 
zone_slugs_list in site_config["locations"].items(): - - for zone_slug_from_list in zone_slugs_list: - format_params = { - "base_url": base_url, - "state_slug": state_key_in_config, - "zone_slug": zone_slug_from_list, - "oil_type": oil_type - } - target_url = url_template.format(**format_params) - - logging.info(f"Scraping: {target_url} (State: {state_key_in_config}, Zone Slug: {zone_slug_from_list})") - - soup = make_request(target_url) - if soup: - parsed_items = parse_price_table(soup, state_key_in_config, zone_slug_from_list) - - if parsed_items: - # Resolve county_id for this zone - zone_int = parse_zone_slug_to_int(zone_slug_from_list) - county_id = None - if zone_int is not None: - county_id = resolve_county_id(state_key_in_config, zone_int, county_lookup) - - for item_dict in parsed_items: - # Match by county_id when available to avoid duplicates - # when multiple zones map to the same county - if county_id is not None: - existing_record = db_session.query(models.OilPrice).filter( - models.OilPrice.name == item_dict["name"], - models.OilPrice.state == item_dict["state"], - models.OilPrice.county_id == county_id - ).first() - else: - existing_record = db_session.query(models.OilPrice).filter( - models.OilPrice.name == item_dict["name"], - models.OilPrice.state == item_dict["state"], - models.OilPrice.zone == item_dict["zone"] - ).first() - - if existing_record: - if existing_record.company_id is not None: - logging.debug(f"Skipping update for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} due to non-null company_id") - else: - updated = False - if county_id is not None and existing_record.county_id != county_id: - existing_record.county_id = county_id - updated = True - if existing_record.price != item_dict["price"]: - existing_record.price = item_dict["price"] - existing_record.date = item_dict["date"] - existing_record.scrapetimestamp = datetime.utcnow() - logging.info(f"Updated price for {item_dict['name']} in {item_dict['state']} 
zone {item_dict['zone']} to {item_dict['price']}") - elif updated: - existing_record.scrapetimestamp = datetime.utcnow() - logging.info(f"Updated county_id for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} to {county_id}") - else: - logging.debug(f"Price unchanged for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}") - else: - oil_price_record = models.OilPrice( - state=item_dict["state"], - zone=item_dict["zone"], - name=item_dict["name"], - price=item_dict["price"], - date=item_dict["date"], - county_id=county_id, - scrapetimestamp=datetime.utcnow() - ) - db_session.add(oil_price_record) - logging.info(f"Added new record for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} (county_id={county_id})") - total_records_added_this_run += len(parsed_items) - logging.info(f"Queued {len(parsed_items)} records from {site_name} - {state_key_in_config}/{zone_slug_from_list} for DB insertion.") - else: - logging.info(f"No data extracted from {target_url}") - else: - logging.warning(f"Failed to retrieve or parse {target_url}. Skipping.") - - if total_records_added_this_run > 0: - db_session.commit() - logging.info(f"Successfully committed {total_records_added_this_run} records to the database.") - else: - logging.info("No new records were queued for database insertion in this run.") - - except Exception as e: - logging.error(f"An error occurred during scraping or DB operation: {e}", exc_info=True) - db_session.rollback() - logging.info("Database transaction rolled back due to error.") - finally: - db_session.close() - logging.info("Database session closed.") - - logging.info("Oil price scraper job finished.") - -if __name__ == "__main__": - main() diff --git a/fuel_scraper/db_operations.py b/fuel_scraper/db_operations.py deleted file mode 100644 index 030dce0..0000000 --- a/fuel_scraper/db_operations.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Database operations module for oil price CRUD operations. 
-""" -import logging -from datetime import datetime -from sqlalchemy.orm import Session - -import sys -import os -# Add parent directory to path for imports -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import models - - -def upsert_oil_price(db_session: Session, item_dict: dict) -> bool: - """ - Insert or update an oil price record. - - Logic: - - Match by (name, state, county_id) when county_id is available to avoid - duplicates when multiple zones map to the same county. - - Fall back to (name, state, zone) when county_id is not available. - - If record exists with non-null company_id: skip (vendor-managed price) - - If record exists with null company_id and different price: update - - If record exists with same price: skip (no change) - - If no record exists: insert new - - Args: - db_session: SQLAlchemy session - item_dict: Dictionary with state, zone, name, price, date, county_id - - Returns: - True if a record was inserted or updated, False otherwise - """ - county_id = item_dict.get("county_id") - - # Check if record already exists - prefer matching by county_id to avoid - # duplicates when multiple zones map to the same county - if county_id is not None: - existing_record = db_session.query(models.OilPrice).filter( - models.OilPrice.name == item_dict["name"], - models.OilPrice.state == item_dict["state"], - models.OilPrice.county_id == county_id - ).first() - else: - existing_record = db_session.query(models.OilPrice).filter( - models.OilPrice.name == item_dict["name"], - models.OilPrice.state == item_dict["state"], - models.OilPrice.zone == item_dict["zone"] - ).first() - - if existing_record: - # Record exists - check if we should update - if existing_record.company_id is not None: - logging.debug( - f"Skipping update for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} " - "due to non-null company_id" - ) - return False - - # Always update county_id if we have one and it differs - updated = False - 
if county_id is not None and existing_record.county_id != county_id: - existing_record.county_id = county_id - updated = True - - # Company ID is null - check if price changed - if existing_record.price != item_dict["price"]: - existing_record.price = item_dict["price"] - existing_record.date = item_dict["date"] - existing_record.scrapetimestamp = datetime.utcnow() - logging.info( - f"Updated price for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} " - f"to {item_dict['price']}" - ) - return True - elif updated: - existing_record.scrapetimestamp = datetime.utcnow() - logging.info( - f"Updated county_id for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} " - f"to {county_id}" - ) - return True - else: - logging.debug( - f"Price unchanged for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}" - ) - return False - else: - # No record exists - create new - oil_price_record = models.OilPrice( - state=item_dict["state"], - zone=item_dict["zone"], - name=item_dict["name"], - price=item_dict["price"], - date=item_dict["date"], - county_id=county_id, - scrapetimestamp=datetime.utcnow() - ) - db_session.add(oil_price_record) - logging.info( - f"Added new record for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} " - f"(county_id={county_id})" - ) - return True diff --git a/fuel_scraper/http_client.py b/fuel_scraper/http_client.py deleted file mode 100644 index 4658518..0000000 --- a/fuel_scraper/http_client.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -HTTP client module for making web requests. -""" -import logging -import requests -from bs4 import BeautifulSoup - -# Default headers to mimic a browser -DEFAULT_HEADERS = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' -} - -REQUEST_TIMEOUT = 20 - - -def make_request(url: str) -> BeautifulSoup | None: - """ - Fetch a URL and return a BeautifulSoup object. 
- - Args: - url: The URL to fetch - - Returns: - BeautifulSoup object if successful, None otherwise - """ - try: - response = requests.get(url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT) - response.raise_for_status() - return BeautifulSoup(response.content, 'html.parser') - except requests.exceptions.RequestException as e: - logging.error(f"Error fetching {url}: {e}") - return None diff --git a/fuel_scraper/scraper.py b/fuel_scraper/scraper.py deleted file mode 100644 index 08b939b..0000000 --- a/fuel_scraper/scraper.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python3 -""" -Main scraper orchestrator module. -Coordinates fetching, parsing, and storing oil price data. -""" -import logging -import sys -import os - -# Add parent directory to path for imports -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from sqlalchemy.orm import Session -from database import SessionLocal, init_db -import models - -from .config import SITES_CONFIG, ZONE_COUNTY_MAP, setup_logging -from .http_client import make_request -from .parsers import parse_price_table, parse_zone_slug_to_int -from .db_operations import upsert_oil_price - - -def _build_county_lookup(db_session: Session) -> dict: - """ - Build a lookup dict from (state_abbrev, county_name) -> county_id - by querying the county table. - """ - counties = db_session.query(models.County).all() - lookup = {} - for c in counties: - lookup[(c.state, c.name)] = c.id - logging.info(f"Built county lookup with {len(lookup)} entries") - return lookup - - -def _resolve_county_id(state_key: str, zone_number: int, county_lookup: dict) -> int | None: - """ - Resolve a county_id from ZONE_COUNTY_MAP and the county lookup. - Returns None if no mapping exists. 
- """ - mapping = ZONE_COUNTY_MAP.get((state_key, zone_number)) - if not mapping: - logging.debug(f"No zone-to-county mapping for ({state_key}, {zone_number})") - return None - state_abbrev, county_name = mapping - county_id = county_lookup.get((state_abbrev, county_name)) - if county_id is None: - logging.warning(f"County not found in DB: ({state_abbrev}, {county_name}) for zone ({state_key}, {zone_number})") - return county_id - - -def _scrape_zone( - db_session: Session, - site_name: str, - url_template: str, - base_url: str, - oil_type: int, - state_key: str, - zone_slug: str, - county_lookup: dict -) -> int: - """ - Scrape a single zone and store records. - - Returns: - Number of records processed - """ - format_params = { - "base_url": base_url, - "state_slug": state_key, - "zone_slug": zone_slug, - "oil_type": oil_type - } - target_url = url_template.format(**format_params) - - logging.info(f"Scraping: {target_url} (State: {state_key}, Zone Slug: {zone_slug})") - - soup = make_request(target_url) - if not soup: - logging.warning(f"Failed to retrieve or parse {target_url}. Skipping.") - return 0 - - parsed_items = parse_price_table(soup, state_key, zone_slug) - - if not parsed_items: - logging.info(f"No data extracted from {target_url}") - return 0 - - # Resolve county_id for this zone - zone_number = parse_zone_slug_to_int(zone_slug) - county_id = None - if zone_number is not None: - county_id = _resolve_county_id(state_key, zone_number, county_lookup) - - records_processed = 0 - for item_dict in parsed_items: - item_dict["county_id"] = county_id - if upsert_oil_price(db_session, item_dict): - records_processed += 1 - - logging.info( - f"Processed {len(parsed_items)} records from {site_name} - {state_key}/{zone_slug} " - f"({records_processed} inserted/updated, county_id={county_id})" - ) - - return len(parsed_items) - - -def _scrape_site(db_session: Session, site_config: dict, county_lookup: dict) -> int: - """ - Scrape all zones for a single site. 
- - Returns: - Total number of records processed - """ - site_name = site_config["site_name"] - base_url = site_config["base_url"] - url_template = site_config["url_template"] - oil_type = site_config["oil_type"] - - logging.info(f"--- Processing site: {site_name} ---") - - total_records = 0 - - for state_key, zone_slugs in site_config["locations"].items(): - for zone_slug in zone_slugs: - records = _scrape_zone( - db_session=db_session, - site_name=site_name, - url_template=url_template, - base_url=base_url, - oil_type=oil_type, - state_key=state_key, - zone_slug=zone_slug, - county_lookup=county_lookup - ) - total_records += records - - return total_records - - -def main(): - """ - Main entry point for the oil price scraper. - - Initializes database, iterates through all configured sites and zones, - scrapes price data, and stores it in the database. - """ - setup_logging() - logging.info("Starting oil price scraper job.") - - # Initialize database - try: - init_db() - logging.info("Database initialized/checked successfully.") - except Exception as e: - logging.error(f"Failed to initialize database: {e}", exc_info=True) - return - - db_session: Session = SessionLocal() - total_records = 0 - - try: - # Build county lookup at startup - county_lookup = _build_county_lookup(db_session) - - # Process each configured site - for site_config in SITES_CONFIG: - records = _scrape_site(db_session, site_config, county_lookup) - total_records += records - - # Commit all changes - if total_records > 0: - db_session.commit() - logging.info(f"Successfully committed records to the database.") - else: - logging.info("No new records were queued for database insertion in this run.") - - except Exception as e: - logging.error(f"An error occurred during scraping or DB operation: {e}", exc_info=True) - db_session.rollback() - logging.info("Database transaction rolled back due to error.") - finally: - db_session.close() - logging.info("Database session closed.") - - logging.info("Oil 
price scraper job finished.") - - -if __name__ == "__main__": - main() diff --git a/models.py b/models.py index 4879848..ae7b643 100644 --- a/models.py +++ b/models.py @@ -25,6 +25,8 @@ class OilPrice(Base): company_id = Column(Integer, ForeignKey("company.id"), nullable=True) county_id = Column(Integer, nullable=True) + phone = Column(String(20), nullable=True) + url = Column(String(500), nullable=True) def __repr__(self): return (f"" \ No newline at end of file + return f"" + +# --- StatsPrice Model --- +class StatsPrice(Base): + __tablename__ = "stats_prices" + + id = Column(Integer, primary_key=True, index=True, autoincrement=True) + state = Column(String(2), nullable=False) + price = Column(Float, nullable=False) + created_at = Column(DateTime, default=datetime.utcnow) + + def __repr__(self): + return f"" \ No newline at end of file diff --git a/fuel_scraper/__init__.py b/newenglandoil/__init__.py similarity index 65% rename from fuel_scraper/__init__.py rename to newenglandoil/__init__.py index 4bcddf1..612370b 100644 --- a/fuel_scraper/__init__.py +++ b/newenglandoil/__init__.py @@ -1,4 +1,4 @@ -# fuel_scraper package +# newenglandoil package from .scraper import main __all__ = ["main"] diff --git a/fuel_scraper/config.py b/newenglandoil/config.py similarity index 94% rename from fuel_scraper/config.py rename to newenglandoil/config.py index 2087045..c0c791b 100644 --- a/fuel_scraper/config.py +++ b/newenglandoil/config.py @@ -43,6 +43,17 @@ SITES_CONFIG = [ } ] +# --- STATE ABBREVIATION MAP --- +# Maps lowercase state keys (as used in SITES_CONFIG locations) to 2-letter abbreviations +STATE_ABBREV_MAP = { + "connecticut": "CT", + "massachusetts": "MA", + "maine": "ME", + "newhampshire": "NH", + "rhodeisland": "RI", + "vermont": "VT", +} + # --- ZONE-TO-COUNTY MAPPING --- # Maps (state_key, zone_number) -> (state_abbrev, county_name) # state_key matches the keys in SITES_CONFIG locations (lowercase, no spaces) diff --git a/newenglandoil/db_operations.py 
def _find_existing_price(db_session, name_clean, state, zone, county_id):
    """Return the stored OilPrice row matching a scraped item, or None."""
    base_query = db_session.query(models.OilPrice).filter(
        func.lower(models.OilPrice.name) == name_clean.lower(),
        models.OilPrice.state == state,
    )

    if county_id is None:
        # County could not be resolved: the zone is the only locality we have.
        return base_query.filter(models.OilPrice.zone == zone).first()

    record = base_query.filter(models.OilPrice.county_id == county_id).first()
    if record is not None:
        return record

    # Rows stored before the county was resolvable have county_id NULL.
    # Adopt such a row (same zone) instead of inserting a duplicate; the
    # caller then backfills county_id on it.
    return base_query.filter(
        models.OilPrice.county_id.is_(None),
        models.OilPrice.zone == zone,
    ).first()


def upsert_oil_price(db_session: Session, item_dict: dict, force_update_metadata: bool = False) -> bool:
    """
    Insert or update an oil price record.

    Logic:
    - Match by (name, state, county_id) - case insensitive on name.
    - If county_id is None, fall back to (name, state, zone).
    - If county_id is set but no row has it yet, fall back to a row with
      NULL county_id in the same zone so it can be backfilled.
    - If match found:
        - If company_id is set: SKIP (vendor managed).
        - Update name to the scraped (formatted) version if it differs.
        - Backfill county_id when the stored value differs.
        - Update phone/url if missing OR force_update_metadata is True.
        - Update price/date if either changed. A scraped price of None
          (unparsable cell) never overwrites the stored price/date.
    - If no match: INSERT.

    Args:
        db_session: SQLAlchemy session (changes are added/flagged, not committed)
        item_dict: Dictionary with state, zone, name, price, date and optionally
            county_id, site_name, phone, url
        force_update_metadata: If True, overwrite existing phone/url

    Returns:
        True if a row was inserted or modified, False otherwise
    """
    county_id = item_dict.get("county_id")
    site_name = item_dict.get("site_name", "NewEnglandOil")
    name_clean = item_dict["name"].strip()
    new_price = item_dict["price"]
    new_date = item_dict["date"]
    new_phone = item_dict.get("phone")
    new_url = item_dict.get("url")

    existing_record = _find_existing_price(
        db_session, name_clean, item_dict["state"], item_dict["zone"], county_id
    )

    if existing_record is None:
        # NOTE(review): a None price is still inserted, as before; whether the
        # price column accepts NULL is not visible from this module.
        oil_price_record = models.OilPrice(
            state=item_dict["state"],
            zone=item_dict["zone"],
            name=name_clean,
            price=new_price,
            date=new_date,
            county_id=county_id,
            phone=new_phone,
            url=new_url,
            scrapetimestamp=datetime.utcnow()
        )
        db_session.add(oil_price_record)
        logging.info(
            f"[{site_name}] Added new record for {name_clean} in {item_dict['state']} zone {item_dict['zone']} "
            f"(county_id={county_id})"
        )
        return True

    if existing_record.company_id is not None:
        logging.debug(
            f"[{site_name}] Skipping update for {name_clean} (ID={existing_record.id}) "
            "due to non-null company_id"
        )
        return False

    updated = False

    # 1. Adopt the scraper's formatting of the name (match was case-insensitive).
    if existing_record.name != name_clean:
        existing_record.name = name_clean
        updated = True

    # 2. Backfill county_id on rows adopted through the NULL-county fallback.
    if county_id is not None and existing_record.county_id != county_id:
        existing_record.county_id = county_id
        updated = True

    # 3. Backfill or force-update phone/url.
    if new_phone:
        if not existing_record.phone or (force_update_metadata and existing_record.phone != new_phone):
            existing_record.phone = new_phone
            updated = True

    if new_url:
        if not existing_record.url or (force_update_metadata and existing_record.url != new_url):
            existing_record.url = new_url
            updated = True

    # 4. Price / date. Prices are compared with a tolerance because they are
    #    floats; a missing value on either side must not reach abs().
    if new_price is None:
        logging.warning(
            f"[{site_name}] No parsable price for {name_clean} (ID={existing_record.id}); "
            "keeping stored price/date"
        )
        price_changed = False
        date_changed = False
    else:
        price_changed = (
            existing_record.price is None
            or abs(existing_record.price - new_price) > 0.001
        )
        date_changed = existing_record.date != new_date

    if price_changed or date_changed:
        existing_record.price = new_price
        existing_record.date = new_date
        existing_record.scrapetimestamp = datetime.utcnow()
        if price_changed:
            logging.info(
                f"[{site_name}] Updated price for {name_clean} (ID={existing_record.id}) "
                f"to {item_dict['price']}"
            )
        else:
            logging.info(
                f"[{site_name}] Updated posting date for {name_clean} (ID={existing_record.id}) "
                f"to {new_date}"
            )
        return True

    if updated:
        existing_record.scrapetimestamp = datetime.utcnow()
        logging.info(
            f"[{site_name}] Updated metadata for {name_clean} (ID={existing_record.id})"
        )
        return True

    # No meaningful change
    logging.debug(
        f"[{site_name}] Price unchanged for {name_clean} in {item_dict['state']} zone {item_dict['zone']}"
    )
    return False
+""" +import logging +import re +import time +import requests +from bs4 import BeautifulSoup + +# Default headers to mimic a browser +DEFAULT_HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' +} + +REQUEST_TIMEOUT = 20 +PHONE_FETCH_DELAY = 1 # seconds between phone page requests + + +def make_request(url: str) -> BeautifulSoup | None: + """ + Fetch a URL and return a BeautifulSoup object. + + Args: + url: The URL to fetch + + Returns: + BeautifulSoup object if successful, None otherwise + """ + try: + response = requests.get(url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT) + response.raise_for_status() + return BeautifulSoup(response.content, 'html.parser') + except requests.exceptions.RequestException as e: + logging.error(f"Error fetching {url}: {e}") + return None + + +def fetch_phone_number(base_url: str, phone_page_path: str, state_slug: str = "") -> str | None: + """ + Fetch a phone number from a newenglandoil phones.asp page. + + Args: + base_url: Site base URL (e.g. "https://www.newenglandoil.com") + phone_page_path: Relative path like "phones.asp?zone=1&ID=10&a=MA1" + state_slug: State slug for URL path (e.g. "massachusetts") + + Returns: + Phone number string or None if not found. + """ + # Build full URL - phone_page_path may be relative + if phone_page_path.startswith('http'): + url = phone_page_path + elif state_slug: + url = f"{base_url}/{state_slug}/{phone_page_path}" + else: + url = f"{base_url}/{phone_page_path}" + + time.sleep(PHONE_FETCH_DELAY) + + soup = make_request(url) + if not soup: + return None + + # Look for phone number patterns in the page text + page_text = soup.get_text(" ", strip=True) + + # Common US phone formats: (508) 555-1234, 508-555-1234, 508.555.1234, 5085551234 + # Captures: + # 1. Optional open paren + # 2. 3 digits (area code) + # 3. Optional close paren + # 4. Separator (space, dot, dash) + # 5. 3 digits (prefix) + # 6. 
Separator + # 7. 4 digits (line number) + phone_pattern = re.compile( + r'(?:\(?(\d{3})\)?[\s.\-]?(\d{3})[\s.\-]?(\d{4}))' + ) + + # Try to find a phone number near "Phone:" or "Tel:" first + keyword_pattern = re.compile(r'(?:Phone|Tel|Call|Contact).*?(\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4})', re.IGNORECASE) + keyword_match = keyword_pattern.search(page_text) + + candidate = None + if keyword_match: + # If we found a number near a keyword, use that one. + candidate = keyword_match.group(1) + else: + # Otherwise, look for the first valid phone pattern + matches = phone_pattern.findall(page_text) + for m in matches: + # m is a tuple of groups: ('508', '555', '1234') + full_num = "".join(m) + + # Simple heuristic to avoid dates like 2024, 2025 or common years if adjacent + # But the regex requires 3-3-4 structure so a simple "2024" won't match unless it's like 202-455-1234 + # We can filter out obviously bad "numbers" if needed, e.g. 000-000-0000 + if full_num.startswith('000'): + continue + + candidate = f"{m[0]}-{m[1]}-{m[2]}" + break + + if candidate: + digits = re.sub(r'\D', '', candidate) + if len(digits) == 10: + return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}" + return candidate + + logging.debug(f"No phone number found on {url}") + return None diff --git a/fuel_scraper/parsers.py b/newenglandoil/parsers.py similarity index 54% rename from fuel_scraper/parsers.py rename to newenglandoil/parsers.py index 0fb4ccc..bab74c0 100644 --- a/fuel_scraper/parsers.py +++ b/newenglandoil/parsers.py @@ -3,8 +3,11 @@ HTML parsing module for extracting oil price data from web pages. 
""" import logging import re +from urllib.parse import urlparse, parse_qs from bs4 import BeautifulSoup +from .config import STATE_ABBREV_MAP + def parse_zone_slug_to_int(zone_slug_str: str) -> int | None: """ @@ -54,31 +57,132 @@ def _find_price_table_columns(thead) -> dict | None: return None +def _smart_title(name: str) -> str: + """ + Convert a company name to title case, preserving common abbreviations. + + Handles: LLC, INC, CO, LP, HVAC, A1, etc. + """ + # Common abbreviations that should stay uppercase + keep_upper = {"LLC", "INC", "LP", "HVAC", "II", "III", "IV", "USA", "CT", "MA", "NH", "ME", "RI", "VT"} + words = name.title().split() + result = [] + for word in words: + if word.upper() in keep_upper: + result.append(word.upper()) + else: + result.append(word) + return " ".join(result) + + +def _extract_company_url(company_link) -> str | None: + """ + Extract the actual company URL from a link. + + Handles: + 1. Redirects: click.asp?x=http://example.com&... -> http://example.com + 2. 
Direct links: http://example.com -> http://example.com + """ + if not company_link: + return None + + href = company_link.get('href', '') + if not href: + return None + + url_candidate = None + + if 'click.asp' in href: + # Parse the x parameter which contains the actual URL + try: + parsed = urlparse(href) + params = parse_qs(parsed.query) + extracted = params.get('x', [None])[0] + if extracted: + url_candidate = extracted + except Exception: + pass + elif href.startswith(('http://', 'https://')): + # Direct link + url_candidate = href + + # Validate the candidate URL + if url_candidate: + try: + # Basic validation + if not url_candidate.startswith(('http://', 'https://')): + return None + + lower_url = url_candidate.lower() + # Filter out internal or competitor site loops + if 'newenglandoil.com' in lower_url or 'cheapestoil.com' in lower_url: + return None + + return url_candidate + except Exception: + pass + + return None + + +def _extract_phone_link(cells: list) -> dict | None: + """ + Extract the phone page link info from a row's phone cell. + + Phone link format: phones.asp?zone=1&ID=10&a=MA1 + Returns dict with {url, company_neo_id} or None. + """ + for cell in cells: + link = cell.find('a', href=lambda h: h and 'phones.asp' in h) + if link: + href = link.get('href', '') + try: + parsed = urlparse(href) + params = parse_qs(parsed.query) + neo_id = params.get('ID', [None])[0] + return { + "phone_page_path": href, + "neo_id": neo_id, + } + except Exception: + pass + return None + + def _parse_row(cells: list, column_indices: dict, state_name: str, zone: int) -> dict | None: """ Parse a single table row into a price record. 
- + Args: cells: List of td elements column_indices: Dictionary mapping column names to indices - state_name: State name string + state_name: State name string (lowercase key like "connecticut") zone: Zone number - + Returns: Dictionary with parsed data or None if parsing fails """ max_required_index = max(column_indices.values()) - + if len(cells) <= max_required_index: return None - + # Extract company name (prefer link text if available) company_cell = cells[column_indices['company']] company_name = company_cell.get_text(strip=True) company_link = company_cell.find('a') if company_link: company_name = company_link.get_text(strip=True) - + + # Apply title case normalization + company_name = _smart_title(company_name) + + # Extract company URL from click.asp link + company_url = _extract_company_url(company_link) + + # Extract phone page link info + phone_info = _extract_phone_link(cells) + # Extract and parse price price_str = cells[column_indices['price']].get_text(strip=True) price_float = None @@ -90,20 +194,28 @@ def _parse_row(cells: list, column_indices: dict, state_name: str, zone: int) -> logging.warning(f"Could not parse price: '{price_str}' for {company_name} in {state_name}/zone{zone}.") except Exception as e: logging.error(f"Unexpected error parsing price: '{price_str}' for {company_name}. 
Error: {e}") - + # Extract date date_posted_str = cells[column_indices['date']].get_text(strip=True) - + + # Convert state name to 2-letter abbreviation + state_abbr = STATE_ABBREV_MAP.get(state_name.lower()) + if not state_abbr: + logging.warning(f"Unknown state key: {state_name}, using capitalized form") + state_abbr = state_name.capitalize() + return { - "state": state_name.capitalize(), + "state": state_abbr, "zone": zone, "name": company_name, "price": price_float, "date": date_posted_str, + "url": company_url, + "phone_info": phone_info, } -def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: str) -> list[dict]: +def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: str, site_name: str = "NewEnglandOil") -> list[dict]: """ Parse price tables from a BeautifulSoup page. @@ -117,16 +229,16 @@ def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: s """ data_dicts = [] all_tables = soup.find_all('table') - logging.info(f"Found {len(all_tables)} table(s) on page for {state_name_key} - {zone_slug_str}.") + logging.info(f"[{site_name}] Found {len(all_tables)} table(s) on page for {state_name_key} - {zone_slug_str}.") if not all_tables: - logging.warning(f"No HTML tables found at all for {state_name_key} - {zone_slug_str}.") + logging.warning(f"[{site_name}] No HTML tables found at all for {state_name_key} - {zone_slug_str}.") return data_dicts # Parse zone number from slug zone_int = parse_zone_slug_to_int(zone_slug_str) if zone_int is None: - logging.error(f"Cannot parse zone number for {state_name_key} - {zone_slug_str}. Skipping.") + logging.error(f"[{site_name}] Cannot parse zone number for {state_name_key} - {zone_slug_str}. 
Skipping.") return data_dicts candidate_tables_found = 0 @@ -149,7 +261,7 @@ def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: s # Parse table body tbody = table.find('tbody') if not tbody: - logging.warning(f"Price table identified by headers has no tbody. Skipping. State: {state_name_key}, Zone: {zone_slug_str}") + logging.warning(f"[{site_name}] Price table identified by headers has no tbody. Skipping. State: {state_name_key}, Zone: {zone_slug_str}") continue rows = tbody.find_all('tr') @@ -167,11 +279,11 @@ def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: s elif len(cells) > 0: max_required = max(column_indices.values()) + 1 logging.warning( - f"Skipping row {row_index+1} with insufficient cells ({len(cells)}, need {max_required}) " + f"[{site_name}] Skipping row {row_index+1} with insufficient cells ({len(cells)}, need {max_required}) " f"in {state_name_key}/{zone_slug_str}" ) if candidate_tables_found == 0: - logging.warning(f"No tables matching expected price table structure found for {state_name_key} - {zone_slug_str}.") + logging.warning(f"[{site_name}] No tables matching expected price table structure found for {state_name_key} - {zone_slug_str}.") return data_dicts diff --git a/newenglandoil/scraper.py b/newenglandoil/scraper.py new file mode 100644 index 0000000..2d72634 --- /dev/null +++ b/newenglandoil/scraper.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +""" +Main scraper orchestrator module. +Coordinates fetching, parsing, and storing oil price data. 
def _build_county_lookup(db_session: Session) -> dict:
    """
    Query the county table once and index it as
    (state_abbrev, county_name) -> county_id. Rows without a name are ignored;
    names are stripped of surrounding whitespace.
    """
    lookup = {
        (county.state, county.name.strip()): county.id
        for county in db_session.query(models.County).all()
        if county.name
    }
    logging.info(f"Built county lookup with {len(lookup)} entries")
    return lookup


def _resolve_county_id(state_key: str, zone_number: int, county_lookup: dict) -> int | None:
    """
    Translate a (state_key, zone_number) pair into a county_id using
    ZONE_COUNTY_MAP and the county lookup.

    Returns None when the zone has no mapping or the mapped county is not in
    the lookup (the latter is logged as a warning).
    """
    zone_mapping = ZONE_COUNTY_MAP.get((state_key, zone_number))
    if not zone_mapping:
        logging.debug(f"No zone-to-county mapping for ({state_key}, {zone_number})")
        return None

    state_abbrev, county_name = zone_mapping
    resolved_id = county_lookup.get((state_abbrev, county_name))
    if resolved_id is None:
        logging.warning(f"County not found in DB: ({state_abbrev}, {county_name}) for zone ({state_key}, {zone_number})")
    return resolved_id


def _scrape_zone(
    db_session: Session,
    site_name: str,
    url_template: str,
    base_url: str,
    oil_type: int,
    state_key: str,
    zone_slug: str,
    county_lookup: dict,
    phone_cache: dict,
    refresh_metadata: bool = False,
) -> int:
    """
    Fetch one zone page, parse its price table and upsert every row.

    Args:
        phone_cache: Dict mapping phone page path -> phone string (or None).
            Shared across zones so each company's phone page is fetched at
            most once per run.
        refresh_metadata: Passed to upsert_oil_price as force_update_metadata,
            i.e. stored phone/url values may be overwritten.

    Returns:
        Number of rows parsed from the page (0 if the page could not be
        fetched or yielded no rows)
    """
    target_url = url_template.format(
        base_url=base_url,
        state_slug=state_key,
        zone_slug=zone_slug,
        oil_type=oil_type,
    )

    logging.info(f"[{site_name}] Scraping: {target_url} (State: {state_key}, Zone Slug: {zone_slug})")

    soup = make_request(target_url)
    if not soup:
        logging.warning(f"[{site_name}] Failed to retrieve or parse {target_url}. Skipping.")
        return 0

    parsed_items = parse_price_table(soup, state_key, zone_slug, site_name)
    if not parsed_items:
        logging.info(f"[{site_name}] No data extracted from {target_url}")
        return 0

    # Every row of the page belongs to the same zone, hence the same county.
    zone_number = parse_zone_slug_to_int(zone_slug)
    county_id = (
        _resolve_county_id(state_key, zone_number, county_lookup)
        if zone_number is not None
        else None
    )

    # Only include state_slug in phone URL if the site uses it in its URL template
    phone_slug = state_key if "{state_slug}" in url_template else ""

    changed_count = 0
    for item in parsed_items:
        item["county_id"] = county_id
        item["site_name"] = site_name

        # phone_info is scraper-internal and must not reach the DB layer.
        phone_info = item.pop("phone_info", None)
        # The page path (e.g. "phones.asp?zone=1&ID=10&a=MA1") is the cache key
        # because neo_id alone is only unique per zone.
        page_path = phone_info.get("phone_page_path") if phone_info else None
        if page_path:
            if page_path not in phone_cache:
                neo_id = phone_info.get("neo_id")
                phone = fetch_phone_number(base_url, page_path, phone_slug)
                phone_cache[page_path] = phone
                if phone:
                    logging.info(f"[{site_name}] Fetched phone for {item['name']} (ID={neo_id}): {phone}")
            item["phone"] = phone_cache[page_path]

        if upsert_oil_price(db_session, item, force_update_metadata=refresh_metadata):
            changed_count += 1

    logging.info(
        f"[{site_name}] Processed {len(parsed_items)} records from {site_name} - {state_key}/{zone_slug} "
        f"({changed_count} inserted/updated, county_id={county_id}) (Size: {len(parsed_items)})"
    )

    return len(parsed_items)


def _scrape_site(db_session: Session, site_config: dict, county_lookup: dict, refresh_metadata: bool = False) -> int:
    """
    Walk every state/zone listed in one site configuration.

    Returns:
        Sum of the per-zone row counts returned by _scrape_zone
    """
    site_name = site_config["site_name"]
    base_url = site_config["base_url"]
    url_template = site_config["url_template"]
    oil_type = site_config["oil_type"]

    logging.info(f"--- Processing site: {site_name} ---")

    # One phone cache per site, shared by all of its zones.
    phone_cache = {}
    zone_targets = [
        (state_key, zone_slug)
        for state_key, zone_slugs in site_config["locations"].items()
        for zone_slug in zone_slugs
    ]

    total_records = 0
    for state_key, zone_slug in zone_targets:
        total_records += _scrape_zone(
            db_session=db_session,
            site_name=site_name,
            url_template=url_template,
            base_url=base_url,
            oil_type=oil_type,
            state_key=state_key,
            zone_slug=zone_slug,
            county_lookup=county_lookup,
            phone_cache=phone_cache,
            refresh_metadata=refresh_metadata,
        )

    logging.info(f"Phone cache: fetched {len(phone_cache)} unique company phones for {site_name}")
    return total_records


def main(refresh_metadata: bool = False, target_state_abbr: str | None = None):
    """
    Run the oil price scraper: initialise the DB, scrape every configured
    site (optionally restricted to one state) and commit once at the end.

    Args:
        refresh_metadata: If True, stored phone/url values may be overwritten.
        target_state_abbr: If set (e.g. "MA"), only scrape that state.
    """
    setup_logging()

    state_msg = f" (State: {target_state_abbr})" if target_state_abbr else ""
    logging.info(f"Starting oil price scraper job.{state_msg} (Refresh Metadata: {refresh_metadata})")

    # Initialize database
    try:
        init_db()
    except Exception as e:
        logging.error(f"Failed to initialize database: {e}", exc_info=True)
        return
    logging.info("Database initialized/checked successfully.")

    db_session: Session = SessionLocal()
    total_records = 0

    try:
        county_lookup = _build_county_lookup(db_session)

        # Map the requested abbreviation back to the lowercase state key
        # used in SITES_CONFIG locations.
        state_key_by_abbrev = {abbrev: key for key, abbrev in STATE_ABBREV_MAP.items()}
        target_state_key = None
        if target_state_abbr:
            target_state_key = state_key_by_abbrev.get(target_state_abbr.upper())
            if not target_state_key:
                logging.error(f"Unknown state abbreviation: {target_state_abbr}")
                return

        for site_config in SITES_CONFIG:
            config_to_use = site_config
            if target_state_key:
                if target_state_key not in site_config["locations"]:
                    logging.info(f"Skipping {site_config['site_name']} (does not cover {target_state_abbr})")
                    continue
                # Shallow copy with locations narrowed to the requested state.
                config_to_use = {
                    **site_config,
                    "locations": {target_state_key: site_config["locations"][target_state_key]},
                }

            total_records += _scrape_site(
                db_session, config_to_use, county_lookup, refresh_metadata=refresh_metadata
            )

        # Single commit for the whole run
        if total_records > 0:
            db_session.commit()
            logging.info("Successfully committed records to the database.")
        else:
            logging.info("No new records were queued for database insertion in this run.")

    except Exception as e:
        logging.error(f"An error occurred during scraping or DB operation: {e}", exc_info=True)
        db_session.rollback()
        logging.info("Database transaction rolled back due to error.")
    finally:
        db_session.close()
        logging.info("Database session closed.")

    logging.info("Oil price scraper job finished.")


if __name__ == "__main__":
    main()
-2025-06-01 20:36:59,255 - INFO - [fuel_scraper.py:255] - Added new record for RESIDENTIAL FUEL SYSTEMS in Connecticut zone 3 -2025-06-01 20:36:59,256 - INFO - [fuel_scraper.py:255] - Added new record for CORPORAL HEATING, LLC in Connecticut zone 3 -2025-06-01 20:36:59,257 - INFO - [fuel_scraper.py:255] - Added new record for FORBES FUEL FUEL in Connecticut zone 3 -2025-06-01 20:36:59,258 - INFO - [fuel_scraper.py:255] - Added new record for CENTS-ABLE Oil in Connecticut zone 3 -2025-06-01 20:36:59,259 - INFO - [fuel_scraper.py:255] - Added new record for PURPLEFUELS, LLC in Connecticut zone 3 -2025-06-01 20:36:59,260 - INFO - [fuel_scraper.py:255] - Added new record for BLUE FLAME OIL in Connecticut zone 3 -2025-06-01 20:36:59,262 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN FUEL in Connecticut zone 3 -2025-06-01 20:36:59,263 - INFO - [fuel_scraper.py:255] - Added new record for POLAR ENERGY in Connecticut zone 3 -2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:255] - Added new record for HI-HO PETROLEUM in Connecticut zone 3 -2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:255] - Added new record for JOES FUEL CO in Connecticut zone 3 -2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:257] - Queued 10 records from NewEnglandOil - connecticut/zone3 for DB insertion. -2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone4.asp?type=0 (State: connecticut, Zone Slug: zone4) -2025-06-01 20:36:59,477 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone4. 
-2025-06-01 20:36:59,478 - INFO - [fuel_scraper.py:255] - Added new record for CORPORAL HEATING, LLC in Connecticut zone 4 -2025-06-01 20:36:59,479 - INFO - [fuel_scraper.py:255] - Added new record for PURPLEFUELS, LLC in Connecticut zone 4 -2025-06-01 20:36:59,481 - INFO - [fuel_scraper.py:255] - Added new record for WESTBROOK OIL in Connecticut zone 4 -2025-06-01 20:36:59,481 - INFO - [fuel_scraper.py:255] - Added new record for J J SULLIVAN INC in Connecticut zone 4 -2025-06-01 20:36:59,483 - INFO - [fuel_scraper.py:255] - Added new record for BRAZOS OIL in Connecticut zone 4 -2025-06-01 20:36:59,484 - INFO - [fuel_scraper.py:255] - Added new record for MADISON OIL CO in Connecticut zone 4 -2025-06-01 20:36:59,484 - INFO - [fuel_scraper.py:257] - Queued 6 records from NewEnglandOil - connecticut/zone4 for DB insertion. -2025-06-01 20:36:59,484 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone5.asp?type=0 (State: connecticut, Zone Slug: zone5) -2025-06-01 20:36:59,701 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone5. 
-2025-06-01 20:36:59,703 - INFO - [fuel_scraper.py:255] - Added new record for SIMPLY HEATING OIL in Connecticut zone 5 -2025-06-01 20:36:59,704 - INFO - [fuel_scraper.py:255] - Added new record for CORPORAL HEATING, LLC in Connecticut zone 5 -2025-06-01 20:36:59,705 - INFO - [fuel_scraper.py:255] - Added new record for RESIDENTIAL FUEL SYSTEMS in Connecticut zone 5 -2025-06-01 20:36:59,706 - INFO - [fuel_scraper.py:255] - Added new record for OMNI ENERGY in Connecticut zone 5 -2025-06-01 20:36:59,707 - INFO - [fuel_scraper.py:255] - Added new record for QUALITY OIL CO LLC in Connecticut zone 5 -2025-06-01 20:36:59,708 - INFO - [fuel_scraper.py:255] - Added new record for FIRST FUEL OIL in Connecticut zone 5 -2025-06-01 20:36:59,709 - INFO - [fuel_scraper.py:255] - Added new record for VADNEY FUEL CO in Connecticut zone 5 -2025-06-01 20:36:59,710 - INFO - [fuel_scraper.py:255] - Added new record for WESSON ENERGY INC in Connecticut zone 5 -2025-06-01 20:36:59,710 - INFO - [fuel_scraper.py:255] - Added new record for MANN FUEL OIL in Connecticut zone 5 -2025-06-01 20:36:59,711 - INFO - [fuel_scraper.py:255] - Added new record for DAVIS OIL CO in Connecticut zone 5 -2025-06-01 20:36:59,712 - INFO - [fuel_scraper.py:255] - Added new record for MIMS OIL LLC in Connecticut zone 5 -2025-06-01 20:36:59,713 - INFO - [fuel_scraper.py:255] - Added new record for MCKINLEY OIL LLC in Connecticut zone 5 -2025-06-01 20:36:59,713 - INFO - [fuel_scraper.py:257] - Queued 12 records from NewEnglandOil - connecticut/zone5 for DB insertion. -2025-06-01 20:36:59,713 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone6.asp?type=0 (State: connecticut, Zone Slug: zone6) -2025-06-01 20:36:59,915 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone6. 
-2025-06-01 20:36:59,917 - INFO - [fuel_scraper.py:255] - Added new record for COST LESS OIL in Connecticut zone 6 -2025-06-01 20:36:59,918 - INFO - [fuel_scraper.py:255] - Added new record for BROTHERS OIL CO in Connecticut zone 6 -2025-06-01 20:36:59,919 - INFO - [fuel_scraper.py:255] - Added new record for SIMPLY HEATING OIL in Connecticut zone 6 -2025-06-01 20:36:59,920 - INFO - [fuel_scraper.py:255] - Added new record for FERGUSON OIL in Connecticut zone 6 -2025-06-01 20:36:59,921 - INFO - [fuel_scraper.py:255] - Added new record for TOWN OIL CO in Connecticut zone 6 -2025-06-01 20:36:59,923 - INFO - [fuel_scraper.py:255] - Added new record for OMNI ENERGY in Connecticut zone 6 -2025-06-01 20:36:59,924 - INFO - [fuel_scraper.py:255] - Added new record for SPRINGERS OIL SERVICE in Connecticut zone 6 -2025-06-01 20:36:59,924 - INFO - [fuel_scraper.py:257] - Queued 7 records from NewEnglandOil - connecticut/zone6 for DB insertion. -2025-06-01 20:36:59,924 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone7.asp?type=0 (State: connecticut, Zone Slug: zone7) -2025-06-01 20:37:00,151 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone7. 
-2025-06-01 20:37:00,152 - INFO - [fuel_scraper.py:255] - Added new record for OMNI ENERGY in Connecticut zone 7 -2025-06-01 20:37:00,153 - INFO - [fuel_scraper.py:255] - Added new record for DIME OIL COMPANY in Connecticut zone 7 -2025-06-01 20:37:00,155 - INFO - [fuel_scraper.py:255] - Added new record for 24 7 OIL in Connecticut zone 7 -2025-06-01 20:37:00,156 - INFO - [fuel_scraper.py:255] - Added new record for PRICERITE OIL in Connecticut zone 7 -2025-06-01 20:37:00,157 - INFO - [fuel_scraper.py:255] - Added new record for PLYMOUTH OIL SERVICES in Connecticut zone 7 -2025-06-01 20:37:00,158 - INFO - [fuel_scraper.py:255] - Added new record for THOMASTON OIL & PROPANE in Connecticut zone 7 -2025-06-01 20:37:00,159 - INFO - [fuel_scraper.py:255] - Added new record for CT OIL DIRECT in Connecticut zone 7 -2025-06-01 20:37:00,160 - INFO - [fuel_scraper.py:255] - Added new record for ANYTIME OIL in Connecticut zone 7 -2025-06-01 20:37:00,160 - INFO - [fuel_scraper.py:255] - Added new record for THURSTON ENERGY in Connecticut zone 7 -2025-06-01 20:37:00,161 - INFO - [fuel_scraper.py:255] - Added new record for JENNINGS OIL CO in Connecticut zone 7 -2025-06-01 20:37:00,161 - INFO - [fuel_scraper.py:257] - Queued 10 records from NewEnglandOil - connecticut/zone7 for DB insertion. -2025-06-01 20:37:00,161 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone8.asp?type=0 (State: connecticut, Zone Slug: zone8) -2025-06-01 20:37:00,384 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone8. 
-2025-06-01 20:37:00,385 - INFO - [fuel_scraper.py:255] - Added new record for FIORILLA HEATING OIL in Connecticut zone 8 -2025-06-01 20:37:00,386 - INFO - [fuel_scraper.py:255] - Added new record for PARK CITY FUEL in Connecticut zone 8 -2025-06-01 20:37:00,387 - INFO - [fuel_scraper.py:255] - Added new record for WESTMORE OIL EXPRESS in Connecticut zone 8 -2025-06-01 20:37:00,388 - INFO - [fuel_scraper.py:255] - Added new record for COASTAL ENERGY CT in Connecticut zone 8 -2025-06-01 20:37:00,389 - INFO - [fuel_scraper.py:255] - Added new record for PIRO PETROLEUM in Connecticut zone 8 -2025-06-01 20:37:00,389 - INFO - [fuel_scraper.py:257] - Queued 5 records from NewEnglandOil - connecticut/zone8 for DB insertion. -2025-06-01 20:37:00,389 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone9.asp?type=0 (State: connecticut, Zone Slug: zone9) -2025-06-01 20:37:00,627 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone9. -2025-06-01 20:37:00,629 - INFO - [fuel_scraper.py:255] - Added new record for CASHWAY OIL in Connecticut zone 9 -2025-06-01 20:37:00,630 - INFO - [fuel_scraper.py:255] - Added new record for CT VALLEY OIL in Connecticut zone 9 -2025-06-01 20:37:00,631 - INFO - [fuel_scraper.py:255] - Added new record for E-Z OIL CO in Connecticut zone 9 -2025-06-01 20:37:00,632 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN FUEL OIL INC in Connecticut zone 9 -2025-06-01 20:37:00,633 - INFO - [fuel_scraper.py:255] - Added new record for A1 Oil in Connecticut zone 9 -2025-06-01 20:37:00,634 - INFO - [fuel_scraper.py:255] - Added new record for FERGUSON OIL in Connecticut zone 9 -2025-06-01 20:37:00,634 - INFO - [fuel_scraper.py:257] - Queued 6 records from NewEnglandOil - connecticut/zone9 for DB insertion. 
-2025-06-01 20:37:00,635 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone10.asp?type=0 (State: connecticut, Zone Slug: zone10) -2025-06-01 20:37:00,876 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone10. -2025-06-01 20:37:00,878 - INFO - [fuel_scraper.py:255] - Added new record for ENERGY DIRECT LLC in Connecticut zone 10 -2025-06-01 20:37:00,879 - INFO - [fuel_scraper.py:255] - Added new record for PLAINVILLE OIL CO in Connecticut zone 10 -2025-06-01 20:37:00,881 - INFO - [fuel_scraper.py:255] - Added new record for ROBERTS DISCOUNT FUEL CO in Connecticut zone 10 -2025-06-01 20:37:00,882 - INFO - [fuel_scraper.py:255] - Added new record for TOWER ENERGY in Connecticut zone 10 -2025-06-01 20:37:00,882 - INFO - [fuel_scraper.py:257] - Queued 4 records from NewEnglandOil - connecticut/zone10 for DB insertion. -2025-06-01 20:37:00,882 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone11.asp?type=0 (State: connecticut, Zone Slug: zone11) -2025-06-01 20:37:01,041 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone11.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone11.asp?type=0 -2025-06-01 20:37:01,041 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone11.asp?type=0. Skipping. -2025-06-01 20:37:01,041 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone12.asp?type=0 (State: connecticut, Zone Slug: zone12) -2025-06-01 20:37:01,220 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone12.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone12.asp?type=0 -2025-06-01 20:37:01,221 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone12.asp?type=0. 
Skipping. -2025-06-01 20:37:01,221 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone13.asp?type=0 (State: connecticut, Zone Slug: zone13) -2025-06-01 20:37:01,382 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone13.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone13.asp?type=0 -2025-06-01 20:37:01,382 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone13.asp?type=0. Skipping. -2025-06-01 20:37:01,382 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone14.asp?type=0 (State: connecticut, Zone Slug: zone14) -2025-06-01 20:37:01,545 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone14.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone14.asp?type=0 -2025-06-01 20:37:01,545 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone14.asp?type=0. Skipping. -2025-06-01 20:37:01,545 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone15.asp?type=0 (State: connecticut, Zone Slug: zone15) -2025-06-01 20:37:01,705 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone15.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone15.asp?type=0 -2025-06-01 20:37:01,705 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone15.asp?type=0. Skipping. 
-2025-06-01 20:37:01,705 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone16.asp?type=0 (State: connecticut, Zone Slug: zone16) -2025-06-01 20:37:01,833 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone16.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone16.asp?type=0 -2025-06-01 20:37:01,834 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone16.asp?type=0. Skipping. -2025-06-01 20:37:01,834 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone1.asp?type=0 (State: massachusetts, Zone Slug: zone1) -2025-06-01 20:37:02,148 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone1. -2025-06-01 20:37:02,151 - INFO - [fuel_scraper.py:255] - Added new record for OILMAN INC. in Massachusetts zone 1 -2025-06-01 20:37:02,152 - INFO - [fuel_scraper.py:255] - Added new record for GUARANTEE FUEL in Massachusetts zone 1 -2025-06-01 20:37:02,152 - INFO - [fuel_scraper.py:255] - Added new record for SWEET HEAT in Massachusetts zone 1 -2025-06-01 20:37:02,153 - INFO - [fuel_scraper.py:255] - Added new record for BRIDGEWATER FUEL in Massachusetts zone 1 -2025-06-01 20:37:02,154 - INFO - [fuel_scraper.py:255] - Added new record for LAPUMA FUEL in Massachusetts zone 1 -2025-06-01 20:37:02,154 - INFO - [fuel_scraper.py:255] - Added new record for CAREYS DISCOUNT OIL in Massachusetts zone 1 -2025-06-01 20:37:02,155 - INFO - [fuel_scraper.py:255] - Added new record for FOSSIL FUEL ENTERPRISES in Massachusetts zone 1 -2025-06-01 20:37:02,156 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 1 -2025-06-01 20:37:02,157 - INFO - [fuel_scraper.py:255] - Added new record for G&G FUEL INC in Massachusetts zone 1 -2025-06-01 20:37:02,158 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in 
Massachusetts zone 1 -2025-06-01 20:37:02,158 - INFO - [fuel_scraper.py:255] - Added new record for OHARA FUEL in Massachusetts zone 1 -2025-06-01 20:37:02,159 - INFO - [fuel_scraper.py:255] - Added new record for HIGHWAY FUEL in Massachusetts zone 1 -2025-06-01 20:37:02,160 - INFO - [fuel_scraper.py:255] - Added new record for BURKE OIL in Massachusetts zone 1 -2025-06-01 20:37:02,160 - INFO - [fuel_scraper.py:257] - Queued 13 records from NewEnglandOil - massachusetts/zone1 for DB insertion. -2025-06-01 20:37:02,160 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone2.asp?type=0 (State: massachusetts, Zone Slug: zone2) -2025-06-01 20:37:02,461 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone2. -2025-06-01 20:37:02,463 - INFO - [fuel_scraper.py:255] - Added new record for BOBS OIL COMPANY in Massachusetts zone 2 -2025-06-01 20:37:02,464 - INFO - [fuel_scraper.py:255] - Added new record for FIREMANS FUEL in Massachusetts zone 2 -2025-06-01 20:37:02,465 - INFO - [fuel_scraper.py:255] - Added new record for NARDONE OIL in Massachusetts zone 2 -2025-06-01 20:37:02,466 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 2 -2025-06-01 20:37:02,467 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Massachusetts zone 2 -2025-06-01 20:37:02,468 - INFO - [fuel_scraper.py:255] - Added new record for ARLINGTON ENERGY in Massachusetts zone 2 -2025-06-01 20:37:02,469 - INFO - [fuel_scraper.py:255] - Added new record for NORTHEAST OIL DELIVERY in Massachusetts zone 2 -2025-06-01 20:37:02,469 - INFO - [fuel_scraper.py:255] - Added new record for SAVINO & SONS OIL in Massachusetts zone 2 -2025-06-01 20:37:02,470 - INFO - [fuel_scraper.py:255] - Added new record for GO GREEN OIL in Massachusetts zone 2 -2025-06-01 20:37:02,471 - INFO - [fuel_scraper.py:255] - Added new record for JOHNSON FUEL CO in Massachusetts zone 2 -2025-06-01 20:37:02,472 - INFO - 
[fuel_scraper.py:255] - Added new record for S&D OIL CO in Massachusetts zone 2 -2025-06-01 20:37:02,473 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 2 -2025-06-01 20:37:02,474 - INFO - [fuel_scraper.py:255] - Added new record for MARCHETTI COMMERCIAL FUELS INC. in Massachusetts zone 2 -2025-06-01 20:37:02,475 - INFO - [fuel_scraper.py:255] - Added new record for KATIES DISCOUNT OIL in Massachusetts zone 2 -2025-06-01 20:37:02,475 - INFO - [fuel_scraper.py:257] - Queued 14 records from NewEnglandOil - massachusetts/zone2 for DB insertion. -2025-06-01 20:37:02,475 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone3.asp?type=0 (State: massachusetts, Zone Slug: zone3) -2025-06-01 20:37:02,778 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone3. -2025-06-01 20:37:02,781 - INFO - [fuel_scraper.py:255] - Added new record for ARROW FUEL in Massachusetts zone 3 -2025-06-01 20:37:02,782 - INFO - [fuel_scraper.py:255] - Added new record for OILMAN INC. 
in Massachusetts zone 3 -2025-06-01 20:37:02,783 - INFO - [fuel_scraper.py:255] - Added new record for NICCOLI OIL & ENERGY in Massachusetts zone 3 -2025-06-01 20:37:02,784 - INFO - [fuel_scraper.py:255] - Added new record for LAW FUEL AND ENERGY in Massachusetts zone 3 -2025-06-01 20:37:02,785 - INFO - [fuel_scraper.py:255] - Added new record for BLACKSTONE VALLEY OIL in Massachusetts zone 3 -2025-06-01 20:37:02,786 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 3 -2025-06-01 20:37:02,787 - INFO - [fuel_scraper.py:255] - Added new record for OIL ONLY in Massachusetts zone 3 -2025-06-01 20:37:02,788 - INFO - [fuel_scraper.py:255] - Added new record for GUARANTEE FUEL in Massachusetts zone 3 -2025-06-01 20:37:02,789 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 3 -2025-06-01 20:37:02,790 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Massachusetts zone 3 -2025-06-01 20:37:02,791 - INFO - [fuel_scraper.py:255] - Added new record for GEORGES OIL CO in Massachusetts zone 3 -2025-06-01 20:37:02,792 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Massachusetts zone 3 -2025-06-01 20:37:02,793 - INFO - [fuel_scraper.py:255] - Added new record for PLAINVILLE OIL in Massachusetts zone 3 -2025-06-01 20:37:02,794 - INFO - [fuel_scraper.py:255] - Added new record for 4 SEASONS TRANSPORT LLC in Massachusetts zone 3 -2025-06-01 20:37:02,795 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Massachusetts zone 3 -2025-06-01 20:37:02,795 - INFO - [fuel_scraper.py:257] - Queued 15 records from NewEnglandOil - massachusetts/zone3 for DB insertion. 
-2025-06-01 20:37:02,795 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone4.asp?type=0 (State: massachusetts, Zone Slug: zone4) -2025-06-01 20:37:03,106 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone4. -2025-06-01 20:37:03,109 - INFO - [fuel_scraper.py:255] - Added new record for NICCOLI OIL & ENERGY in Massachusetts zone 4 -2025-06-01 20:37:03,110 - INFO - [fuel_scraper.py:255] - Added new record for BRIDGEWATER FUEL in Massachusetts zone 4 -2025-06-01 20:37:03,111 - INFO - [fuel_scraper.py:255] - Added new record for KEN DUVAL OIL in Massachusetts zone 4 -2025-06-01 20:37:03,112 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN FUEL OIL CO in Massachusetts zone 4 -2025-06-01 20:37:03,113 - INFO - [fuel_scraper.py:255] - Added new record for CAREYS DISCOUNT OIL in Massachusetts zone 4 -2025-06-01 20:37:03,114 - INFO - [fuel_scraper.py:255] - Added new record for CURTIN BROS OIL in Massachusetts zone 4 -2025-06-01 20:37:03,115 - INFO - [fuel_scraper.py:255] - Added new record for SWEET HEAT in Massachusetts zone 4 -2025-06-01 20:37:03,116 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 4 -2025-06-01 20:37:03,117 - INFO - [fuel_scraper.py:255] - Added new record for GUARANTEE FUEL in Massachusetts zone 4 -2025-06-01 20:37:03,118 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT DISCOUNT FUEL in Massachusetts zone 4 -2025-06-01 20:37:03,119 - INFO - [fuel_scraper.py:255] - Added new record for C.O.D. 
PETRO in Massachusetts zone 4 -2025-06-01 20:37:03,120 - INFO - [fuel_scraper.py:255] - Added new record for YANKEE FUEL in Massachusetts zone 4 -2025-06-01 20:37:03,121 - INFO - [fuel_scraper.py:255] - Added new record for FORNI BROTHERS OIL CO in Massachusetts zone 4 -2025-06-01 20:37:03,122 - INFO - [fuel_scraper.py:255] - Added new record for HIGHWAY FUEL in Massachusetts zone 4 -2025-06-01 20:37:03,123 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 4 -2025-06-01 20:37:03,124 - INFO - [fuel_scraper.py:255] - Added new record for BURKE OIL in Massachusetts zone 4 -2025-06-01 20:37:03,125 - INFO - [fuel_scraper.py:255] - Added new record for OHARA FUEL in Massachusetts zone 4 -2025-06-01 20:37:03,126 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 4 -2025-06-01 20:37:03,127 - INFO - [fuel_scraper.py:255] - Added new record for CESARS OIL in Massachusetts zone 4 -2025-06-01 20:37:03,128 - INFO - [fuel_scraper.py:255] - Added new record for G&G FUEL INC in Massachusetts zone 4 -2025-06-01 20:37:03,129 - INFO - [fuel_scraper.py:255] - Added new record for RAYNARD BROTHERS OIL in Massachusetts zone 4 -2025-06-01 20:37:03,129 - INFO - [fuel_scraper.py:257] - Queued 21 records from NewEnglandOil - massachusetts/zone4 for DB insertion. -2025-06-01 20:37:03,129 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone5.asp?type=0 (State: massachusetts, Zone Slug: zone5) -2025-06-01 20:37:03,423 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone5. 
-2025-06-01 20:37:03,425 - INFO - [fuel_scraper.py:255] - Added new record for FIREMANS FUEL in Massachusetts zone 5 -2025-06-01 20:37:03,426 - INFO - [fuel_scraper.py:255] - Added new record for LAW FUEL AND ENERGY in Massachusetts zone 5 -2025-06-01 20:37:03,428 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 5 -2025-06-01 20:37:03,428 - INFO - [fuel_scraper.py:255] - Added new record for SAVINO & SONS OIL in Massachusetts zone 5 -2025-06-01 20:37:03,429 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 5 -2025-06-01 20:37:03,430 - INFO - [fuel_scraper.py:255] - Added new record for ARLINGTON ENERGY in Massachusetts zone 5 -2025-06-01 20:37:03,431 - INFO - [fuel_scraper.py:255] - Added new record for JOHNSON FUEL CO in Massachusetts zone 5 -2025-06-01 20:37:03,432 - INFO - [fuel_scraper.py:255] - Added new record for S&D OIL CO in Massachusetts zone 5 -2025-06-01 20:37:03,433 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 5 -2025-06-01 20:37:03,434 - INFO - [fuel_scraper.py:255] - Added new record for 4 SEASONS TRANSPORT LLC in Massachusetts zone 5 -2025-06-01 20:37:03,434 - INFO - [fuel_scraper.py:257] - Queued 10 records from NewEnglandOil - massachusetts/zone5 for DB insertion. -2025-06-01 20:37:03,434 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone6.asp?type=0 (State: massachusetts, Zone Slug: zone6) -2025-06-01 20:37:03,700 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone6. 
-2025-06-01 20:37:03,703 - INFO - [fuel_scraper.py:255] - Added new record for ARROW FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,704 - INFO - [fuel_scraper.py:255] - Added new record for PRICERITE OIL INC in Massachusetts zone 6 -2025-06-01 20:37:03,705 - INFO - [fuel_scraper.py:255] - Added new record for NICCOLI OIL & ENERGY in Massachusetts zone 6 -2025-06-01 20:37:03,706 - INFO - [fuel_scraper.py:255] - Added new record for LUZO FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,707 - INFO - [fuel_scraper.py:255] - Added new record for BRODEUR & SONS INC in Massachusetts zone 6 -2025-06-01 20:37:03,708 - INFO - [fuel_scraper.py:255] - Added new record for FUEL MAN LLC in Massachusetts zone 6 -2025-06-01 20:37:03,709 - INFO - [fuel_scraper.py:255] - Added new record for AFFORDABLE FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,710 - INFO - [fuel_scraper.py:255] - Added new record for PAPAS FUELS in Massachusetts zone 6 -2025-06-01 20:37:03,710 - INFO - [fuel_scraper.py:255] - Added new record for MIAMI HEAT DISCOUNT FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,711 - INFO - [fuel_scraper.py:255] - Added new record for SAV-ON OIL in Massachusetts zone 6 -2025-06-01 20:37:03,712 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 6 -2025-06-01 20:37:03,713 - INFO - [fuel_scraper.py:255] - Added new record for NITE OIL CO., INC. 
in Massachusetts zone 6 -2025-06-01 20:37:03,714 - INFO - [fuel_scraper.py:255] - Added new record for GEORGES OIL in Massachusetts zone 6 -2025-06-01 20:37:03,715 - INFO - [fuel_scraper.py:255] - Added new record for CHARLIES OIL COMPANY in Massachusetts zone 6 -2025-06-01 20:37:03,716 - INFO - [fuel_scraper.py:255] - Added new record for OIL ONLY in Massachusetts zone 6 -2025-06-01 20:37:03,717 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Massachusetts zone 6 -2025-06-01 20:37:03,718 - INFO - [fuel_scraper.py:255] - Added new record for GUARD OIL in Massachusetts zone 6 -2025-06-01 20:37:03,719 - INFO - [fuel_scraper.py:255] - Added new record for BUTCHIE OIL in Massachusetts zone 6 -2025-06-01 20:37:03,719 - INFO - [fuel_scraper.py:255] - Added new record for PAQUETTES FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,720 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Massachusetts zone 6 -2025-06-01 20:37:03,721 - INFO - [fuel_scraper.py:255] - Added new record for T & M FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,722 - INFO - [fuel_scraper.py:255] - Added new record for ELITE OIL HEATING & AIR CONDITIONING in Massachusetts zone 6 -2025-06-01 20:37:03,723 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 6 -2025-06-01 20:37:03,724 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Massachusetts zone 6 -2025-06-01 20:37:03,724 - INFO - [fuel_scraper.py:257] - Queued 24 records from NewEnglandOil - massachusetts/zone6 for DB insertion. -2025-06-01 20:37:03,724 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone7.asp?type=0 (State: massachusetts, Zone Slug: zone7) -2025-06-01 20:37:04,018 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone7. 
-2025-06-01 20:37:04,020 - INFO - [fuel_scraper.py:255] - Added new record for RED WING OIL CO in Massachusetts zone 7 -2025-06-01 20:37:04,021 - INFO - [fuel_scraper.py:255] - Added new record for MID CAPE DISCOUNT OIL in Massachusetts zone 7 -2025-06-01 20:37:04,022 - INFO - [fuel_scraper.py:255] - Added new record for CAPE DISCOUNT FUEL in Massachusetts zone 7 -2025-06-01 20:37:04,023 - INFO - [fuel_scraper.py:255] - Added new record for COD DISCOUNT FUEL in Massachusetts zone 7 -2025-06-01 20:37:04,024 - INFO - [fuel_scraper.py:255] - Added new record for PILGRIM DISCOUNT OIL in Massachusetts zone 7 -2025-06-01 20:37:04,025 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 7 -2025-06-01 20:37:04,026 - INFO - [fuel_scraper.py:255] - Added new record for PAPAS FUELS in Massachusetts zone 7 -2025-06-01 20:37:04,027 - INFO - [fuel_scraper.py:255] - Added new record for MARKET PRICE OIL in Massachusetts zone 7 -2025-06-01 20:37:04,028 - INFO - [fuel_scraper.py:255] - Added new record for CAPE COD BIOFUELS in Massachusetts zone 7 -2025-06-01 20:37:04,029 - INFO - [fuel_scraper.py:255] - Added new record for THE OIL PEDDLER in Massachusetts zone 7 -2025-06-01 20:37:04,030 - INFO - [fuel_scraper.py:255] - Added new record for GUARD OIL in Massachusetts zone 7 -2025-06-01 20:37:04,031 - INFO - [fuel_scraper.py:255] - Added new record for YOUNGMANS OIL in Massachusetts zone 7 -2025-06-01 20:37:04,031 - INFO - [fuel_scraper.py:257] - Queued 12 records from NewEnglandOil - massachusetts/zone7 for DB insertion. -2025-06-01 20:37:04,031 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone8.asp?type=0 (State: massachusetts, Zone Slug: zone8) -2025-06-01 20:37:04,309 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone8. 
-2025-06-01 20:37:04,312 - INFO - [fuel_scraper.py:255] - Added new record for NARDONE OIL in Massachusetts zone 8 -2025-06-01 20:37:04,313 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Massachusetts zone 8 -2025-06-01 20:37:04,314 - INFO - [fuel_scraper.py:255] - Added new record for S&D OIL CO in Massachusetts zone 8 -2025-06-01 20:37:04,315 - INFO - [fuel_scraper.py:255] - Added new record for COUNTY ENERGY in Massachusetts zone 8 -2025-06-01 20:37:04,316 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 8 -2025-06-01 20:37:04,317 - INFO - [fuel_scraper.py:255] - Added new record for MAHONEY OIL CO in Massachusetts zone 8 -2025-06-01 20:37:04,318 - INFO - [fuel_scraper.py:255] - Added new record for JOHNSON FUEL CO in Massachusetts zone 8 -2025-06-01 20:37:04,319 - INFO - [fuel_scraper.py:255] - Added new record for COLONIAL OIL CO in Massachusetts zone 8 -2025-06-01 20:37:04,320 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 8 -2025-06-01 20:37:04,321 - INFO - [fuel_scraper.py:255] - Added new record for GO GREEN OIL in Massachusetts zone 8 -2025-06-01 20:37:04,322 - INFO - [fuel_scraper.py:255] - Added new record for J A HEALY & SONS OIL CO in Massachusetts zone 8 -2025-06-01 20:37:04,323 - INFO - [fuel_scraper.py:255] - Added new record for BOBS OIL COMPANY in Massachusetts zone 8 -2025-06-01 20:37:04,324 - INFO - [fuel_scraper.py:255] - Added new record for KATIES DISCOUNT OIL in Massachusetts zone 8 -2025-06-01 20:37:04,324 - INFO - [fuel_scraper.py:257] - Queued 13 records from NewEnglandOil - massachusetts/zone8 for DB insertion. -2025-06-01 20:37:04,324 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone9.asp?type=0 (State: massachusetts, Zone Slug: zone9) -2025-06-01 20:37:04,653 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone9. 
-2025-06-01 20:37:04,655 - INFO - [fuel_scraper.py:255] - Added new record for EATON OIL CO. in Massachusetts zone 9 -2025-06-01 20:37:04,656 - INFO - [fuel_scraper.py:255] - Added new record for DIRECT FUEL in Massachusetts zone 9 -2025-06-01 20:37:04,657 - INFO - [fuel_scraper.py:255] - Added new record for FIREMANS FUEL in Massachusetts zone 9 -2025-06-01 20:37:04,659 - INFO - [fuel_scraper.py:255] - Added new record for YNOT OIL in Massachusetts zone 9 -2025-06-01 20:37:04,660 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 9 -2025-06-01 20:37:04,661 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 9 -2025-06-01 20:37:04,662 - INFO - [fuel_scraper.py:255] - Added new record for SOLS FUEL CO in Massachusetts zone 9 -2025-06-01 20:37:04,663 - INFO - [fuel_scraper.py:255] - Added new record for NORTHEAST OIL DELIVERY in Massachusetts zone 9 -2025-06-01 20:37:04,664 - INFO - [fuel_scraper.py:255] - Added new record for GO GREEN OIL in Massachusetts zone 9 -2025-06-01 20:37:04,665 - INFO - [fuel_scraper.py:255] - Added new record for LEIGHTONS HEATING & COOLING INC. in Massachusetts zone 9 -2025-06-01 20:37:04,666 - INFO - [fuel_scraper.py:255] - Added new record for ATLANTIC OIL in Massachusetts zone 9 -2025-06-01 20:37:04,667 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Massachusetts zone 9 -2025-06-01 20:37:04,668 - INFO - [fuel_scraper.py:255] - Added new record for EDGEMONT OIL LLC in Massachusetts zone 9 -2025-06-01 20:37:04,669 - INFO - [fuel_scraper.py:255] - Added new record for SENIOR CITIZENS HEATING OIL in Massachusetts zone 9 -2025-06-01 20:37:04,669 - INFO - [fuel_scraper.py:255] - Added new record for SPARTAN OIL in Massachusetts zone 9 -2025-06-01 20:37:04,670 - INFO - [fuel_scraper.py:255] - Added new record for MARCHETTI COMMERCIAL FUELS INC. 
in Massachusetts zone 9 -2025-06-01 20:37:04,671 - INFO - [fuel_scraper.py:255] - Added new record for KATIES DISCOUNT OIL in Massachusetts zone 9 -2025-06-01 20:37:04,672 - INFO - [fuel_scraper.py:255] - Added new record for SAVINO & SONS OIL in Massachusetts zone 9 -2025-06-01 20:37:04,673 - INFO - [fuel_scraper.py:257] - Queued 18 records from NewEnglandOil - massachusetts/zone9 for DB insertion. -2025-06-01 20:37:04,673 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone10.asp?type=0 (State: massachusetts, Zone Slug: zone10) -2025-06-01 20:37:04,977 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone10. -2025-06-01 20:37:04,980 - INFO - [fuel_scraper.py:255] - Added new record for CHARLTON OIL & PROPANE in Massachusetts zone 10 -2025-06-01 20:37:04,981 - INFO - [fuel_scraper.py:255] - Added new record for LEBLANC OIL LLC in Massachusetts zone 10 -2025-06-01 20:37:04,982 - INFO - [fuel_scraper.py:255] - Added new record for RED STAR OIL CO. 
in Massachusetts zone 10 -2025-06-01 20:37:04,983 - INFO - [fuel_scraper.py:255] - Added new record for NYDAM OIL SVC in Massachusetts zone 10 -2025-06-01 20:37:04,984 - INFO - [fuel_scraper.py:255] - Added new record for PETERSON OIL SVC in Massachusetts zone 10 -2025-06-01 20:37:04,985 - INFO - [fuel_scraper.py:255] - Added new record for HARRIS OIL CO in Massachusetts zone 10 -2025-06-01 20:37:04,986 - INFO - [fuel_scraper.py:255] - Added new record for KENS OIL & HEATING INC in Massachusetts zone 10 -2025-06-01 20:37:04,988 - INFO - [fuel_scraper.py:255] - Added new record for NALA INDUSTRIES INC in Massachusetts zone 10 -2025-06-01 20:37:04,989 - INFO - [fuel_scraper.py:255] - Added new record for HELLEN FUELS CORP in Massachusetts zone 10 -2025-06-01 20:37:04,989 - INFO - [fuel_scraper.py:255] - Added new record for BLACKSTONE VALLEY OIL in Massachusetts zone 10 -2025-06-01 20:37:04,990 - INFO - [fuel_scraper.py:255] - Added new record for OLD MAN OIL in Massachusetts zone 10 -2025-06-01 20:37:04,991 - INFO - [fuel_scraper.py:255] - Added new record for ALS OIL SERVICE in Massachusetts zone 10 -2025-06-01 20:37:04,992 - INFO - [fuel_scraper.py:255] - Added new record for ENDICOTT OIL SERVICE in Massachusetts zone 10 -2025-06-01 20:37:04,993 - INFO - [fuel_scraper.py:255] - Added new record for JUST OIL INC in Massachusetts zone 10 -2025-06-01 20:37:04,994 - INFO - [fuel_scraper.py:255] - Added new record for SOUTHBRIDGE TIRE CO in Massachusetts zone 10 -2025-06-01 20:37:04,995 - INFO - [fuel_scraper.py:255] - Added new record for AUBURN OIL in Massachusetts zone 10 -2025-06-01 20:37:04,996 - INFO - [fuel_scraper.py:255] - Added new record for LMT Oil, Inc. 
in Massachusetts zone 10 -2025-06-01 20:37:04,997 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 10 -2025-06-01 20:37:04,998 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Massachusetts zone 10 -2025-06-01 20:37:04,999 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Massachusetts zone 10 -2025-06-01 20:37:05,000 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Massachusetts zone 10 -2025-06-01 20:37:05,001 - INFO - [fuel_scraper.py:255] - Added new record for SHERMAN OIL in Massachusetts zone 10 -2025-06-01 20:37:05,002 - INFO - [fuel_scraper.py:255] - Added new record for CAMS OIL SERVICE in Massachusetts zone 10 -2025-06-01 20:37:05,003 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN DISCOUNT OIL & PROPANE in Massachusetts zone 10 -2025-06-01 20:37:05,004 - INFO - [fuel_scraper.py:255] - Added new record for RADIO OIL CO in Massachusetts zone 10 -2025-06-01 20:37:05,005 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT OIL SERVICE in Massachusetts zone 10 -2025-06-01 20:37:05,006 - INFO - [fuel_scraper.py:255] - Added new record for VALUE OIL INC in Massachusetts zone 10 -2025-06-01 20:37:05,007 - INFO - [fuel_scraper.py:255] - Added new record for DADDYS OIL in Massachusetts zone 10 -2025-06-01 20:37:05,008 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Massachusetts zone 10 -2025-06-01 20:37:05,009 - INFO - [fuel_scraper.py:255] - Added new record for FAIAS OIL in Massachusetts zone 10 -2025-06-01 20:37:05,010 - INFO - [fuel_scraper.py:255] - Added new record for PIONEER VALLEY OIL & PROPANE in Massachusetts zone 10 -2025-06-01 20:37:05,011 - INFO - [fuel_scraper.py:255] - Added new record for OIL4LESS & PROPANE in Massachusetts zone 10 -2025-06-01 20:37:05,011 - INFO - [fuel_scraper.py:257] - Queued 32 records from NewEnglandOil - massachusetts/zone10 for DB insertion. 
-2025-06-01 20:37:05,011 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone11.asp?type=0 (State: massachusetts, Zone Slug: zone11) -2025-06-01 20:37:05,338 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone11. -2025-06-01 20:37:05,340 - INFO - [fuel_scraper.py:255] - Added new record for NALA INDUSTRIES INC in Massachusetts zone 11 -2025-06-01 20:37:05,341 - INFO - [fuel_scraper.py:255] - Added new record for ORLANDO FUEL SERVICE in Massachusetts zone 11 -2025-06-01 20:37:05,342 - INFO - [fuel_scraper.py:255] - Added new record for LOW COST FUEL in Massachusetts zone 11 -2025-06-01 20:37:05,343 - INFO - [fuel_scraper.py:255] - Added new record for J A HEALY & SONS OIL CO in Massachusetts zone 11 -2025-06-01 20:37:05,344 - INFO - [fuel_scraper.py:255] - Added new record for DORTENZIO OIL COMPANY in Massachusetts zone 11 -2025-06-01 20:37:05,345 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN DISCOUNT OIL & PROPANE in Massachusetts zone 11 -2025-06-01 20:37:05,346 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT OIL SERVICE in Massachusetts zone 11 -2025-06-01 20:37:05,347 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 11 -2025-06-01 20:37:05,348 - INFO - [fuel_scraper.py:255] - Added new record for BLACKSTONE VALLEY OIL in Massachusetts zone 11 -2025-06-01 20:37:05,349 - INFO - [fuel_scraper.py:255] - Added new record for WILL & SON TRUCKING INC in Massachusetts zone 11 -2025-06-01 20:37:05,350 - INFO - [fuel_scraper.py:255] - Added new record for PIONEER VALLEY OIL & PROPANE in Massachusetts zone 11 -2025-06-01 20:37:05,351 - INFO - [fuel_scraper.py:255] - Added new record for JUST OIL INC in Massachusetts zone 11 -2025-06-01 20:37:05,352 - INFO - [fuel_scraper.py:255] - Added new record for M.J. 
MEEHAN EXCAVATING in Massachusetts zone 11 -2025-06-01 20:37:05,353 - INFO - [fuel_scraper.py:255] - Added new record for OIL4LESS & PROPANE in Massachusetts zone 11 -2025-06-01 20:37:05,354 - INFO - [fuel_scraper.py:255] - Added new record for VALUE OIL INC in Massachusetts zone 11 -2025-06-01 20:37:05,354 - INFO - [fuel_scraper.py:255] - Added new record for DADDYS OIL in Massachusetts zone 11 -2025-06-01 20:37:05,355 - INFO - [fuel_scraper.py:257] - Queued 16 records from NewEnglandOil - massachusetts/zone11 for DB insertion. -2025-06-01 20:37:05,355 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone12.asp?type=0 (State: massachusetts, Zone Slug: zone12) -2025-06-01 20:37:05,667 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone12. -2025-06-01 20:37:05,669 - INFO - [fuel_scraper.py:255] - Added new record for KIERAS OIL INC in Massachusetts zone 12 -2025-06-01 20:37:05,670 - INFO - [fuel_scraper.py:255] - Added new record for SURNER DISCOUNT OIL in Massachusetts zone 12 -2025-06-01 20:37:05,672 - INFO - [fuel_scraper.py:255] - Added new record for FUELCO in Massachusetts zone 12 -2025-06-01 20:37:05,673 - INFO - [fuel_scraper.py:255] - Added new record for FAST FILL OIL in Massachusetts zone 12 -2025-06-01 20:37:05,674 - INFO - [fuel_scraper.py:255] - Added new record for RICHARDS FUEL INC in Massachusetts zone 12 -2025-06-01 20:37:05,675 - INFO - [fuel_scraper.py:255] - Added new record for DONOVAN OIL CO in Massachusetts zone 12 -2025-06-01 20:37:05,676 - INFO - [fuel_scraper.py:255] - Added new record for U S OIL CO in Massachusetts zone 12 -2025-06-01 20:37:05,677 - INFO - [fuel_scraper.py:255] - Added new record for BOTTOM LINE OIL in Massachusetts zone 12 -2025-06-01 20:37:05,678 - INFO - [fuel_scraper.py:255] - Added new record for PIONEER VALLEY OIL & PROPANE in Massachusetts zone 12 -2025-06-01 20:37:05,679 - INFO - [fuel_scraper.py:255] - Added new record for DANS OIL CO in 
Massachusetts zone 12 -2025-06-01 20:37:05,680 - INFO - [fuel_scraper.py:255] - Added new record for FRASCO FUEL OIL in Massachusetts zone 12 -2025-06-01 20:37:05,680 - INFO - [fuel_scraper.py:257] - Queued 11 records from NewEnglandOil - massachusetts/zone12 for DB insertion. -2025-06-01 20:37:05,680 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone1.asp?type=0 (State: newhampshire, Zone Slug: zone1) -2025-06-01 20:37:06,017 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone1. -2025-06-01 20:37:06,019 - INFO - [fuel_scraper.py:255] - Added new record for HARRIS ENERGY in Newhampshire zone 1 -2025-06-01 20:37:06,021 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 1 -2025-06-01 20:37:06,022 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 1 -2025-06-01 20:37:06,023 - INFO - [fuel_scraper.py:255] - Added new record for PRESBY OIL in Newhampshire zone 1 -2025-06-01 20:37:06,024 - INFO - [fuel_scraper.py:255] - Added new record for AL'S PLUMBING HEATING & FUELS in Newhampshire zone 1 -2025-06-01 20:37:06,025 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 1 -2025-06-01 20:37:06,026 - INFO - [fuel_scraper.py:255] - Added new record for FITCH FUEL CO in Newhampshire zone 1 -2025-06-01 20:37:06,026 - INFO - [fuel_scraper.py:257] - Queued 7 records from NewEnglandOil - newhampshire/zone1 for DB insertion. -2025-06-01 20:37:06,026 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone2.asp?type=0 (State: newhampshire, Zone Slug: zone2) -2025-06-01 20:37:06,280 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone2. 
-2025-06-01 20:37:06,283 - INFO - [fuel_scraper.py:255] - Added new record for NEIGHBORS OIL in Newhampshire zone 2 -2025-06-01 20:37:06,284 - INFO - [fuel_scraper.py:255] - Added new record for FIELDINGS OIL & PROPANE in Newhampshire zone 2 -2025-06-01 20:37:06,285 - INFO - [fuel_scraper.py:255] - Added new record for GRANITE STATE OIL in Newhampshire zone 2 -2025-06-01 20:37:06,286 - INFO - [fuel_scraper.py:255] - Added new record for QUALITY FUELS LLC in Newhampshire zone 2 -2025-06-01 20:37:06,287 - INFO - [fuel_scraper.py:255] - Added new record for NIBROC OIL in Newhampshire zone 2 -2025-06-01 20:37:06,288 - INFO - [fuel_scraper.py:255] - Added new record for WELCH OIL in Newhampshire zone 2 -2025-06-01 20:37:06,289 - INFO - [fuel_scraper.py:255] - Added new record for CARDINAL & GLIDDEN OIL CO., INC. in Newhampshire zone 2 -2025-06-01 20:37:06,290 - INFO - [fuel_scraper.py:255] - Added new record for ATLANTC OIL in Newhampshire zone 2 -2025-06-01 20:37:06,291 - INFO - [fuel_scraper.py:255] - Added new record for REED FAMILY ENERGY in Newhampshire zone 2 -2025-06-01 20:37:06,292 - INFO - [fuel_scraper.py:255] - Added new record for LEOS FUEL in Newhampshire zone 2 -2025-06-01 20:37:06,293 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Newhampshire zone 2 -2025-06-01 20:37:06,294 - INFO - [fuel_scraper.py:255] - Added new record for 603 OIL CO. 
in Newhampshire zone 2 -2025-06-01 20:37:06,295 - INFO - [fuel_scraper.py:255] - Added new record for NOBLE FUELS in Newhampshire zone 2 -2025-06-01 20:37:06,296 - INFO - [fuel_scraper.py:255] - Added new record for ONLINE FUEL CO in Newhampshire zone 2 -2025-06-01 20:37:06,297 - INFO - [fuel_scraper.py:255] - Added new record for RC NIGHELLI HEATING SERVICES, LLC in Newhampshire zone 2 -2025-06-01 20:37:06,298 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Newhampshire zone 2 -2025-06-01 20:37:06,299 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 2 -2025-06-01 20:37:06,300 - INFO - [fuel_scraper.py:255] - Added new record for DEKES FUEL, LLC in Newhampshire zone 2 -2025-06-01 20:37:06,301 - INFO - [fuel_scraper.py:255] - Added new record for LOCAL PRIDE HEATING OIL in Newhampshire zone 2 -2025-06-01 20:37:06,302 - INFO - [fuel_scraper.py:255] - Added new record for HOMETOWN OIL in Newhampshire zone 2 -2025-06-01 20:37:06,303 - INFO - [fuel_scraper.py:255] - Added new record for SNH CLEAN ENERGY in Newhampshire zone 2 -2025-06-01 20:37:06,304 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT ENERGY in Newhampshire zone 2 -2025-06-01 20:37:06,304 - INFO - [fuel_scraper.py:257] - Queued 22 records from NewEnglandOil - newhampshire/zone2 for DB insertion. -2025-06-01 20:37:06,304 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone3.asp?type=0 (State: newhampshire, Zone Slug: zone3) -2025-06-01 20:37:06,664 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone3. -2025-06-01 20:37:06,666 - INFO - [fuel_scraper.py:255] - Added new record for HEBERT FUEL CO in Newhampshire zone 3 -2025-06-01 20:37:06,667 - INFO - [fuel_scraper.py:255] - Added new record for CONTOOCOOK VALLEY FUEL SVC in Newhampshire zone 3 -2025-06-01 20:37:06,669 - INFO - [fuel_scraper.py:255] - Added new record for 603 OIL CO. 
in Newhampshire zone 3 -2025-06-01 20:37:06,669 - INFO - [fuel_scraper.py:255] - Added new record for JOELS OIL in Newhampshire zone 3 -2025-06-01 20:37:06,670 - INFO - [fuel_scraper.py:255] - Added new record for DUTILE & SONS INC in Newhampshire zone 3 -2025-06-01 20:37:06,671 - INFO - [fuel_scraper.py:255] - Added new record for FOLEY OIL CO in Newhampshire zone 3 -2025-06-01 20:37:06,672 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 3 -2025-06-01 20:37:06,672 - INFO - [fuel_scraper.py:257] - Queued 7 records from NewEnglandOil - newhampshire/zone3 for DB insertion. -2025-06-01 20:37:06,672 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone4.asp?type=0 (State: newhampshire, Zone Slug: zone4) -2025-06-01 20:37:07,022 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone4. -2025-06-01 20:37:07,024 - INFO - [fuel_scraper.py:255] - Added new record for R E HINKLEY CO in Newhampshire zone 4 -2025-06-01 20:37:07,024 - INFO - [fuel_scraper.py:257] - Queued 1 records from NewEnglandOil - newhampshire/zone4 for DB insertion. -2025-06-01 20:37:07,024 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone5.asp?type=0 (State: newhampshire, Zone Slug: zone5) -2025-06-01 20:37:07,369 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone5. 
-2025-06-01 20:37:07,371 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL OF KEENE in Newhampshire zone 5 -2025-06-01 20:37:07,372 - INFO - [fuel_scraper.py:255] - Added new record for DAVIS OIL CO in Newhampshire zone 5 -2025-06-01 20:37:07,373 - INFO - [fuel_scraper.py:255] - Added new record for REDS OF JAFFREY LLC in Newhampshire zone 5 -2025-06-01 20:37:07,375 - INFO - [fuel_scraper.py:255] - Added new record for SWANZEY OIL in Newhampshire zone 5 -2025-06-01 20:37:07,376 - INFO - [fuel_scraper.py:255] - Added new record for BOBS FUEL COMPANY in Newhampshire zone 5 -2025-06-01 20:37:07,376 - INFO - [fuel_scraper.py:257] - Queued 5 records from NewEnglandOil - newhampshire/zone5 for DB insertion. -2025-06-01 20:37:07,376 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone6.asp?type=0 (State: newhampshire, Zone Slug: zone6) -2025-06-01 20:37:07,620 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone6. 
-2025-06-01 20:37:07,623 - INFO - [fuel_scraper.py:255] - Added new record for HEBERT FUEL CO in Newhampshire zone 6 -2025-06-01 20:37:07,624 - INFO - [fuel_scraper.py:255] - Added new record for NASHUA FUEL in Newhampshire zone 6 -2025-06-01 20:37:07,625 - INFO - [fuel_scraper.py:255] - Added new record for COUNTY ENERGY in Newhampshire zone 6 -2025-06-01 20:37:07,626 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Newhampshire zone 6 -2025-06-01 20:37:07,627 - INFO - [fuel_scraper.py:255] - Added new record for FUEL NRG in Newhampshire zone 6 -2025-06-01 20:37:07,628 - INFO - [fuel_scraper.py:255] - Added new record for SOUTHERN NEW HAMPSHIRE ENERGY in Newhampshire zone 6 -2025-06-01 20:37:07,629 - INFO - [fuel_scraper.py:255] - Added new record for DEEP DISCOUNT OIL in Newhampshire zone 6 -2025-06-01 20:37:07,630 - INFO - [fuel_scraper.py:255] - Added new record for SNH CLEAN ENERGY in Newhampshire zone 6 -2025-06-01 20:37:07,630 - INFO - [fuel_scraper.py:257] - Queued 8 records from NewEnglandOil - newhampshire/zone6 for DB insertion. -2025-06-01 20:37:07,630 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone1.asp?type=0 (State: rhodeisland, Zone Slug: zone1) -2025-06-01 20:37:07,860 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone1. -2025-06-01 20:37:07,862 - INFO - [fuel_scraper.py:255] - Added new record for AFFORDABLE FUEL in Rhodeisland zone 1 -2025-06-01 20:37:07,864 - INFO - [fuel_scraper.py:255] - Added new record for NITE OIL CO., INC. 
in Rhodeisland zone 1 -2025-06-01 20:37:07,865 - INFO - [fuel_scraper.py:255] - Added new record for CHARLIES OIL COMPANY in Rhodeisland zone 1 -2025-06-01 20:37:07,866 - INFO - [fuel_scraper.py:255] - Added new record for DUDEK OIL CO in Rhodeisland zone 1 -2025-06-01 20:37:07,867 - INFO - [fuel_scraper.py:255] - Added new record for THE OIL MAN in Rhodeisland zone 1 -2025-06-01 20:37:07,868 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Rhodeisland zone 1 -2025-06-01 20:37:07,869 - INFO - [fuel_scraper.py:255] - Added new record for ELITE OIL HEATING & AIR CONDITIONING in Rhodeisland zone 1 -2025-06-01 20:37:07,870 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 1 -2025-06-01 20:37:07,871 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 1 -2025-06-01 20:37:07,871 - INFO - [fuel_scraper.py:257] - Queued 9 records from NewEnglandOil - rhodeisland/zone1 for DB insertion. -2025-06-01 20:37:07,871 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone2.asp?type=0 (State: rhodeisland, Zone Slug: zone2) -2025-06-01 20:37:08,151 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone2. -2025-06-01 20:37:08,154 - INFO - [fuel_scraper.py:255] - Added new record for PRICERITE OIL INC in Rhodeisland zone 2 -2025-06-01 20:37:08,155 - INFO - [fuel_scraper.py:255] - Added new record for PROFESSIONAL HEATING/SAVE-ON OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,156 - INFO - [fuel_scraper.py:255] - Added new record for A-STAR OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,157 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Rhodeisland zone 2 -2025-06-01 20:37:08,157 - INFO - [fuel_scraper.py:255] - Added new record for AFFORDABLE FUEL in Rhodeisland zone 2 -2025-06-01 20:37:08,158 - INFO - [fuel_scraper.py:255] - Added new record for RAMBONE & SPRAQUE OIL SERVICE INC. 
in Rhodeisland zone 2 -2025-06-01 20:37:08,159 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,160 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Rhodeisland zone 2 -2025-06-01 20:37:08,161 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Rhodeisland zone 2 -2025-06-01 20:37:08,162 - INFO - [fuel_scraper.py:255] - Added new record for HENRY OIL COMPANY in Rhodeisland zone 2 -2025-06-01 20:37:08,163 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,164 - INFO - [fuel_scraper.py:255] - Added new record for ANTHONYS OIL & WATER, LLC in Rhodeisland zone 2 -2025-06-01 20:37:08,165 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Rhodeisland zone 2 -2025-06-01 20:37:08,166 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Rhodeisland zone 2 -2025-06-01 20:37:08,166 - INFO - [fuel_scraper.py:255] - Added new record for BUTCHIE OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,168 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT FUEL OIL & Propane in Rhodeisland zone 2 -2025-06-01 20:37:08,168 - INFO - [fuel_scraper.py:255] - Added new record for MAJOR OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,169 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 2 -2025-06-01 20:37:08,170 - INFO - [fuel_scraper.py:255] - Added new record for WICKED WARM OIL in Rhodeisland zone 2 -2025-06-01 20:37:08,171 - INFO - [fuel_scraper.py:257] - Queued 19 records from NewEnglandOil - rhodeisland/zone2 for DB insertion. -2025-06-01 20:37:08,171 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone3.asp?type=0 (State: rhodeisland, Zone Slug: zone3) -2025-06-01 20:37:08,430 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone3. 
-2025-06-01 20:37:08,433 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Rhodeisland zone 3 -2025-06-01 20:37:08,434 - INFO - [fuel_scraper.py:255] - Added new record for GUARDIAN FUEL ONLINE in Rhodeisland zone 3 -2025-06-01 20:37:08,435 - INFO - [fuel_scraper.py:255] - Added new record for A-STAR OIL in Rhodeisland zone 3 -2025-06-01 20:37:08,436 - INFO - [fuel_scraper.py:255] - Added new record for HENRY OIL COMPANY in Rhodeisland zone 3 -2025-06-01 20:37:08,437 - INFO - [fuel_scraper.py:255] - Added new record for PROFESSIONAL HEATING/SAVE-ON OIL in Rhodeisland zone 3 -2025-06-01 20:37:08,438 - INFO - [fuel_scraper.py:255] - Added new record for VALLEY FUEL in Rhodeisland zone 3 -2025-06-01 20:37:08,439 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 3 -2025-06-01 20:37:08,440 - INFO - [fuel_scraper.py:255] - Added new record for NET FUELS in Rhodeisland zone 3 -2025-06-01 20:37:08,441 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT FUEL OIL & Propane in Rhodeisland zone 3 -2025-06-01 20:37:08,442 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Rhodeisland zone 3 -2025-06-01 20:37:08,443 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Rhodeisland zone 3 -2025-06-01 20:37:08,444 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 3 -2025-06-01 20:37:08,445 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT OIL in Rhodeisland zone 3 -2025-06-01 20:37:08,446 - INFO - [fuel_scraper.py:255] - Added new record for MAJOR OIL in Rhodeisland zone 3 -2025-06-01 20:37:08,446 - INFO - [fuel_scraper.py:257] - Queued 14 records from NewEnglandOil - rhodeisland/zone3 for DB insertion. 
-2025-06-01 20:37:08,446 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone4.asp?type=0 (State: rhodeisland, Zone Slug: zone4) -2025-06-01 20:37:08,691 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone4. -2025-06-01 20:37:08,694 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Rhodeisland zone 4 -2025-06-01 20:37:08,695 - INFO - [fuel_scraper.py:255] - Added new record for A-STAR OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,696 - INFO - [fuel_scraper.py:255] - Added new record for SPEEDY OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,697 - INFO - [fuel_scraper.py:255] - Added new record for HENRY OIL COMPANY in Rhodeisland zone 4 -2025-06-01 20:37:08,698 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,699 - INFO - [fuel_scraper.py:255] - Added new record for MAJOR OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,700 - INFO - [fuel_scraper.py:255] - Added new record for PROFESSIONAL HEATING/SAVE-ON OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,701 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,702 - INFO - [fuel_scraper.py:255] - Added new record for ELITE OIL HEATING & AIR CONDITIONING in Rhodeisland zone 4 -2025-06-01 20:37:08,703 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Rhodeisland zone 4 -2025-06-01 20:37:08,704 - INFO - [fuel_scraper.py:255] - Added new record for ANTHONYS OIL & WATER, LLC in Rhodeisland zone 4 -2025-06-01 20:37:08,705 - INFO - [fuel_scraper.py:255] - Added new record for NET FUELS in Rhodeisland zone 4 -2025-06-01 20:37:08,706 - INFO - [fuel_scraper.py:255] - Added new record for RAMBONE & SPRAQUE OIL SERVICE INC in Rhodeisland zone 4 -2025-06-01 20:37:08,707 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT FUEL OIL & PROPANE in Rhodeisland zone 4 -2025-06-01 20:37:08,708 - INFO - 
[fuel_scraper.py:255] - Added new record for PEREZ OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,709 - INFO - [fuel_scraper.py:255] - Added new record for ADAMS FAMILY OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,710 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 4 -2025-06-01 20:37:08,711 - INFO - [fuel_scraper.py:255] - Added new record for AZOREAN OIL in Rhodeisland zone 4 -2025-06-01 20:37:08,712 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Rhodeisland zone 4 -2025-06-01 20:37:08,713 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Rhodeisland zone 4 -2025-06-01 20:37:08,713 - INFO - [fuel_scraper.py:257] - Queued 20 records from NewEnglandOil - rhodeisland/zone4 for DB insertion. -2025-06-01 20:37:08,713 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0 (State: rhodeisland, Zone Slug: zone5) -2025-06-01 20:37:08,838 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0 -2025-06-01 20:37:08,839 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0. Skipping. -2025-06-01 20:37:08,839 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone1.asp?type=0 (State: vermont, Zone Slug: zone1) -2025-06-01 20:37:09,047 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone1. -2025-06-01 20:37:09,048 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone1. 
-2025-06-01 20:37:09,048 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone1.asp?type=0 -2025-06-01 20:37:09,048 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone2.asp?type=0 (State: vermont, Zone Slug: zone2) -2025-06-01 20:37:09,465 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone2. -2025-06-01 20:37:09,466 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone2. -2025-06-01 20:37:09,466 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone2.asp?type=0 -2025-06-01 20:37:09,466 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone3.asp?type=0 (State: vermont, Zone Slug: zone3) -2025-06-01 20:37:09,840 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone3. -2025-06-01 20:37:09,841 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone3. -2025-06-01 20:37:09,841 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone3.asp?type=0 -2025-06-01 20:37:09,841 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone4.asp?type=0 (State: vermont, Zone Slug: zone4) -2025-06-01 20:37:10,228 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone4. -2025-06-01 20:37:10,229 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone4. 
-2025-06-01 20:37:10,229 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone4.asp?type=0 -2025-06-01 20:37:10,229 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone5.asp?type=0 (State: vermont, Zone Slug: zone5) -2025-06-01 20:37:10,603 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone5. -2025-06-01 20:37:10,603 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone5. -2025-06-01 20:37:10,603 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone5.asp?type=0 -2025-06-01 20:37:10,603 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone6.asp?type=0 (State: vermont, Zone Slug: zone6) -2025-06-01 20:37:10,760 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/vermont/zone6.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/vermont/zone6.asp?type=0 -2025-06-01 20:37:10,760 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/vermont/zone6.asp?type=0. Skipping. -2025-06-01 20:37:10,760 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone1.asp?type=0 (State: newyork, Zone Slug: zone1) -2025-06-01 20:37:10,888 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone1.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone1.asp?type=0 -2025-06-01 20:37:10,888 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone1.asp?type=0. Skipping. 
-2025-06-01 20:37:10,888 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone2.asp?type=0 (State: newyork, Zone Slug: zone2) -2025-06-01 20:37:11,036 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone2.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone2.asp?type=0 -2025-06-01 20:37:11,036 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone2.asp?type=0. Skipping. -2025-06-01 20:37:11,036 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone3.asp?type=0 (State: newyork, Zone Slug: zone3) -2025-06-01 20:37:11,193 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone3.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone3.asp?type=0 -2025-06-01 20:37:11,193 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone3.asp?type=0. Skipping. -2025-06-01 20:37:11,193 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone4.asp?type=0 (State: newyork, Zone Slug: zone4) -2025-06-01 20:37:11,364 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone4.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone4.asp?type=0 -2025-06-01 20:37:11,364 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone4.asp?type=0. Skipping. 
-2025-06-01 20:37:11,364 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone5.asp?type=0 (State: newyork, Zone Slug: zone5) -2025-06-01 20:37:11,523 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone5.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone5.asp?type=0 -2025-06-01 20:37:11,523 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone5.asp?type=0. Skipping. -2025-06-01 20:37:11,523 - INFO - [fuel_scraper.py:204] - --- Processing site: MaineOil --- -2025-06-01 20:37:11,523 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone1.asp?type=0 (State: maine, Zone Slug: zone1) -2025-06-01 20:37:11,799 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone1. -2025-06-01 20:37:11,801 - INFO - [fuel_scraper.py:255] - Added new record for AJs Discount Oil in Maine zone 1 -2025-06-01 20:37:11,802 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 1 -2025-06-01 20:37:11,803 - INFO - [fuel_scraper.py:255] - Added new record for Pit Stop Fuels in Maine zone 1 -2025-06-01 20:37:11,804 - INFO - [fuel_scraper.py:255] - Added new record for Sea Land Energy in Maine zone 1 -2025-06-01 20:37:11,805 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 1 -2025-06-01 20:37:11,806 - INFO - [fuel_scraper.py:255] - Added new record for Pauls Oil Service in Maine zone 1 -2025-06-01 20:37:11,807 - INFO - [fuel_scraper.py:255] - Added new record for Higgins Energy in Maine zone 1 -2025-06-01 20:37:11,808 - INFO - [fuel_scraper.py:255] - Added new record for Willow Creek Fuel in Maine zone 1 -2025-06-01 20:37:11,809 - INFO - [fuel_scraper.py:255] - Added new record for Maine Heating Solutions in Maine zone 1 -2025-06-01 20:37:11,810 - INFO - [fuel_scraper.py:255] - Added new record for Atlantic Heating Company Inc in 
Maine zone 1 -2025-06-01 20:37:11,811 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 1 -2025-06-01 20:37:11,812 - INFO - [fuel_scraper.py:255] - Added new record for Conroys Oil in Maine zone 1 -2025-06-01 20:37:11,812 - INFO - [fuel_scraper.py:255] - Added new record for Dales Cash Fuel in Maine zone 1 -2025-06-01 20:37:11,813 - INFO - [fuel_scraper.py:255] - Added new record for Maine Standard Biofuels in Maine zone 1 -2025-06-01 20:37:11,814 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 1 -2025-06-01 20:37:11,815 - INFO - [fuel_scraper.py:255] - Added new record for Lowest Price Oil in Maine zone 1 -2025-06-01 20:37:11,816 - INFO - [fuel_scraper.py:255] - Added new record for Ace Oil in Maine zone 1 -2025-06-01 20:37:11,817 - INFO - [fuel_scraper.py:255] - Added new record for Northeast Fuels in Maine zone 1 -2025-06-01 20:37:11,818 - INFO - [fuel_scraper.py:255] - Added new record for Desrochers Oil in Maine zone 1 -2025-06-01 20:37:11,819 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 1 -2025-06-01 20:37:11,820 - INFO - [fuel_scraper.py:255] - Added new record for Rama Oil in Maine zone 1 -2025-06-01 20:37:11,821 - INFO - [fuel_scraper.py:255] - Added new record for Rinaldi Energy in Maine zone 1 -2025-06-01 20:37:11,822 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 1 -2025-06-01 20:37:11,822 - INFO - [fuel_scraper.py:255] - Added new record for Vic & Sons Fuel Co. 
in Maine zone 1 -2025-06-01 20:37:11,823 - INFO - [fuel_scraper.py:255] - Added new record for Atlantic Heating Company Inc in Maine zone 1 -2025-06-01 20:37:11,824 - INFO - [fuel_scraper.py:255] - Added new record for Cleaves Energy in Maine zone 1 -2025-06-01 20:37:11,825 - INFO - [fuel_scraper.py:255] - Added new record for Coastline Energy LLC in Maine zone 1 -2025-06-01 20:37:11,826 - INFO - [fuel_scraper.py:255] - Added new record for Daves Oil in Maine zone 1 -2025-06-01 20:37:11,827 - INFO - [fuel_scraper.py:255] - Added new record for SoPo Fuel in Maine zone 1 -2025-06-01 20:37:11,828 - INFO - [fuel_scraper.py:255] - Added new record for Order Oil Online in Maine zone 1 -2025-06-01 20:37:11,829 - INFO - [fuel_scraper.py:255] - Added new record for Maine-Ly Heating Online in Maine zone 1 -2025-06-01 20:37:11,830 - INFO - [fuel_scraper.py:255] - Added new record for Cash Energy in Maine zone 1 -2025-06-01 20:37:11,831 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 1 -2025-06-01 20:37:11,831 - INFO - [fuel_scraper.py:257] - Queued 33 records from MaineOil - maine/zone1 for DB insertion. -2025-06-01 20:37:11,831 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone2.asp?type=0 (State: maine, Zone Slug: zone2) -2025-06-01 20:37:12,123 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone2. 
-2025-06-01 20:37:12,126 - INFO - [fuel_scraper.py:255] - Added new record for Bobs Cash Fuel in Maine zone 2 -2025-06-01 20:37:12,127 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 2 -2025-06-01 20:37:12,128 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 2 -2025-06-01 20:37:12,129 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 2 -2025-06-01 20:37:12,131 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2 -2025-06-01 20:37:12,132 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2 -2025-06-01 20:37:12,133 - INFO - [fuel_scraper.py:255] - Added new record for C.O.D. Cash Fuel in Maine zone 2 -2025-06-01 20:37:12,134 - INFO - [fuel_scraper.py:255] - Added new record for M.A. Haskell Fuel Company, LLC. in Maine zone 2 -2025-06-01 20:37:12,135 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2 -2025-06-01 20:37:12,136 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 2 -2025-06-01 20:37:12,137 - INFO - [fuel_scraper.py:255] - Added new record for C.B. Haskell Fuel Co. in Maine zone 2 -2025-06-01 20:37:12,138 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2 -2025-06-01 20:37:12,139 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 2 -2025-06-01 20:37:12,140 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. 
in Maine zone 2 -2025-06-01 20:37:12,141 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2 -2025-06-01 20:37:12,142 - INFO - [fuel_scraper.py:255] - Added new record for G & G Cash Fuel in Maine zone 2 -2025-06-01 20:37:12,143 - INFO - [fuel_scraper.py:255] - Added new record for Lisbon Fuel Co in Maine zone 2 -2025-06-01 20:37:12,144 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 2 -2025-06-01 20:37:12,144 - INFO - [fuel_scraper.py:257] - Queued 18 records from MaineOil - maine/zone2 for DB insertion. -2025-06-01 20:37:12,144 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone3.asp?type=0 (State: maine, Zone Slug: zone3) -2025-06-01 20:37:12,439 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone3. -2025-06-01 20:37:12,441 - INFO - [fuel_scraper.py:255] - Added new record for Lisbon Fuel Co in Maine zone 3 -2025-06-01 20:37:12,443 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 3 -2025-06-01 20:37:12,444 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 3 -2025-06-01 20:37:12,445 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3 -2025-06-01 20:37:12,446 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 3 -2025-06-01 20:37:12,446 - INFO - [fuel_scraper.py:255] - Added new record for G & G Cash Fuel in Maine zone 3 -2025-06-01 20:37:12,447 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3 -2025-06-01 20:37:12,448 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3 -2025-06-01 20:37:12,449 - INFO - [fuel_scraper.py:255] - Added new record for Maine Heating Solutions in Maine zone 3 -2025-06-01 20:37:12,450 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. 
in Maine zone 3 -2025-06-01 20:37:12,451 - INFO - [fuel_scraper.py:255] - Added new record for Rinaldi Energy in Maine zone 3 -2025-06-01 20:37:12,452 - INFO - [fuel_scraper.py:255] - Added new record for S K Fuel in Maine zone 3 -2025-06-01 20:37:12,453 - INFO - [fuel_scraper.py:255] - Added new record for Luckys Cash Fuel in Maine zone 3 -2025-06-01 20:37:12,454 - INFO - [fuel_scraper.py:255] - Added new record for Maine-Ly Heating Online in Maine zone 3 -2025-06-01 20:37:12,455 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3 -2025-06-01 20:37:12,456 - INFO - [fuel_scraper.py:255] - Added new record for Lake Region Energy in Maine zone 3 -2025-06-01 20:37:12,457 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 3 -2025-06-01 20:37:12,458 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3 -2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:255] - Added new record for Big G Heating Fuel in Maine zone 3 -2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 3 -2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:257] - Queued 20 records from MaineOil - maine/zone3 for DB insertion. -2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone4.asp?type=0 (State: maine, Zone Slug: zone4) -2025-06-01 20:37:12,758 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone4. 
-2025-06-01 20:37:12,761 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 4 -2025-06-01 20:37:12,762 - INFO - [fuel_scraper.py:255] - Added new record for Alfred Oil in Maine zone 4 -2025-06-01 20:37:12,763 - INFO - [fuel_scraper.py:255] - Added new record for Willow Creek Fuel in Maine zone 4 -2025-06-01 20:37:12,764 - INFO - [fuel_scraper.py:255] - Added new record for Maine Heating Solutions in Maine zone 4 -2025-06-01 20:37:12,765 - INFO - [fuel_scraper.py:255] - Added new record for Quality Fuels, LLC in Maine zone 4 -2025-06-01 20:37:12,766 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 4 -2025-06-01 20:37:12,767 - INFO - [fuel_scraper.py:255] - Added new record for Welch Oil in Maine zone 4 -2025-06-01 20:37:12,768 - INFO - [fuel_scraper.py:255] - Added new record for Ace Oil in Maine zone 4 -2025-06-01 20:37:12,769 - INFO - [fuel_scraper.py:255] - Added new record for Top It Off Oil in Maine zone 4 -2025-06-01 20:37:12,770 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 4 -2025-06-01 20:37:12,771 - INFO - [fuel_scraper.py:255] - Added new record for Garrett Pillsbury - Fleurent Fuel in Maine zone 4 -2025-06-01 20:37:12,772 - INFO - [fuel_scraper.py:255] - Added new record for Noble Fuels in Maine zone 4 -2025-06-01 20:37:12,773 - INFO - [fuel_scraper.py:255] - Added new record for Gils Oil Service, Inc. in Maine zone 4 -2025-06-01 20:37:12,774 - INFO - [fuel_scraper.py:255] - Added new record for Seacoast Energy, Inc. 
in Maine zone 4 -2025-06-01 20:37:12,774 - INFO - [fuel_scraper.py:255] - Added new record for Winterwood Fuel in Maine zone 4 -2025-06-01 20:37:12,775 - INFO - [fuel_scraper.py:255] - Added new record for Roberge Energy in Maine zone 4 -2025-06-01 20:37:12,776 - INFO - [fuel_scraper.py:255] - Added new record for Bargain Fuel in Maine zone 4 -2025-06-01 20:37:12,777 - INFO - [fuel_scraper.py:255] - Added new record for Branch Brook Fuels in Maine zone 4 -2025-06-01 20:37:12,778 - INFO - [fuel_scraper.py:255] - Added new record for Desrochers Oil in Maine zone 4 -2025-06-01 20:37:12,779 - INFO - [fuel_scraper.py:255] - Added new record for Rinaldi Energy in Maine zone 4 -2025-06-01 20:37:12,780 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 4 -2025-06-01 20:37:12,781 - INFO - [fuel_scraper.py:255] - Added new record for Rama Oil in Maine zone 4 -2025-06-01 20:37:12,782 - INFO - [fuel_scraper.py:255] - Added new record for Arrow Oil Co in Maine zone 4 -2025-06-01 20:37:12,783 - INFO - [fuel_scraper.py:255] - Added new record for My Easy Oil in Maine zone 4 -2025-06-01 20:37:12,784 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 4 -2025-06-01 20:37:12,785 - INFO - [fuel_scraper.py:255] - Added new record for Estes Oil Online in Maine zone 4 -2025-06-01 20:37:12,786 - INFO - [fuel_scraper.py:255] - Added new record for Double E Oil in Maine zone 4 -2025-06-01 20:37:12,787 - INFO - [fuel_scraper.py:255] - Added new record for R & R OIL in Maine zone 4 -2025-06-01 20:37:12,788 - INFO - [fuel_scraper.py:255] - Added new record for Cleaves Energy in Maine zone 4 -2025-06-01 20:37:12,789 - INFO - [fuel_scraper.py:255] - Added new record for Eagle Oil in Maine zone 4 -2025-06-01 20:37:12,790 - INFO - [fuel_scraper.py:255] - Added new record for Vadnais Oil in Maine zone 4 -2025-06-01 20:37:12,791 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 4 -2025-06-01 
20:37:12,791 - INFO - [fuel_scraper.py:257] - Queued 32 records from MaineOil - maine/zone4 for DB insertion. -2025-06-01 20:37:12,791 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone5.asp?type=0 (State: maine, Zone Slug: zone5) -2025-06-01 20:37:13,076 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone5. -2025-06-01 20:37:13,079 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 5 -2025-06-01 20:37:13,080 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 5 -2025-06-01 20:37:13,081 - INFO - [fuel_scraper.py:255] - Added new record for Country Fuel LLC in Maine zone 5 -2025-06-01 20:37:13,082 - INFO - [fuel_scraper.py:255] - Added new record for OFarrell Energy in Maine zone 5 -2025-06-01 20:37:13,083 - INFO - [fuel_scraper.py:255] - Added new record for M.A. Haskell Fuel Company, LLC. in Maine zone 5 -2025-06-01 20:37:13,084 - INFO - [fuel_scraper.py:255] - Added new record for Dales Cash Fuel in Maine zone 5 -2025-06-01 20:37:13,085 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 5 -2025-06-01 20:37:13,086 - INFO - [fuel_scraper.py:255] - Added new record for Kaler Oil Co., Inc. in Maine zone 5 -2025-06-01 20:37:13,087 - INFO - [fuel_scraper.py:255] - Added new record for Lisbon Fuel Co in Maine zone 5 -2025-06-01 20:37:13,088 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 5 -2025-06-01 20:37:13,089 - INFO - [fuel_scraper.py:255] - Added new record for Coastline Energy LLC in Maine zone 5 -2025-06-01 20:37:13,090 - INFO - [fuel_scraper.py:255] - Added new record for C.B. Haskell Fuel Co. in Maine zone 5 -2025-06-01 20:37:13,091 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 5 -2025-06-01 20:37:13,091 - INFO - [fuel_scraper.py:257] - Queued 13 records from MaineOil - maine/zone5 for DB insertion. 
-2025-06-01 20:37:13,091 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone6.asp?type=0 (State: maine, Zone Slug: zone6) -2025-06-01 20:37:13,387 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone6. -2025-06-01 20:37:13,389 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6 -2025-06-01 20:37:13,390 - INFO - [fuel_scraper.py:255] - Added new record for Pushaw Energy in Maine zone 6 -2025-06-01 20:37:13,391 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6 -2025-06-01 20:37:13,392 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6 -2025-06-01 20:37:13,394 - INFO - [fuel_scraper.py:255] - Added new record for Kennebec Energy in Maine zone 6 -2025-06-01 20:37:13,395 - INFO - [fuel_scraper.py:255] - Added new record for Hopkins Energy in Maine zone 6 -2025-06-01 20:37:13,396 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6 -2025-06-01 20:37:13,397 - INFO - [fuel_scraper.py:255] - Added new record for Pine Tree Oil in Maine zone 6 -2025-06-01 20:37:13,398 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6 -2025-06-01 20:37:13,398 - INFO - [fuel_scraper.py:255] - Added new record for Morin Fuel in Maine zone 6 -2025-06-01 20:37:13,399 - INFO - [fuel_scraper.py:255] - Added new record for Fettinger Fuels in Maine zone 6 -2025-06-01 20:37:13,400 - INFO - [fuel_scraper.py:255] - Added new record for Dysarts Fuel in Maine zone 6 -2025-06-01 20:37:13,401 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 6 -2025-06-01 20:37:13,401 - INFO - [fuel_scraper.py:257] - Queued 13 records from MaineOil - maine/zone6 for DB insertion. 
-2025-06-01 20:37:13,401 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone7.asp?type=0 (State: maine, Zone Slug: zone7) -2025-06-01 20:37:13,652 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone7. -2025-06-01 20:37:13,654 - INFO - [fuel_scraper.py:255] - Added new record for Eastern Plumbing & Heating in Maine zone 7 -2025-06-01 20:37:13,655 - INFO - [fuel_scraper.py:255] - Added new record for Hometown Fuel in Maine zone 7 -2025-06-01 20:37:13,656 - INFO - [fuel_scraper.py:255] - Added new record for Huntley Plumbing & Heating in Maine zone 7 -2025-06-01 20:37:13,657 - INFO - [fuel_scraper.py:255] - Added new record for Kelley Oil in Maine zone 7 -2025-06-01 20:37:13,657 - INFO - [fuel_scraper.py:257] - Queued 4 records from MaineOil - maine/zone7 for DB insertion. -2025-06-01 20:37:13,694 - INFO - [fuel_scraper.py:265] - Successfully committed 517 records to the database. -2025-06-01 20:37:13,694 - INFO - [fuel_scraper.py:275] - Database session closed. -2025-06-01 20:37:13,694 - INFO - [fuel_scraper.py:277] - Oil price scraper job finished. -2025-06-01 20:37:13,694 - INFO - [run.py:33] - Fuel price scraper finished. 
diff --git a/requirements.txt b/requirements.txt index dd7af0f..614f8b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ requests beautifulsoup4 sqlalchemy -psycopg2-binary \ No newline at end of file +psycopg2-binary +fastapi +uvicorn[standard] diff --git a/run.py b/run.py index 5e4bacc..429fa2d 100644 --- a/run.py +++ b/run.py @@ -2,44 +2,100 @@ import argparse import logging -# Import necessary functions/modules from your project -# The 'import models' is crucial for init_db to know about the tables import models from database import init_db, SessionLocal -from fuel_scraper import main as run_scraper_main # Import from modular package +from newenglandoil import main as run_scraper_main -# Configure basic logging for the run.py script itself if needed -# Your other modules (fuel_scraper, database) will have their own logging -# or you might centralize logging configuration further. -# For simplicity, we'll let fuel_scraper handle its detailed logging. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) + def initialize_database(): """Initializes the database by creating tables based on models.""" logger.info("Attempting to initialize database...") try: - init_db() # This function is imported from database.py - # It relies on models being imported so Base.metadata is populated + init_db() logger.info("Database initialization process completed.") except Exception as e: logger.error(f"Error during database initialization: {e}", exc_info=True) -def scrape_data(): - """Runs the fuel price scraper.""" - logger.info("Starting the fuel price scraper...") + +def scrape_data(state_abbr: str | None = None, refresh_metadata: bool = False): + """Runs the NewEnglandOil scraper.""" + logger.info("Starting the NewEnglandOil scraper...") + if refresh_metadata: + logger.info("Metadata refresh enabled: Existing phone/URL data may be overwritten.") + if state_abbr: + 
logger.info(f"Scraping restricted to state: {state_abbr}") + try: - run_scraper_main() # This is the main function from fuel_scraper.py - logger.info("Fuel price scraper finished.") + run_scraper_main(refresh_metadata=refresh_metadata, target_state_abbr=state_abbr) + logger.info("NewEnglandOil scraper finished.") except Exception as e: logger.error(f"Error during scraping process: {e}", exc_info=True) + +def scrape_cheapest(state_abbr: str, refresh_metadata: bool = False): + """Runs the CheapestOil scraper for a single state.""" + from cheapestoil import scrape_state + + logger.info(f"Starting CheapestOil scrape for {state_abbr}...") + if refresh_metadata: + logger.info("Metadata refresh enabled: Existing phone/URL data may be overwritten.") + + db_session = SessionLocal() + try: + counties = db_session.query(models.County).all() + county_lookup = {(c.state.strip(), c.name.strip()): c.id for c in counties} + result = scrape_state(state_abbr, db_session, county_lookup, refresh_metadata=refresh_metadata) + logger.info(f"CheapestOil result: {result}") + except Exception as e: + db_session.rollback() + logger.error(f"Error during CheapestOil scrape: {e}", exc_info=True) + finally: + db_session.close() + + +def run_migration(): + """Runs the data normalization migration.""" + from migrate_normalize import main as migrate_main + logger.info("Running data normalization migration...") + try: + migrate_main() + logger.info("Migration completed.") + except Exception as e: + logger.error(f"Error during migration: {e}", exc_info=True) + + +def start_server(): + """Starts the FastAPI server.""" + import uvicorn + logger.info("Starting FastAPI crawler server on port 9553...") + uvicorn.run("app:app", host="0.0.0.0", port=9553) + + def main(): parser = argparse.ArgumentParser(description="Fuel Price Scraper Control Script") parser.add_argument( "action", - choices=["initdb", "scrape"], - help="The action to perform: 'initdb' to initialize the database, 'scrape' to run the 
scraper." + choices=["initdb", "scrape", "scrape-cheapest", "migrate", "server"], + help=( + "'initdb' to initialize the database, " + "'scrape' to run NewEnglandOil scraper, " + "'scrape-cheapest' to run CheapestOil scraper, " + "'migrate' to run data normalization migration, " + "'server' to start the FastAPI server." + ), + ) + parser.add_argument( + "--state", + default=None, + help="State abbreviation (MA, CT, ME, NH, RI, VT).", + ) + parser.add_argument( + "--refresh-metadata", + action="store_true", + help="Force refresh phone numbers and URLs, overwriting existing data.", ) args = parser.parse_args() @@ -47,10 +103,18 @@ def main(): if args.action == "initdb": initialize_database() elif args.action == "scrape": - scrape_data() - else: - logger.error(f"Unknown action: {args.action}") - parser.print_help() + scrape_data(state_abbr=args.state, refresh_metadata=args.refresh_metadata) + elif args.action == "scrape-cheapest": + if not args.state: + logger.error("--state is required for scrape-cheapest action") + parser.print_help() + return + scrape_cheapest(args.state.upper(), refresh_metadata=args.refresh_metadata) + elif args.action == "migrate": + run_migration() + elif args.action == "server": + start_server() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test.py b/test.py deleted file mode 100644 index 2ab49c3..0000000 --- a/test.py +++ /dev/null @@ -1,34 +0,0 @@ -import requests -from bs4 import BeautifulSoup - -url = "https://www.newenglandoil.com/connecticut/zone1.asp?type=0" -headers_req = { # Renamed to avoid conflict with 'headers' variable later - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' -} - -try: - response = requests.get(url, headers=headers_req, timeout=10) - response.raise_for_status() - soup = BeautifulSoup(response.content, 'html.parser') - - all_tables = soup.find_all('table') - print(f"Found {len(all_tables)} 
table(s) in total.") - - if all_tables: - table = all_tables[0] # Assuming it's the first (and only) table - thead = table.find('thead') - if thead: - # Get the exact header texts - actual_headers = [th.get_text(strip=True) for th in thead.find_all('th')] - print(f"Actual headers found in the first table's thead: {actual_headers}") - # Get the lowercased versions for easy comparison - actual_headers_lower = [th.get_text(strip=True).lower() for th in thead.find_all('th')] - print(f"Actual headers (lowercase): {actual_headers_lower}") - - else: - print("The first table found does not have a element.") - else: - print("No tables found on the page.") - -except requests.exceptions.RequestException as e: - print(f"Error fetching page: {e}") \ No newline at end of file