diff --git a/Dockerfile b/Dockerfile
index 19f7c0b..ffa4cf7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
# Use an official Python runtime as a parent image
-FROM python:3.9-slim-buster
+FROM python:3.11-slim-bookworm
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE 1
@@ -24,5 +24,7 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application code into the container
# This will be overridden by the volume mount in docker-compose for development
COPY . .
-#CMD ["python3", "run.py", "initdb"]
-CMD ["python3", "run.py", "scrape"]
\ No newline at end of file
+
+EXPOSE 9553
+
+CMD ["python3", "run.py", "server"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c0160ac
--- /dev/null
+++ b/README.md
@@ -0,0 +1,203 @@
+# NewEnglandBio Fuel Price Crawler
+
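+Python scraper that collects heating oil prices from NewEnglandOil.com, MaineOil.com, and CheapestOil.com and stores them in PostgreSQL. It can run as a batch job (`run.py scrape`) or as a small FastAPI server (`app.py`; the Docker image's default command is `run.py server` and the image exposes port 9553) that triggers scrapes on demand.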
+
+## Tech Stack
+
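+- **Language:** Python 3.10+ (the code uses `X | None` type annotations; the Docker image runs 3.11)
+- **HTTP:** requests + BeautifulSoup4 for scraping; FastAPI for the on-demand scrape endpoints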
+- **Database:** SQLAlchemy + psycopg2 (PostgreSQL)
+- **Deployment:** Docker
+
+## Project Structure
+
+```
+crawler/
+├── run.py # CLI entry point (initdb / scrape / server)
+├── database.py # SQLAlchemy engine and session config
+├── models.py # ORM models (OilPrice, County, Company)
+├── fuel_scraper.py # Legacy monolithic scraper (deprecated)
+├── fuel_scraper/ # Modular package (use this)
+│ ├── __init__.py # Exports main()
+│ ├── config.py # Site configs, zone-to-county mappings, logging
+│ ├── http_client.py # HTTP requests with browser User-Agent
+│ ├── parsers.py # HTML table parsing for price extraction
+│ ├── scraper.py # Main orchestrator
+│ └── db_operations.py # Upsert logic for oil_prices table
+├── test.py # HTML parsing validation
+├── requirements.txt
+├── Dockerfile
+├── docker-compose.yml
+└── .env
+```
+
+## URLs Scraped
+
+The crawler hits these external websites to collect price data:
+
+### NewEnglandOil.com (5 states)
+
+**URL pattern:** `https://www.newenglandoil.com/{state}/{zone}.asp?type=0`
+
+| State | Zones | Example URL |
+|-------|-------|-------------|
+| Connecticut | zone1–zone10 | `https://www.newenglandoil.com/connecticut/zone1.asp?type=0` |
+| Massachusetts | zone1–zone15 | `https://www.newenglandoil.com/massachusetts/zone1.asp?type=0` |
+| New Hampshire | zone1–zone6 | `https://www.newenglandoil.com/newhampshire/zone1.asp?type=0` |
+| Rhode Island | zone1–zone4 | `https://www.newenglandoil.com/rhodeisland/zone1.asp?type=0` |
+| Vermont | zone1–zone4 | `https://www.newenglandoil.com/vermont/zone1.asp?type=0` |
+
+### MaineOil.com (1 state)
+
+**URL pattern:** `https://www.maineoil.com/{zone}.asp?type=0`
+
+| State | Zones | Example URL |
+|-------|-------|-------------|
+| Maine | zone1–zone7 | `https://www.maineoil.com/zone1.asp?type=0` |
+
+**Total: ~46 pages scraped per run.**
+
+Each page contains an HTML table with columns: Company Name, Price, Date. The parser extracts these and maps zones to counties using the config.
+
+## How to Run
+
+### CLI Usage
+
+```bash
+# Initialize database tables
+python3 run.py initdb
+
+# Run the scraper
+python3 run.py scrape
+```
+
+### Docker
+
+```bash
+# Build
+docker-compose build
+
+# Run the scraper once (the image's default command starts the HTTP server instead)
+docker-compose run app python3 run.py scrape
+
+# Initialize database via Docker
+docker-compose run app python3 run.py initdb
+
+# Both in sequence
+docker-compose run app python3 run.py initdb && docker-compose run app python3 run.py scrape
+```
+
+### Curl the Scraped Data
+
+The crawler's own HTTP endpoints (see `app.py`) only trigger scrapes; they do **not** serve price data. After scraping, the data is available through the **Rust API** (port 9552):
+
+```bash
+# Get oil prices for a specific county
+curl http://localhost:9552/oil-prices/county/1
+
+# Get oil prices for Suffolk County (MA) — find county_id first
+curl http://localhost:9552/state/MA
+# Then use the county_id from the response
+curl http://localhost:9552/oil-prices/county/5
+```
+
+**Response format:**
+```json
+[
+ {
+ "id": 1234,
+ "state": "Massachusetts",
+ "zone": 1,
+ "name": "ABC Fuel Co",
+ "price": 3.29,
+ "date": "01/15/2026",
+ "scrapetimestamp": "2026-01-15T14:30:00Z",
+ "county_id": 5
+ }
+]
+```
+
+### Query the Database Directly
+
+```bash
+# All prices for Massachusetts
+psql postgresql://postgres:password@192.168.1.204:5432/fuelprices \
+ -c "SELECT name, price, date, county_id FROM oil_prices WHERE state='Massachusetts' ORDER BY price;"
+
+# Latest scrape timestamp
+psql postgresql://postgres:password@192.168.1.204:5432/fuelprices \
+ -c "SELECT MAX(scrapetimestamp) FROM oil_prices;"
+
+# Prices by county with county name
+psql postgresql://postgres:password@192.168.1.204:5432/fuelprices \
+ -c "SELECT c.name AS county, o.name AS company, o.price
+ FROM oil_prices o JOIN county c ON o.county_id = c.id
+ WHERE c.state='MA' ORDER BY o.price;"
+```
+
+## Environment
+
+Create `.env`:
+
+```
+DATABASE_URL=postgresql://postgres:password@192.168.1.204:5432/fuelprices
+```
+
+## Zone-to-County Mapping
+
+Each scraping zone maps to one or more counties:
+
+**Connecticut (10 zones):**
+- zone1 → Fairfield | zone2 → New Haven | zone3 → Middlesex
+- zone4 → New London | zone5 → Hartford | zone6 → Hartford
+- zone7 → Litchfield | zone8 → Tolland | zone9 → Windham
+- zone10 → New Haven
+
+**Massachusetts (15 zones):**
+- zone1 → Berkshire | zone2 → Franklin | zone3 → Hampshire
+- zone4 → Hampden | zone5 → Worcester | zone6 → Worcester
+- zone7 → Middlesex | zone8 → Essex | zone9 → Suffolk
+- zone10 → Norfolk | zone11 → Plymouth | zone12 → Bristol
+- zone13 → Barnstable | zone14 → Dukes | zone15 → Nantucket
+
+**New Hampshire (6 zones):**
+- zone1 → Coos, Grafton | zone2 → Carroll, Belknap
+- zone3 → Sullivan, Merrimack | zone4 → Strafford, Cheshire
+- zone5 → Hillsborough | zone6 → Rockingham
+
+**Rhode Island (4 zones):**
+- zone1 → Providence | zone2 → Kent, Bristol
+- zone3 → Washington | zone4 → Newport
+
+**Maine (7 zones):**
+- zone1 → Cumberland | zone2 → York | zone3 → Sagadahoc, Lincoln, Knox
+- zone4 → Androscoggin, Oxford, Franklin
+- zone5 → Kennebec, Somerset | zone6 → Penobscot, Piscataquis
+- zone7 → Hancock, Washington, Waldo, Aroostook
+
+## Upsert Logic
+
+When storing scraped data, the crawler:
+
+1. Matches existing records by `(name, state, county_id)` or `(name, state, zone)`
+2. **Skips** records where `company_id IS NOT NULL` (vendor-managed prices take priority)
+3. **Updates** if the price or county_id has changed
+4. **Inserts** a new record if no match exists
+
+## Scheduling
+
+The crawler has no built-in scheduler. Run it via cron or Unraid's User Scripts:
+
+```bash
+# Cron: run daily at 2 AM
+0 2 * * * cd /mnt/code/tradewar/crawler && docker-compose run app python3 run.py scrape
+```
+
+## Logging
+
+Logs to `oil_scraper.log` in the working directory. Level: INFO.
+
+```
+2026-01-15 14:30:00 - INFO - [scraper.py:42] - Scraping Massachusetts zone1...
+2026-01-15 14:30:01 - INFO - [db_operations.py:28] - Upserted 15 records for Massachusetts zone1
+```
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..5e0bbd5
--- /dev/null
+++ b/app.py
@@ -0,0 +1,65 @@
+"""
+FastAPI web server for the crawler.
+Provides HTTP endpoints to trigger scrapes on demand.
+"""
+import logging
+from fastapi import FastAPI, HTTPException
+
+import models
+from database import SessionLocal
+from cheapestoil import scrape_state
+from cheapestoil.config import STATE_API_NAMES
+from newenglandoil.scraper import main as run_newenglandoil_scraper
+
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
+)
+
+app = FastAPI(title="Crawler API", version="1.0.0")
+
+
+def _build_county_lookup(db_session):
+    """Map the stripped (state, name) pair of every County row in the DB to that row's id."""
+    rows = db_session.query(models.County).all()
+    return dict(((row.state.strip(), row.name.strip()), row.id) for row in rows)
+
+
+@app.get("/health")
+def health():
+    return dict(status="ok")  # static liveness payload; no database access
+
+
+@app.get("/scrape/{state_abbr}")
+def scrape_endpoint(state_abbr: str, refresh_metadata: bool = False):
+    """Run the CheapestOil scraper for one state and return its summary.
+
+    Responds 400 for an unknown state; 500 (after a rollback) if the scrape raises.
+    """
+    state_abbr = state_abbr.upper()
+    if state_abbr not in STATE_API_NAMES:
+        valid = list(STATE_API_NAMES.keys())
+        raise HTTPException(status_code=400, detail=f"Unknown state: {state_abbr}. Valid: {valid}")
+
+    db_session = SessionLocal()
+    try:
+        lookup = _build_county_lookup(db_session)
+        return scrape_state(state_abbr, db_session, lookup, refresh_metadata=refresh_metadata)
+    except Exception as err:
+        db_session.rollback()
+        logging.error(f"Scrape failed for {state_abbr}: {err}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(err))
+    finally:
+        db_session.close()
+
+
+@app.get("/scrape-newenglandoil")
+def scrape_newenglandoil_endpoint(state: str = None, refresh_metadata: bool = False):
+    """Run the NewEnglandOil scraper in-request; `state` is forwarded as `target_state_abbr`."""
+    try:
+        run_newenglandoil_scraper(target_state_abbr=state, refresh_metadata=refresh_metadata)
+    except Exception as err:
+        logging.error(f"NewEnglandOil scrape failed: {err}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(err))
+    # Reached only when the scraper returned without raising.
+    return {"status": "ok", "message": "NewEnglandOil scrape completed"}
diff --git a/cheapestoil/__init__.py b/cheapestoil/__init__.py
new file mode 100644
index 0000000..ad45470
--- /dev/null
+++ b/cheapestoil/__init__.py
@@ -0,0 +1,4 @@
+# cheapestoil package
+from .scraper import scrape_state
+
+__all__ = ["scrape_state"]
diff --git a/cheapestoil/api_client.py b/cheapestoil/api_client.py
new file mode 100644
index 0000000..e108f79
--- /dev/null
+++ b/cheapestoil/api_client.py
@@ -0,0 +1,136 @@
+"""
+HTTP client for the CheapestOil JSON API."""
+import logging
+import re
+import requests
+from bs4 import BeautifulSoup
+
+from .config import API_URL
+
+DEFAULT_HEADERS = {
+ "User-Agent": (
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/91.0.4472.124 Safari/537.36"
+ )
+}
+
+REQUEST_TIMEOUT = 20
+
+
+def fetch_company_details(slug: str) -> dict:
+    """
+    Scrape a company's CheapestOil profile page for its own website and phone.
+
+    The website is taken from a "Visit Website"/"Company Website" link, else
+    from the first external link in the page's "col-md-8" column. The phone is
+    the first number after a "Phone"/"Tel"/"Call" label, else the first
+    phone-shaped number anywhere in the page text.
+
+    Args:
+        slug: Profile path (e.g. "Abc-Oil-Company"), or a full http(s) URL
+
+    Returns:
+        Dict with keys: "url" (str|None), "phone" (str|None). Both are None
+        when slug is empty or when fetching/parsing the page raises.
+    """
+    if not slug:
+        return {"url": None, "phone": None}
+
+    # A slug that is already a URL is requested as-is; anything else is
+    # treated as a path under cheapestoil.com.
+    page_url = slug if slug.startswith("http") else f"https://www.cheapestoil.com/{slug}"
+
+    def _is_external(href):
+        # Absolute http(s) link that does not point back at cheapestoil.com.
+        return href.startswith('http') and 'cheapestoil.com' not in href
+
+    try:
+        resp = requests.get(page_url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT)
+        # Error statuses become exceptions and are handled by the except below.
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.content, 'html.parser')
+
+        # 1. Website
+        # First choice: an anchor whose text matches "Visit Website" or
+        # "Company Website" (case-insensitive) and whose target is external.
+        real_url = None
+        label_re = re.compile(r"Visit Website|Company Website", re.IGNORECASE)
+        labelled = soup.find('a', string=label_re)
+        href = labelled.get('href') if labelled else None
+        if href and _is_external(href):
+            real_url = href
+
+        # Second choice (heuristic): the first external link inside the
+        # <div class="col-md-8"> column, when the page has one.
+        if not real_url:
+            main_col = soup.find('div', class_='col-md-8')
+            if main_col:
+                hrefs = (a['href'] for a in main_col.find_all('a', href=True))
+                real_url = next((h for h in hrefs if _is_external(h)), None)
+
+        # 2. Phone
+        # Search the page's text, joined with single spaces.
+        page_text = soup.get_text(" ", strip=True)
+
+        # Prefer a number that follows a "Phone" / "Tel" / "Call" label ...
+        found = re.search(r'(?:Phone|Tel|Call).*?(\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4})', page_text, re.IGNORECASE)
+        if found:
+            raw_phone = found.group(1)
+        else:
+            # ... otherwise fall back to the first phone-shaped number.
+            found = re.search(r'(?:\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4})', page_text)
+            raw_phone = found.group(0) if found else None
+
+        phone = None
+        if raw_phone:
+            digits = re.sub(r'\D', '', raw_phone)
+            if len(digits) == 10:
+                phone = f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
+            else:
+                # Not ten digits: keep the text exactly as matched.
+                phone = raw_phone
+
+        return {"url": real_url, "phone": phone}
+
+    except Exception as err:
+        # Best effort: any failure is logged and reported as "no details",
+        # so one bad profile page never aborts the caller.
+        logging.warning(f"Failed to fetch details for {slug}: {err}")
+        return {"url": None, "phone": None}
+
+
+
+def fetch_county_prices(state_api_name: str, county_name: str | None = None) -> list:
+    """
+    Query the CheapestOil price API for one state, optionally narrowed to a county.
+
+    Args:
+        state_api_name: State name in the API's spelling (e.g. "Massachusetts", "NewHampshire")
+        county_name: County to filter on; None sends an empty county filter
+
+    Returns:
+        The decoded JSON list, or [] when the request fails, the body is not
+        valid JSON, or the decoded payload is not a list.
+    """
+    # None is sent as an empty "county" value; "zip" is always left blank.
+    query = dict(sort=0, state=state_api_name, county=county_name or "", zip="")
+
+    try:
+        resp = requests.get(API_URL, params=query, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT)
+        resp.raise_for_status()
+        payload = resp.json()
+    except requests.exceptions.RequestException as err:
+        # Transport failures, timeouts and HTTP error statuses end up here.
+        logging.error(f"Error fetching CheapestOil API for {state_api_name}/{county_name}: {err}")
+        return []
+    except ValueError as err:
+        # The body could not be decoded as JSON.
+        logging.error(f"Invalid JSON from CheapestOil API: {err}")
+        return []
+
+    # Anything other than a JSON array is treated as a failed fetch.
+    if not isinstance(payload, list):
+        logging.warning(f"Unexpected response type from API: {type(payload)}")
+        return []
+    return payload
diff --git a/cheapestoil/company_matcher.py b/cheapestoil/company_matcher.py
new file mode 100644
index 0000000..516aecd
--- /dev/null
+++ b/cheapestoil/company_matcher.py
@@ -0,0 +1,90 @@
+"""
+Company name normalization and matching for cross-source deduplication.
+
+Handles slight naming variations between NewEnglandOil and CheapestOil:
+ "Fireman's Fuel Co." == "Firemans Fuel" after normalization.
+"""
+import re
+import logging
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from sqlalchemy.orm import Session
+import models
+
+# Trailing business-form words stripped during normalization (whole words only)
+_STRIP_SUFFIXES = [
+    "enterprises", "company", "corp", "inc", "llc", "co",
+]
+
+
+def normalize_company_name(name: str) -> str:
+    """
+    Normalize a company name for fuzzy matching.
+
+    Steps:
+      1. Strip whitespace, lowercase
+      2. Replace '&' with 'and'
+      3. Remove punctuation (apostrophes, periods, commas, dollar signs)
+      4. Collapse multiple spaces
+      5. Drop one trailing business-form word ("co", "inc", ...), whole words
+         only: "Fireman's Fuel Co." -> "firemans fuel", "Costco" -> "costco"
+
+    Args:
+        name: Raw company name (None is treated as "")
+
+    Returns:
+        Normalized string for comparison.
+    """
+    s = (name or "").strip().lower()
+    s = s.replace("&", "and")
+    s = re.sub(r"['.,$]", "", s)
+    s = re.sub(r"\s+", " ", s).strip()
+    for suffix in _STRIP_SUFFIXES:
+        # Match on " <suffix>": a bare endswith() would also cut "costco" -> "cost".
+        if s.endswith(" " + suffix):
+            s = s[: -len(suffix)].rstrip()
+            break
+    return s
+
+
+def find_existing_record(
+    db_session: Session,
+    raw_name: str,
+    state_abbr: str,
+    county_id: int | None,
+) -> "models.OilPrice | None":
+    """
+    Look up the OilPrice row whose normalized name equals that of `raw_name`.
+
+    Candidates are the rows with `state == state_abbr` and either the given
+    `county_id` or, when `county_id` is None, `zone == 0`. Names are compared
+    in Python via normalize_company_name(); the first match wins.
+
+    Args:
+        db_session: SQLAlchemy session
+        raw_name: Raw company name from CheapestOil
+        state_abbr: Two-letter state abbreviation
+        county_id: County ID or None
+
+    Returns:
+        Matching OilPrice record, or None (also when `raw_name` normalizes to "").
+    """
+    wanted = normalize_company_name(raw_name)
+    if not wanted:
+        return None
+
+    OilPrice = models.OilPrice
+    scope = (OilPrice.zone == 0) if county_id is None else (OilPrice.county_id == county_id)
+    candidates = (
+        db_session.query(OilPrice)
+        .filter(OilPrice.state == state_abbr)
+        .filter(scope)
+        .all()
+    )
+    return next(
+        (row for row in candidates if normalize_company_name(row.name) == wanted),
+        None,
+    )
diff --git a/cheapestoil/config.py b/cheapestoil/config.py
new file mode 100644
index 0000000..cf157b6
--- /dev/null
+++ b/cheapestoil/config.py
@@ -0,0 +1,50 @@
+"""
+Configuration for the CheapestOil scraper.
+"""
+
+API_URL = "https://www.cheapestoil.com/heating-oil-prices/api"
+
+# Seconds between requests to be polite
+SCRAPE_DELAY = 2
+
+# State abbreviation -> list of county names to query on cheapestoil.com.
+# A None entry in a list requests state-level results (no county filter); no state lists one currently.
+STATE_COUNTIES = {
+ "MA": [
+ "Barnstable", "Berkshire", "Bristol", "Essex", "Franklin",
+ "Hampden", "Hampshire", "Middlesex", "Norfolk", "Plymouth",
+ "Suffolk", "Worcester",
+ ],
+ "CT": [
+ "Fairfield", "Hartford", "Litchfield", "Middlesex",
+ "New Haven", "New London", "Tolland", "Windham",
+ ],
+ "ME": [
+ "Cumberland", "York", "Penobscot", "Kennebec", "Androscoggin",
+ "Aroostook", "Oxford", "Hancock", "Somerset", "Knox",
+ "Waldo", "Sagadahoc", "Lincoln", "Washington", "Franklin",
+ "Piscataquis",
+ ],
+ "NH": [
+ "Belknap", "Carroll", "Cheshire", "Coos", "Grafton",
+ "Hillsborough", "Merrimack", "Rockingham", "Strafford", "Sullivan",
+ ],
+ "RI": [
+ "Bristol", "Kent", "Newport", "Providence", "Washington",
+ ],
+ "VT": [
+ "Addison", "Bennington", "Caledonia", "Chittenden", "Essex",
+ "Franklin", "Grand Isle", "Lamoille", "Orange", "Orleans",
+ "Rutland", "Washington", "Windham", "Windsor",
+ ],
+}
+
+# State abbreviation -> API state name (as used in cheapestoil.com params)
+STATE_API_NAMES = {
+ "MA": "Massachusetts",
+ "CT": "Connecticut",
+ "ME": "Maine",
+ "NH": "NewHampshire",
+ "RI": "RhodeIsland",
+ "VT": "Vermont",
+}
diff --git a/cheapestoil/parsers.py b/cheapestoil/parsers.py
new file mode 100644
index 0000000..7d6dddb
--- /dev/null
+++ b/cheapestoil/parsers.py
@@ -0,0 +1,111 @@
+"""
+Parsers for CheapestOil API response data.
+
+API returns arrays like:
+ [name, 150gal_price, 300gal_price, 500gal_price, service_area, updated, link, flag]
+
+Price fields come as HTML strings like "$3.69<br>(Total $553.50*)"
+"""
+import re
+import logging
+
+# Common abbreviations that should stay uppercase after title-casing
+_KEEP_UPPER = {"LLC", "INC", "LP", "HVAC", "II", "III", "IV", "USA"}
+
+
+def _smart_title(name: str) -> str:
+    """Title-case a company name, keeping known abbreviations upper-case and undoing str.title()'s "Joe'S"."""
+    words = re.sub(r"(?<=[a-z])['’][A-Z]\b", lambda m: m.group(0).lower(), name.title()).split()
+    return " ".join(w.upper() if w.upper() in _KEEP_UPPER else w for w in words)
+
+
+def parse_price_150(price_html: str) -> float | None:
+    """
+    Extract the per-gallon price from a CheapestOil price field.
+
+    Examples:
+        "$3.69<br>(Total $553.50*)" -> 3.69
+        "$4.199" -> 4.199
+        "" -> None
+
+    Args:
+        price_html: Raw price string from the API
+
+    Returns:
+        Float price or None if unparseable.
+    """
+    if not price_html or not isinstance(price_html, str):
+        return None
+    # The per-gallon price is the first dollar amount before any <br> tag
+    match = re.search(r'\$(\d+\.\d+)', price_html)
+    if match:
+        try:
+            return float(match.group(1))
+        except ValueError:
+            pass
+    logging.warning(f"Could not parse price from: {price_html!r}")
+    return None
+
+
+def parse_company_record(row: list, county_name: str | None) -> dict | None:
+    """
+    Convert an API row array to a structured dict.
+
+    Expected row format:
+        [0] name
+        [1] 150gal price (HTML)
+        [2] 300gal price (HTML)
+        [3] 500gal price (HTML)
+        [4] service area text
+        [5] last updated date string
+        [6] company link/slug
+        [7] flag/badge
+
+    Args:
+        row: Raw array from the API
+        county_name: County name this row came from (None for state-level)
+
+    Returns:
+        Dict with {name, price, service_area, county_name, date, url, slug},
+        or None if the row is not a list of at least 6 items or has no name.
+        `price` is None when the 150gal field cannot be parsed; at most one of
+        `url` / `slug` is set, depending on whether row[6] starts with "http".
+    """
+    if not isinstance(row, list) or len(row) < 6:
+        logging.warning(f"Skipping malformed row: {row!r}")
+        return None
+
+    name = str(row[0]).strip() if row[0] else ""
+    if not name:
+        return None
+
+    # Apply title case normalization
+    name = _smart_title(name)
+
+    price = parse_price_150(str(row[1]) if row[1] else "")
+    service_area = str(row[4]).strip() if row[4] else ""
+    date_str = str(row[5]).strip() if row[5] else ""
+    # DB column is VARCHAR(20), truncate to fit
+    if len(date_str) > 20:
+        date_str = date_str[:20]
+
+    # row[6] is either a full external URL or a slug for the company's
+    # cheapestoil.com detail page; expose it under the matching key.
+    url = None
+    slug = None
+    if len(row) > 6 and row[6]:
+        raw_link = str(row[6]).strip()
+        if raw_link.startswith("http"):
+            url = raw_link
+        elif raw_link:
+            slug = raw_link
+
+    return {
+        "name": name,
+        "price": price,
+        "service_area": service_area,
+        "county_name": county_name,
+        "date": date_str,
+        "url": url,
+        "slug": slug,  # lets the scraper fetch the detail page
+    }
diff --git a/cheapestoil/scraper.py b/cheapestoil/scraper.py
new file mode 100644
index 0000000..6455311
--- /dev/null
+++ b/cheapestoil/scraper.py
@@ -0,0 +1,217 @@
+"""
+Main orchestrator for the CheapestOil scraper.
+"""
+import logging
+import time
+from datetime import datetime
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from sqlalchemy.orm import Session
+import models
+
+from .config import STATE_COUNTIES, STATE_API_NAMES, SCRAPE_DELAY
+from .api_client import fetch_company_details, fetch_county_prices
+from .parsers import parse_company_record
+from .company_matcher import find_existing_record
+from .town_lookup import resolve_county_from_service_area
+
+
+def _resolve_county_id(
+    county_name: str | None,
+    service_area: str,
+    state_abbr: str,
+    county_lookup: dict,
+) -> int | None:
+    """
+    Map a scraped row to a county_id, or None when it cannot be placed.
+
+    A non-empty `county_name` is looked up directly and is final (no fallback
+    to `service_area`); otherwise the county is inferred from `service_area`.
+    """
+    if county_name:
+        direct = county_lookup.get((state_abbr, county_name))
+        if direct is None:
+            logging.warning(f"County not in DB: ({state_abbr}, {county_name})")
+        return direct
+
+    if not service_area:
+        return None
+
+    inferred = resolve_county_from_service_area(service_area, state_abbr)
+    if not inferred:
+        return None
+
+    from_area = county_lookup.get((state_abbr, inferred))
+    if from_area is None:
+        logging.warning(f"Resolved county '{inferred}' not in DB for {state_abbr}")
+    return from_area
+
+
+def scrape_state(state_abbr: str, db_session: Session, county_lookup: dict, refresh_metadata: bool = False) -> dict:
+    """
+    Scrape all CheapestOil data for a single state.
+
+    Each county in STATE_COUNTIES[state_abbr] is fetched and every parsed row is
+    matched against existing OilPrice rows: matches are updated in place,
+    vendor-managed matches (company_id set) are left untouched, and unmatched
+    rows are inserted with zone=0. Changes are committed once at the end.
+
+    Args:
+        state_abbr: Two-letter state code (MA, CT, ME, NH, RI, VT)
+        db_session: SQLAlchemy session
+        county_lookup: Dict of (state_abbr, county_name) -> county_id
+        refresh_metadata: If True, force re-fetch details (phone/url) and overwrite DB.
+
+    Returns:
+        Summary dict with {state, counties_scraped, records_added, records_updated, records_skipped}
+
+    Raises:
+        ValueError: If state_abbr is not a key of STATE_API_NAMES.
+    """
+    state_abbr = state_abbr.upper()
+    if state_abbr not in STATE_API_NAMES:
+        raise ValueError(f"Unknown state: {state_abbr}. Must be one of {list(STATE_API_NAMES.keys())}")
+
+    api_name = STATE_API_NAMES[state_abbr]
+    counties = STATE_COUNTIES[state_abbr]
+
+    summary = {
+        "state": state_abbr,
+        "counties_scraped": 0,
+        "records_added": 0,
+        "records_updated": 0,
+        "records_skipped": 0,
+    }
+
+    details_cache = {}  # cache for detail pages: slug -> {url, phone}
+
+    for i, county_name in enumerate(counties):
+        if i > 0:
+            time.sleep(SCRAPE_DELAY)
+
+        label = county_name or "(state-level)"
+        logging.info(f"[CheapestOil] Fetching: {state_abbr} / {label}")
+
+        rows = fetch_county_prices(api_name, county_name)
+        if not rows:
+            logging.info(f"No results for {state_abbr} / {label}")
+            continue
+
+        logging.info(f"[CheapestOil] Processing {len(rows)} records from {state_abbr} / {label} (Size: {len(rows)})")
+
+        summary["counties_scraped"] += 1
+
+        for row in rows:
+            record = parse_company_record(row, county_name)
+            if not record or record["price"] is None:
+                summary["records_skipped"] += 1
+                continue
+
+            # Resolve county_id
+            county_id = _resolve_county_id(
+                record["county_name"],
+                record["service_area"],
+                state_abbr,
+                county_lookup,
+            )
+
+            # Check for existing record (cross-source dedup)
+            existing = find_existing_record(
+                db_session, record["name"], state_abbr, county_id
+            )
+
+            # Vendor-managed rows are never modified, so skip them before
+            # spending a detail-page request (and its 1s delay) on them.
+            if existing and existing.company_id is not None:
+                logging.debug(f"Skipping vendor-managed: {record['name']}")
+                summary["records_skipped"] += 1
+                continue
+
+            slug = record.get("slug")
+            real_url = record.get("url")
+            phone = None
+
+            # Fetch the detail page only for a slug, and then only on refresh,
+            # for a new record, or when the stored row lacks a url or phone.
+            should_fetch_details = bool(slug) and (
+                refresh_metadata
+                or not existing
+                or not existing.url
+                or not existing.phone
+            )
+
+            if should_fetch_details:
+                if slug not in details_cache:
+                    details_cache[slug] = fetch_company_details(slug)
+                    time.sleep(1.0)  # Polite delay between detail pages
+                details = details_cache[slug]
+                real_url = details["url"]
+                phone = details["phone"]
+
+            if existing:
+                updated = False
+
+                # Backfill or Force Update url
+                if real_url:
+                    if not existing.url or (refresh_metadata and existing.url != real_url):
+                        existing.url = real_url
+                        updated = True
+                        logging.info(f"Updated/Backfilled URL for {record['name']}")
+
+                # Backfill or Force Update phone
+                if phone:
+                    if not existing.phone or (refresh_metadata and existing.phone != phone):
+                        existing.phone = phone
+                        updated = True
+                        logging.info(f"Updated/Backfilled Phone for {record['name']}")
+
+                # Backfill county_id if we have it now
+                if county_id is not None and existing.county_id != county_id:
+                    existing.county_id = county_id
+                    updated = True
+                    logging.info(f"Updated county_id for {record['name']}")
+
+                # Update if price changed, otherwise just touch timestamp
+                if existing.price != record["price"]:
+                    # Capture the old value before overwriting so the log shows old -> new.
+                    old_price = existing.price
+                    old_label = f"${old_price:.2f}" if old_price is not None else "n/a"
+                    existing.price = record["price"]
+                    existing.date = record["date"]
+                    existing.scrapetimestamp = datetime.utcnow()
+                    summary["records_updated"] += 1
+                    logging.info(f"Updated price: {record['name']} {old_label} → ${record['price']:.2f}")
+                elif updated:
+                    existing.scrapetimestamp = datetime.utcnow()
+                    summary["records_updated"] += 1
+                else:
+                    existing.scrapetimestamp = datetime.utcnow()
+                    summary["records_skipped"] += 1
+                    logging.debug(f"No changes for {record['name']} (${record['price']:.2f})")
+            else:
+                # Insert new record (zone=0 for cheapestoil)
+                oil_price = models.OilPrice(
+                    state=state_abbr,
+                    zone=0,
+                    name=record["name"],
+                    price=record["price"],
+                    date=record["date"],
+                    county_id=county_id,
+                    url=real_url,
+                    phone=phone,
+                    scrapetimestamp=datetime.utcnow(),
+                )
+                db_session.add(oil_price)
+                summary["records_added"] += 1
+                logging.info(f"Added: {record['name']} in {state_abbr} (county_id={county_id}, phone={phone})")
+
+    db_session.commit()
+    logging.info(
+        f"[CheapestOil] State {state_abbr} complete: "
+        f"{summary['records_added']} added, {summary['records_updated']} updated, "
+        f"{summary['records_skipped']} skipped (no changes)"
+    )
+    return summary
diff --git a/cheapestoil/town_lookup.py b/cheapestoil/town_lookup.py
new file mode 100644
index 0000000..a990067
--- /dev/null
+++ b/cheapestoil/town_lookup.py
@@ -0,0 +1,1586 @@
+"""
+Town-to-county mapping for all 6 New England states.
+
+Used to resolve county from service_area text when CheapestOil returns
+state-level data (NH, RI, VT) without county filtering.
+
+For MA/CT/ME, the API has county-level pages so this is only needed as fallback.
+"""
+import re
+import logging
+
+# County names for direct mention matching (e.g. "Throughout Rockingham County")
+_NE_COUNTIES = {  # two-letter state abbreviation -> list of county names, stored without the "County" suffix
+ "CT": ["Fairfield", "Hartford", "Litchfield", "Middlesex", "New Haven",
+ "New London", "Tolland", "Windham"],
+ "MA": ["Barnstable", "Berkshire", "Bristol", "Dukes", "Essex", "Franklin",
+ "Hampden", "Hampshire", "Middlesex", "Nantucket", "Norfolk",
+ "Plymouth", "Suffolk", "Worcester"],
+ "ME": ["Androscoggin", "Aroostook", "Cumberland", "Franklin", "Hancock",
+ "Kennebec", "Knox", "Lincoln", "Oxford", "Penobscot", "Piscataquis",
+ "Sagadahoc", "Somerset", "Waldo", "Washington", "York"],
+ "NH": ["Belknap", "Carroll", "Cheshire", "Coos", "Grafton", "Hillsborough",
+ "Merrimack", "Rockingham", "Strafford", "Sullivan"],
+ "RI": ["Bristol", "Kent", "Newport", "Providence", "Washington"],
+ "VT": ["Addison", "Bennington", "Caledonia", "Chittenden", "Essex",
+ "Franklin", "Grand Isle", "Lamoille", "Orange", "Orleans",
+ "Rutland", "Washington", "Windham", "Windsor"],
+}  # NOTE: some names recur across states (e.g. Franklin, Washington, Middlesex), so a bare county name is ambiguous without its state
+
+# Town name (lowercase) -> County name, organized by state
+TOWN_COUNTY_MAP = {
+ "CT": {
+ "andover": "Tolland",
+ "ansonia": "New Haven",
+ "ashford": "Windham",
+ "avon": "Hartford",
+ "barkhamsted": "Litchfield",
+ "beacon falls": "New Haven",
+ "berlin": "Hartford",
+ "bethany": "New Haven",
+ "bethel": "Fairfield",
+ "bethlehem": "Litchfield",
+ "bloomfield": "Hartford",
+ "bolton": "Tolland",
+ "bozrah": "New London",
+ "branford": "New Haven",
+ "bridgeport": "Fairfield",
+ "bridgewater": "Litchfield",
+ "bristol": "Hartford",
+ "brookfield": "Fairfield",
+ "brooklyn": "Windham",
+ "burlington": "Hartford",
+ "canaan": "Litchfield",
+ "canterbury": "Windham",
+ "canton": "Hartford",
+ "chaplin": "Windham",
+ "cheshire": "New Haven",
+ "chester": "Middlesex",
+ "clinton": "Middlesex",
+ "colchester": "New London",
+ "colebrook": "Litchfield",
+ "columbia": "Tolland",
+ "cornwall": "Litchfield",
+ "coventry": "Tolland",
+ "cromwell": "Middlesex",
+ "danbury": "Fairfield",
+ "darien": "Fairfield",
+ "deep river": "Middlesex",
+ "derby": "New Haven",
+ "durham": "Middlesex",
+ "east granby": "Hartford",
+ "east haddam": "Middlesex",
+ "east hampton": "Middlesex",
+ "east hartford": "Hartford",
+ "east haven": "New Haven",
+ "east lyme": "New London",
+ "east windsor": "Hartford",
+ "eastford": "Windham",
+ "easton": "Fairfield",
+ "ellington": "Tolland",
+ "enfield": "Hartford",
+ "essex": "Middlesex",
+ "fairfield": "Fairfield",
+ "farmington": "Hartford",
+ "franklin": "New London",
+ "glastonbury": "Hartford",
+ "goshen": "Litchfield",
+ "granby": "Hartford",
+ "greenwich": "Fairfield",
+ "griswold": "New London",
+ "groton": "New London",
+ "guilford": "New Haven",
+ "haddam": "Middlesex",
+ "hamden": "New Haven",
+ "hampton": "Windham",
+ "hartford": "Hartford",
+ "hartland": "Hartford",
+ "harwinton": "Litchfield",
+ "hebron": "Tolland",
+ "kent": "Litchfield",
+ "killingly": "Windham",
+ "killingworth": "Middlesex",
+ "lebanon": "New London",
+ "ledyard": "New London",
+ "lisbon": "New London",
+ "litchfield": "Litchfield",
+ "lyme": "New London",
+ "madison": "New Haven",
+ "manchester": "Hartford",
+ "mansfield": "Tolland",
+ "marlborough": "Hartford",
+ "meriden": "New Haven",
+ "middlebury": "New Haven",
+ "middlefield": "Middlesex",
+ "middletown": "Middlesex",
+ "milford": "New Haven",
+ "monroe": "Fairfield",
+ "montville": "New London",
+ "morris": "Litchfield",
+ "naugatuck": "New Haven",
+ "new britain": "Hartford",
+ "new canaan": "Fairfield",
+ "new fairfield": "Fairfield",
+ "new hartford": "Litchfield",
+ "new haven": "New Haven",
+ "new london": "New London",
+ "new milford": "Litchfield",
+ "newington": "Hartford",
+ "newtown": "Fairfield",
+ "norfolk": "Litchfield",
+ "north branford": "New Haven",
+ "north canaan": "Litchfield",
+ "north haven": "New Haven",
+ "north stonington": "New London",
+ "norwalk": "Fairfield",
+ "norwich": "New London",
+ "old lyme": "New London",
+ "old saybrook": "Middlesex",
+ "orange": "New Haven",
+ "oxford": "New Haven",
+ "plainfield": "Windham",
+ "plainville": "Hartford",
+ "plymouth": "Litchfield",
+ "pomfret": "Windham",
+ "portland": "Middlesex",
+ "preston": "New London",
+ "prospect": "New Haven",
+ "putnam": "Windham",
+ "redding": "Fairfield",
+ "ridgefield": "Fairfield",
+ "rocky hill": "Hartford",
+ "roxbury": "Litchfield",
+ "salem": "New London",
+ "salisbury": "Litchfield",
+ "scotland": "Windham",
+ "seymour": "New Haven",
+ "sharon": "Litchfield",
+ "shelton": "Fairfield",
+ "sherman": "Fairfield",
+ "simsbury": "Hartford",
+ "somers": "Tolland",
+ "south windsor": "Hartford",
+ "southbury": "New Haven",
+ "southington": "Hartford",
+ "sprague": "New London",
+ "stafford": "Tolland",
+ "stamford": "Fairfield",
+ "sterling": "Windham",
+ "stonington": "New London",
+ "stratford": "Fairfield",
+ "suffield": "Hartford",
+ "thomaston": "Litchfield",
+ "thompson": "Windham",
+ "tolland": "Tolland",
+ "torrington": "Litchfield",
+ "trumbull": "Fairfield",
+ "union": "Tolland",
+ "vernon": "Tolland",
+ "voluntown": "New London",
+ "wallingford": "New Haven",
+ "warren": "Litchfield",
+ "washington": "Litchfield",
+ "waterbury": "New Haven",
+ "waterford": "New London",
+ "watertown": "Litchfield",
+ "west hartford": "Hartford",
+ "west haven": "New Haven",
+ "westbrook": "Middlesex",
+ "weston": "Fairfield",
+ "westport": "Fairfield",
+ "wethersfield": "Hartford",
+ "willington": "Tolland",
+ "wilton": "Fairfield",
+ "winchester": "Litchfield",
+ "windham": "Windham",
+ "windsor": "Hartford",
+ "windsor locks": "Hartford",
+ "wolcott": "New Haven",
+ "woodbridge": "New Haven",
+ "woodbury": "Litchfield",
+ "woodstock": "Windham",
+ },
+ "MA": {
+ "abington": "Plymouth",
+ "acton": "Middlesex",
+ "acushnet": "Bristol",
+ "adams": "Berkshire",
+ "agawam": "Hampden",
+ "alford": "Berkshire",
+ "amesbury": "Essex",
+ "amherst": "Hampshire",
+ "andover": "Essex",
+ "arlington": "Middlesex",
+ "ashburnham": "Worcester",
+ "ashby": "Middlesex",
+ "ashfield": "Franklin",
+ "ashland": "Middlesex",
+ "athol": "Worcester",
+ "attleboro": "Bristol",
+ "auburn": "Worcester",
+ "avon": "Norfolk",
+ "ayer": "Middlesex",
+ "barnstable": "Barnstable",
+ "barre": "Worcester",
+ "becket": "Berkshire",
+ "bedford": "Middlesex",
+ "belchertown": "Hampshire",
+ "bellingham": "Norfolk",
+ "belmont": "Middlesex",
+ "berkley": "Bristol",
+ "berlin": "Worcester",
+ "bernardston": "Franklin",
+ "beverly": "Essex",
+ "billerica": "Middlesex",
+ "blackstone": "Worcester",
+ "blandford": "Hampden",
+ "bolton": "Worcester",
+ "boston": "Suffolk",
+ "bourne": "Barnstable",
+ "boxborough": "Middlesex",
+ "boxford": "Essex",
+ "boylston": "Worcester",
+ "braintree": "Norfolk",
+ "brewster": "Barnstable",
+ "bridgewater": "Plymouth",
+ "brimfield": "Hampden",
+ "brockton": "Plymouth",
+ "brookfield": "Worcester",
+ "brookline": "Norfolk",
+ "buckland": "Franklin",
+ "burlington": "Middlesex",
+ "cambridge": "Middlesex",
+ "canton": "Norfolk",
+ "carlisle": "Middlesex",
+ "carver": "Plymouth",
+ "charlemont": "Franklin",
+ "charlton": "Worcester",
+ "chatham": "Barnstable",
+ "chelmsford": "Middlesex",
+ "chelsea": "Suffolk",
+ "cheshire": "Berkshire",
+ "chester": "Hampden",
+ "chesterfield": "Hampshire",
+ "chicopee": "Hampden",
+ "chilmark": "Dukes",
+ "clarksburg": "Berkshire",
+ "clinton": "Worcester",
+ "cohasset": "Norfolk",
+ "colrain": "Franklin",
+ "concord": "Middlesex",
+ "conway": "Franklin",
+ "cummington": "Hampshire",
+ "dalton": "Berkshire",
+ "danvers": "Essex",
+ "dartmouth": "Bristol",
+ "dedham": "Norfolk",
+ "deerfield": "Franklin",
+ "dennis": "Barnstable",
+ "dighton": "Bristol",
+ "douglas": "Worcester",
+ "dover": "Norfolk",
+ "dracut": "Middlesex",
+ "dudley": "Worcester",
+ "dunstable": "Middlesex",
+ "duxbury": "Plymouth",
+ "east bridgewater": "Plymouth",
+ "east brookfield": "Worcester",
+ "east longmeadow": "Hampden",
+ "eastham": "Barnstable",
+ "easthampton": "Hampshire",
+ "easton": "Bristol",
+ "edgartown": "Dukes",
+ "egremont": "Berkshire",
+ "erving": "Franklin",
+ "essex": "Essex",
+ "everett": "Middlesex",
+ "fairhaven": "Bristol",
+ "fall river": "Bristol",
+ "falmouth": "Barnstable",
+ "fitchburg": "Worcester",
+ "florida": "Berkshire",
+ "foxborough": "Norfolk",
+ "framingham": "Middlesex",
+ "franklin": "Norfolk",
+ "freetown": "Bristol",
+ "gardner": "Worcester",
+ "georgetown": "Essex",
+ "gill": "Franklin",
+ "gloucester": "Essex",
+ "goshen": "Hampshire",
+ "gosnold": "Dukes",
+ "grafton": "Worcester",
+ "granby": "Hampshire",
+ "granville": "Hampden",
+ "great barrington": "Berkshire",
+ "greenfield": "Franklin",
+ "groton": "Middlesex",
+ "groveland": "Essex",
+ "hadley": "Hampshire",
+ "halifax": "Plymouth",
+ "hamilton": "Essex",
+ "hampden": "Hampden",
+ "hancock": "Berkshire",
+ "hanover": "Plymouth",
+ "hanson": "Plymouth",
+ "hardwick": "Worcester",
+ "harvard": "Worcester",
+ "harwich": "Barnstable",
+ "hatfield": "Hampshire",
+ "haverhill": "Essex",
+ "hawley": "Franklin",
+ "heath": "Franklin",
+ "hingham": "Plymouth",
+ "hinsdale": "Berkshire",
+ "holbrook": "Norfolk",
+ "holden": "Worcester",
+ "holland": "Hampden",
+ "holliston": "Middlesex",
+ "holyoke": "Hampden",
+ "hopedale": "Worcester",
+ "hopkinton": "Middlesex",
+ "hubbardston": "Worcester",
+ "hudson": "Middlesex",
+ "hull": "Plymouth",
+ "huntington": "Hampshire",
+ "ipswich": "Essex",
+ "kingston": "Plymouth",
+ "lakeville": "Plymouth",
+ "lancaster": "Worcester",
+ "lanesborough": "Berkshire",
+ "lawrence": "Essex",
+ "lee": "Berkshire",
+ "leicester": "Worcester",
+ "lenox": "Berkshire",
+ "leominster": "Worcester",
+ "leverett": "Franklin",
+ "lexington": "Middlesex",
+ "leyden": "Franklin",
+ "lincoln": "Middlesex",
+ "littleton": "Middlesex",
+ "longmeadow": "Hampden",
+ "lowell": "Middlesex",
+ "ludlow": "Hampden",
+ "lunenburg": "Worcester",
+ "lynn": "Essex",
+ "lynnfield": "Essex",
+ "malden": "Middlesex",
+ "manchester-by-the-sea": "Essex",
+ "manchester": "Essex",
+ "mansfield": "Bristol",
+ "marblehead": "Essex",
+ "marion": "Plymouth",
+ "marlborough": "Middlesex",
+ "marshfield": "Plymouth",
+ "mashpee": "Barnstable",
+ "mattapoisett": "Plymouth",
+ "maynard": "Middlesex",
+ "medfield": "Norfolk",
+ "medford": "Middlesex",
+ "medway": "Norfolk",
+ "melrose": "Middlesex",
+ "mendon": "Worcester",
+ "merrimac": "Essex",
+ "methuen": "Essex",
+ "middleborough": "Plymouth",
+ "middlefield": "Hampshire",
+ "middleton": "Essex",
+ "milford": "Worcester",
+ "millbury": "Worcester",
+ "millis": "Norfolk",
+ "millville": "Worcester",
+ "milton": "Norfolk",
+ "monroe": "Franklin",
+ "monson": "Hampden",
+ "montague": "Franklin",
+ "monterey": "Berkshire",
+ "montgomery": "Hampden",
+ "mount washington": "Berkshire",
+ "nahant": "Essex",
+ "nantucket": "Nantucket",
+ "natick": "Middlesex",
+ "needham": "Norfolk",
+ "new ashford": "Berkshire",
+ "new bedford": "Bristol",
+ "new braintree": "Worcester",
+ "new marlborough": "Berkshire",
+ "new salem": "Franklin",
+ "newbury": "Essex",
+ "newburyport": "Essex",
+ "newton": "Middlesex",
+ "norfolk": "Norfolk",
+ "north adams": "Berkshire",
+ "north andover": "Essex",
+ "north attleborough": "Bristol",
+ "north brookfield": "Worcester",
+ "north reading": "Middlesex",
+ "northampton": "Hampshire",
+ "northborough": "Worcester",
+ "northbridge": "Worcester",
+ "northfield": "Franklin",
+ "norton": "Bristol",
+ "norwell": "Plymouth",
+ "norwood": "Norfolk",
+ "oak bluffs": "Dukes",
+ "oakham": "Worcester",
+ "orange": "Franklin",
+ "orleans": "Barnstable",
+ "otis": "Berkshire",
+ "oxford": "Worcester",
+ "palmer": "Hampden",
+ "paxton": "Worcester",
+ "peabody": "Essex",
+ "pelham": "Hampshire",
+ "pembroke": "Plymouth",
+ "pepperell": "Middlesex",
+ "peru": "Berkshire",
+ "petersham": "Worcester",
+ "phillipston": "Worcester",
+ "pittsfield": "Berkshire",
+ "plainfield": "Hampshire",
+ "plainville": "Norfolk",
+ "plymouth": "Plymouth",
+ "plympton": "Plymouth",
+ "princeton": "Worcester",
+ "provincetown": "Barnstable",
+ "quincy": "Norfolk",
+ "randolph": "Norfolk",
+ "raynham": "Bristol",
+ "reading": "Middlesex",
+ "rehoboth": "Bristol",
+ "revere": "Suffolk",
+ "richmond": "Berkshire",
+ "rochester": "Plymouth",
+ "rockland": "Plymouth",
+ "rockport": "Essex",
+ "rowe": "Franklin",
+ "rowley": "Essex",
+ "royalston": "Worcester",
+ "russell": "Hampden",
+ "rutland": "Worcester",
+ "salem": "Essex",
+ "salisbury": "Essex",
+ "sandisfield": "Berkshire",
+ "sandwich": "Barnstable",
+ "saugus": "Essex",
+ "savoy": "Berkshire",
+ "scituate": "Plymouth",
+ "seekonk": "Bristol",
+ "sharon": "Norfolk",
+ "sheffield": "Berkshire",
+ "shelburne": "Franklin",
+ "sherborn": "Middlesex",
+ "shirley": "Middlesex",
+ "shrewsbury": "Worcester",
+ "shutesbury": "Franklin",
+ "somerset": "Bristol",
+ "somerville": "Middlesex",
+ "south hadley": "Hampshire",
+ "southampton": "Hampshire",
+ "southborough": "Worcester",
+ "southbridge": "Worcester",
+ "southwick": "Hampden",
+ "spencer": "Worcester",
+ "springfield": "Hampden",
+ "sterling": "Worcester",
+ "stockbridge": "Berkshire",
+ "stoneham": "Middlesex",
+ "stoughton": "Norfolk",
+ "stow": "Middlesex",
+ "sturbridge": "Worcester",
+ "sudbury": "Middlesex",
+ "sunderland": "Franklin",
+ "sutton": "Worcester",
+ "swampscott": "Essex",
+ "swansea": "Bristol",
+ "taunton": "Bristol",
+ "templeton": "Worcester",
+ "tewksbury": "Middlesex",
+ "tisbury": "Dukes",
+ "tolland": "Hampden",
+ "topsfield": "Essex",
+ "townsend": "Middlesex",
+ "truro": "Barnstable",
+ "tyngsborough": "Middlesex",
+ "tyringham": "Berkshire",
+ "upton": "Worcester",
+ "uxbridge": "Worcester",
+ "wakefield": "Middlesex",
+ "wales": "Hampden",
+ "walpole": "Norfolk",
+ "waltham": "Middlesex",
+ "ware": "Hampshire",
+ "wareham": "Plymouth",
+ "warren": "Worcester",
+ "warwick": "Franklin",
+ "washington": "Berkshire",
+ "watertown": "Middlesex",
+ "wayland": "Middlesex",
+ "webster": "Worcester",
+ "wellesley": "Norfolk",
+ "wellfleet": "Barnstable",
+ "wendell": "Franklin",
+ "wenham": "Essex",
+ "west boylston": "Worcester",
+ "west bridgewater": "Plymouth",
+ "west brookfield": "Worcester",
+ "west newbury": "Essex",
+ "west springfield": "Hampden",
+ "west stockbridge": "Berkshire",
+ "west tisbury": "Dukes",
+ "westborough": "Worcester",
+ "westfield": "Hampden",
+ "westford": "Middlesex",
+ "westhampton": "Hampshire",
+ "westminster": "Worcester",
+ "weston": "Middlesex",
+ "westport": "Bristol",
+ "westwood": "Norfolk",
+ "weymouth": "Norfolk",
+ "whately": "Franklin",
+ "whitman": "Plymouth",
+ "wilbraham": "Hampden",
+ "williamsburg": "Hampshire",
+ "williamstown": "Berkshire",
+ "wilmington": "Middlesex",
+ "winchendon": "Worcester",
+ "winchester": "Middlesex",
+ "windsor": "Berkshire",
+ "winthrop": "Suffolk",
+ "woburn": "Middlesex",
+ "worcester": "Worcester",
+ "worthington": "Hampshire",
+ "wrentham": "Norfolk",
+ "yarmouth": "Barnstable",
+ },
+ "ME": {
+ "auburn": "Androscoggin",
+ "durham": "Androscoggin",
+ "greene": "Androscoggin",
+ "leeds": "Androscoggin",
+ "lewiston": "Androscoggin",
+ "lisbon": "Androscoggin",
+ "livermore": "Androscoggin",
+ "livermore falls": "Androscoggin",
+ "mechanic falls": "Androscoggin",
+ "minot": "Androscoggin",
+ "poland": "Androscoggin",
+ "sabattus": "Androscoggin",
+ "turner": "Androscoggin",
+ "wales": "Androscoggin",
+ "allagash": "Aroostook",
+ "amity": "Aroostook",
+ "ashland": "Aroostook",
+ "bancroft": "Aroostook",
+ "blaine": "Aroostook",
+ "bridgewater": "Aroostook",
+ "caribou": "Aroostook",
+ "castle hill": "Aroostook",
+ "caswell": "Aroostook",
+ "chapman": "Aroostook",
+ "connor": "Aroostook",
+ "crystal": "Aroostook",
+ "dyer brook": "Aroostook",
+ "eagle lake": "Aroostook",
+ "easton": "Aroostook",
+ "fort fairfield": "Aroostook",
+ "fort kent": "Aroostook",
+ "frenchville": "Aroostook",
+ "grand isle": "Aroostook",
+ "hamlin": "Aroostook",
+ "haynesville": "Aroostook",
+ "hersey": "Aroostook",
+ "hodgdon": "Aroostook",
+ "houlton": "Aroostook",
+ "island falls": "Aroostook",
+ "limestone": "Aroostook",
+ "linneus": "Aroostook",
+ "littleton": "Aroostook",
+ "ludlow": "Aroostook",
+ "madawaska": "Aroostook",
+ "mapleton": "Aroostook",
+ "mars hill": "Aroostook",
+ "masardis": "Aroostook",
+ "merrill": "Aroostook",
+ "monticello": "Aroostook",
+ "new canada": "Aroostook",
+ "new limerick": "Aroostook",
+ "new sweden": "Aroostook",
+ "oakfield": "Aroostook",
+ "orient": "Aroostook",
+ "perham": "Aroostook",
+ "portage lake": "Aroostook",
+ "presque isle": "Aroostook",
+ "saint agatha": "Aroostook",
+ "saint francis": "Aroostook",
+ "sherman": "Aroostook",
+ "smyrna": "Aroostook",
+ "stockholm": "Aroostook",
+ "van buren": "Aroostook",
+ "wade": "Aroostook",
+ "wallagrass": "Aroostook",
+ "washburn": "Aroostook",
+ "westfield": "Aroostook",
+ "weston": "Aroostook",
+ "woodland": "Aroostook",
+ "baldwin": "Cumberland",
+ "bridgton": "Cumberland",
+ "brunswick": "Cumberland",
+ "cape elizabeth": "Cumberland",
+ "casco": "Cumberland",
+ "chebeague island": "Cumberland",
+ "cumberland": "Cumberland",
+ "falmouth": "Cumberland",
+ "freeport": "Cumberland",
+ "frye island": "Cumberland",
+ "gorham": "Cumberland",
+ "gray": "Cumberland",
+ "harpswell": "Cumberland",
+ "harrison": "Cumberland",
+ "long island": "Cumberland",
+ "naples": "Cumberland",
+ "new gloucester": "Cumberland",
+ "north yarmouth": "Cumberland",
+ "portland": "Cumberland",
+ "pownal": "Cumberland",
+ "raymond": "Cumberland",
+ "scarborough": "Cumberland",
+ "sebago": "Cumberland",
+ "south portland": "Cumberland",
+ "standish": "Cumberland",
+ "westbrook": "Cumberland",
+ "windham": "Cumberland",
+ "yarmouth": "Cumberland",
+ "avon": "Franklin",
+ "carrabassett valley": "Franklin",
+ "carthage": "Franklin",
+ "chesterville": "Franklin",
+ "eustis": "Franklin",
+ "farmington": "Franklin",
+ "industry": "Franklin",
+ "jay": "Franklin",
+ "kingfield": "Franklin",
+ "new sharon": "Franklin",
+ "new vineyard": "Franklin",
+ "phillips": "Franklin",
+ "rangeley": "Franklin",
+ "strong": "Franklin",
+ "temple": "Franklin",
+ "weld": "Franklin",
+ "wilton": "Franklin",
+ "amherst": "Hancock",
+ "aurora": "Hancock",
+ "bar harbor": "Hancock",
+ "blue hill": "Hancock",
+ "brooklin": "Hancock",
+ "brooksville": "Hancock",
+ "bucksport": "Hancock",
+ "castine": "Hancock",
+ "cranberry isles": "Hancock",
+ "dedham": "Hancock",
+ "deer isle": "Hancock",
+ "eastbrook": "Hancock",
+ "ellsworth": "Hancock",
+ "franklin": "Hancock",
+ "frenchboro": "Hancock",
+ "gouldsboro": "Hancock",
+ "hancock": "Hancock",
+ "lamoine": "Hancock",
+ "mariaville": "Hancock",
+ "mount desert": "Hancock",
+ "orland": "Hancock",
+ "otis": "Hancock",
+ "penobscot": "Hancock",
+ "sedgwick": "Hancock",
+ "sorrento": "Hancock",
+ "southwest harbor": "Hancock",
+ "stonington": "Hancock",
+ "sullivan": "Hancock",
+ "surry": "Hancock",
+ "swans island": "Hancock",
+ "tremont": "Hancock",
+ "trenton": "Hancock",
+ "verona island": "Hancock",
+ "waltham": "Hancock",
+ "winter harbor": "Hancock",
+ "albion": "Kennebec",
+ "augusta": "Kennebec",
+ "belgrade": "Kennebec",
+ "benton": "Kennebec",
+ "chelsea": "Kennebec",
+ "china": "Kennebec",
+ "clinton": "Kennebec",
+ "farmingdale": "Kennebec",
+ "fayette": "Kennebec",
+ "gardiner": "Kennebec",
+ "hallowell": "Kennebec",
+ "litchfield": "Kennebec",
+ "manchester": "Kennebec",
+ "monmouth": "Kennebec",
+ "mount vernon": "Kennebec",
+ "oakland": "Kennebec",
+ "pittston": "Kennebec",
+ "randolph": "Kennebec",
+ "readfield": "Kennebec",
+ "rome": "Kennebec",
+ "sidney": "Kennebec",
+ "vassalboro": "Kennebec",
+ "vienna": "Kennebec",
+ "waterville": "Kennebec",
+ "wayne": "Kennebec",
+ "west gardiner": "Kennebec",
+ "windsor": "Kennebec",
+ "winslow": "Kennebec",
+ "winthrop": "Kennebec",
+ "appleton": "Knox",
+ "camden": "Knox",
+ "cushing": "Knox",
+ "friendship": "Knox",
+ "hope": "Knox",
+ "isle au haut": "Knox",
+ "north haven": "Knox",
+ "owls head": "Knox",
+ "rockland": "Knox",
+ "rockport": "Knox",
+ "saint george": "Knox",
+ "south thomaston": "Knox",
+ "thomaston": "Knox",
+ "union": "Knox",
+ "vinalhaven": "Knox",
+ "warren": "Knox",
+ "washington": "Knox",
+ "alna": "Lincoln",
+ "boothbay": "Lincoln",
+ "boothbay harbor": "Lincoln",
+ "bremen": "Lincoln",
+ "bristol": "Lincoln",
+ "damariscotta": "Lincoln",
+ "dresden": "Lincoln",
+ "edgecomb": "Lincoln",
+ "jefferson": "Lincoln",
+ "newcastle": "Lincoln",
+ "nobleboro": "Lincoln",
+ "somerville": "Lincoln",
+ "south bristol": "Lincoln",
+ "southport": "Lincoln",
+ "waldoboro": "Lincoln",
+ "westport island": "Lincoln",
+ "whitefield": "Lincoln",
+ "wiscasset": "Lincoln",
+ "albany": "Oxford",
+ "andover": "Oxford",
+ "bethel": "Oxford",
+ "brownfield": "Oxford",
+ "buckfield": "Oxford",
+ "byron": "Oxford",
+ "canton": "Oxford",
+ "denmark": "Oxford",
+ "dixfield": "Oxford",
+ "fryeburg": "Oxford",
+ "gilead": "Oxford",
+ "greenwood": "Oxford",
+ "hanover": "Oxford",
+ "hartford": "Oxford",
+ "hebron": "Oxford",
+ "hiram": "Oxford",
+ "lovell": "Oxford",
+ "mexico": "Oxford",
+ "newry": "Oxford",
+ "norway": "Oxford",
+ "oxford": "Oxford",
+ "paris": "Oxford",
+ "peru": "Oxford",
+ "porter": "Oxford",
+ "roxbury": "Oxford",
+ "rumford": "Oxford",
+ "stoneham": "Oxford",
+ "stow": "Oxford",
+ "sumner": "Oxford",
+ "sweden": "Oxford",
+ "upton": "Oxford",
+ "waterford": "Oxford",
+ "west paris": "Oxford",
+ "woodstock": "Oxford",
+ "milton": "Oxford",
+ "alton": "Penobscot",
+ "bangor": "Penobscot",
+ "bradford": "Penobscot",
+ "bradley": "Penobscot",
+ "brewer": "Penobscot",
+ "burlington": "Penobscot",
+ "carmel": "Penobscot",
+ "charleston": "Penobscot",
+ "chester": "Penobscot",
+ "clifton": "Penobscot",
+ "corinna": "Penobscot",
+ "corinth": "Penobscot",
+ "dexter": "Penobscot",
+ "dixmont": "Penobscot",
+ "east millinocket": "Penobscot",
+ "eddington": "Penobscot",
+ "edinburg": "Penobscot",
+ "enfield": "Penobscot",
+ "etna": "Penobscot",
+ "exeter": "Penobscot",
+ "garland": "Penobscot",
+ "glenburn": "Penobscot",
+ "greenbush": "Penobscot",
+ "greenfield": "Penobscot",
+ "hampden": "Penobscot",
+ "holden": "Penobscot",
+ "howland": "Penobscot",
+ "hudson": "Penobscot",
+ "kenduskeag": "Penobscot",
+ "lagrange": "Penobscot",
+ "lee": "Penobscot",
+ "levant": "Penobscot",
+ "lincoln": "Penobscot",
+ "lowell": "Penobscot",
+ "mattawamkeag": "Penobscot",
+ "maxfield": "Penobscot",
+ "medway": "Penobscot",
+ "milford": "Penobscot",
+ "millinocket": "Penobscot",
+ "newburgh": "Penobscot",
+ "newport": "Penobscot",
+ "old town": "Penobscot",
+ "orono": "Penobscot",
+ "orrington": "Penobscot",
+ "passadumkeag": "Penobscot",
+ "patten": "Penobscot",
+ "plymouth": "Penobscot",
+ "prentiss": "Penobscot",
+ "stetson": "Penobscot",
+ "springfield": "Penobscot",
+ "stacyville": "Penobscot",
+ "veazie": "Penobscot",
+ "winn": "Penobscot",
+ "woodville": "Penobscot",
+ "mount chase": "Penobscot",
+ "abbot": "Piscataquis",
+ "atkinson": "Piscataquis",
+ "beaver cove": "Piscataquis",
+ "bowerbank": "Piscataquis",
+ "brownville": "Piscataquis",
+ "dover-foxcroft": "Piscataquis",
+ "greenville": "Piscataquis",
+ "guilford": "Piscataquis",
+ "medford": "Piscataquis",
+ "milo": "Piscataquis",
+ "monson": "Piscataquis",
+ "parkman": "Piscataquis",
+ "sangerville": "Piscataquis",
+ "sebec": "Piscataquis",
+ "shirley": "Piscataquis",
+ "wellington": "Piscataquis",
+ "willimantic": "Piscataquis",
+ "arrowsic": "Sagadahoc",
+ "bath": "Sagadahoc",
+ "bowdoin": "Sagadahoc",
+ "bowdoinham": "Sagadahoc",
+ "georgetown": "Sagadahoc",
+ "phippsburg": "Sagadahoc",
+ "richmond": "Sagadahoc",
+ "topsham": "Sagadahoc",
+ "west bath": "Sagadahoc",
+ "woolwich": "Sagadahoc",
+ "anson": "Somerset",
+ "athens": "Somerset",
+ "bingham": "Somerset",
+ "cambridge": "Somerset",
+ "canaan": "Somerset",
+ "caratunk": "Somerset",
+ "cornville": "Somerset",
+ "detroit": "Somerset",
+ "embden": "Somerset",
+ "fairfield": "Somerset",
+ "harmony": "Somerset",
+ "hartland": "Somerset",
+ "jackman": "Somerset",
+ "madison": "Somerset",
+ "mercer": "Somerset",
+ "moscow": "Somerset",
+ "new portland": "Somerset",
+ "norridgewock": "Somerset",
+ "palmyra": "Somerset",
+ "pittsfield": "Somerset",
+ "ripley": "Somerset",
+ "saint albans": "Somerset",
+ "skowhegan": "Somerset",
+ "smithfield": "Somerset",
+ "solon": "Somerset",
+ "starks": "Somerset",
+ "belfast": "Waldo",
+ "belmont": "Waldo",
+ "brooks": "Waldo",
+ "burnham": "Waldo",
+ "frankfort": "Waldo",
+ "freedom": "Waldo",
+ "islesboro": "Waldo",
+ "jackson": "Waldo",
+ "knox": "Waldo",
+ "liberty": "Waldo",
+ "lincolnville": "Waldo",
+ "monroe": "Waldo",
+ "montville": "Waldo",
+ "morrill": "Waldo",
+ "northport": "Waldo",
+ "palermo": "Waldo",
+ "prospect": "Waldo",
+ "searsmont": "Waldo",
+ "searsport": "Waldo",
+ "stockton springs": "Waldo",
+ "swanville": "Waldo",
+ "thorndike": "Waldo",
+ "troy": "Waldo",
+ "unity": "Waldo",
+ "waldo": "Waldo",
+ "winterport": "Waldo",
+ "addison": "Washington",
+ "alexander": "Washington",
+ "baileyville": "Washington",
+ "beals": "Washington",
+ "beddington": "Washington",
+ "calais": "Washington",
+ "centerville": "Washington",
+ "charlotte": "Washington",
+ "cherryfield": "Washington",
+ "columbia": "Washington",
+ "columbia falls": "Washington",
+ "cooper": "Washington",
+ "crawford": "Washington",
+ "cutler": "Washington",
+ "danforth": "Washington",
+ "deblois": "Washington",
+ "dennysville": "Washington",
+ "east machias": "Washington",
+ "eastport": "Washington",
+ "harrington": "Washington",
+ "jonesboro": "Washington",
+ "jonesport": "Washington",
+ "lubec": "Washington",
+ "machias": "Washington",
+ "machiasport": "Washington",
+ "marion": "Washington",
+ "marshfield": "Washington",
+ "meddybemps": "Washington",
+ "milbridge": "Washington",
+ "northfield": "Washington",
+ "pembroke": "Washington",
+ "perry": "Washington",
+ "princeton": "Washington",
+ "robbinston": "Washington",
+ "roque bluffs": "Washington",
+ "steuben": "Washington",
+ "talmadge": "Washington",
+ "topsfield": "Washington",
+ "vanceboro": "Washington",
+ "waite": "Washington",
+ "wesley": "Washington",
+ "whiting": "Washington",
+ "whitneyville": "Washington",
+ "acton": "York",
+ "alfred": "York",
+ "arundel": "York",
+ "berwick": "York",
+ "biddeford": "York",
+ "buxton": "York",
+ "cornish": "York",
+ "dayton": "York",
+ "eliot": "York",
+ "hollis": "York",
+ "kennebunk": "York",
+ "kennebunkport": "York",
+ "kittery": "York",
+ "lebanon": "York",
+ "limerick": "York",
+ "limington": "York",
+ "lyman": "York",
+ "newfield": "York",
+ "north berwick": "York",
+ "ogunquit": "York",
+ "old orchard beach": "York",
+ "parsonsfield": "York",
+ "saco": "York",
+ "sanford": "York",
+ "shapleigh": "York",
+ "south berwick": "York",
+ "waterboro": "York",
+ "wells": "York",
+ "york": "York",
+ },
+ "NH": {
+ "acworth": "Sullivan",
+ "albany": "Carroll",
+ "alexandria": "Grafton",
+ "allenstown": "Merrimack",
+ "alstead": "Cheshire",
+ "alton": "Belknap",
+ "amherst": "Hillsborough",
+ "andover": "Merrimack",
+ "antrim": "Hillsborough",
+ "ashland": "Grafton",
+ "atkinson": "Rockingham",
+ "auburn": "Rockingham",
+ "barnstead": "Belknap",
+ "barrington": "Strafford",
+ "bartlett": "Carroll",
+ "bath": "Grafton",
+ "bedford": "Hillsborough",
+ "belmont": "Belknap",
+ "bennington": "Hillsborough",
+ "benton": "Grafton",
+ "berlin": "Coos",
+ "bethlehem": "Grafton",
+ "boscawen": "Merrimack",
+ "bow": "Merrimack",
+ "bradford": "Merrimack",
+ "brentwood": "Rockingham",
+ "bridgewater": "Grafton",
+ "bristol": "Grafton",
+ "brookfield": "Carroll",
+ "brookline": "Hillsborough",
+ "campton": "Grafton",
+ "canaan": "Grafton",
+ "candia": "Rockingham",
+ "canterbury": "Merrimack",
+ "carroll": "Coos",
+ "center harbor": "Belknap",
+ "charlestown": "Sullivan",
+ "chatham": "Carroll",
+ "chester": "Rockingham",
+ "chesterfield": "Cheshire",
+ "chichester": "Merrimack",
+ "claremont": "Sullivan",
+ "clarksville": "Coos",
+ "colebrook": "Coos",
+ "columbia": "Coos",
+ "concord": "Merrimack",
+ "conway": "Carroll",
+ "cornish": "Sullivan",
+ "croydon": "Sullivan",
+ "dalton": "Coos",
+ "danbury": "Merrimack",
+ "danville": "Rockingham",
+ "deerfield": "Rockingham",
+ "deering": "Hillsborough",
+ "derry": "Rockingham",
+ "dorchester": "Grafton",
+ "dover": "Strafford",
+ "dublin": "Cheshire",
+ "dummer": "Coos",
+ "dunbarton": "Merrimack",
+ "durham": "Strafford",
+ "east kingston": "Rockingham",
+ "easton": "Grafton",
+ "eaton": "Carroll",
+ "effingham": "Carroll",
+ "ellsworth": "Grafton",
+ "enfield": "Grafton",
+ "epping": "Rockingham",
+ "epsom": "Merrimack",
+ "errol": "Coos",
+ "exeter": "Rockingham",
+ "farmington": "Strafford",
+ "fitzwilliam": "Cheshire",
+ "francestown": "Hillsborough",
+ "franconia": "Grafton",
+ "franklin": "Merrimack",
+ "freedom": "Carroll",
+ "fremont": "Rockingham",
+ "gilford": "Belknap",
+ "gilmanton": "Belknap",
+ "gilsum": "Cheshire",
+ "goffstown": "Hillsborough",
+ "gorham": "Coos",
+ "goshen": "Sullivan",
+ "grafton": "Grafton",
+ "grantham": "Sullivan",
+ "greenfield": "Hillsborough",
+ "greenland": "Rockingham",
+ "greenville": "Hillsborough",
+ "groton": "Grafton",
+ "hampstead": "Rockingham",
+ "hampton": "Rockingham",
+ "hampton falls": "Rockingham",
+ "hancock": "Hillsborough",
+ "hanover": "Grafton",
+ "harrisville": "Cheshire",
+ "hart's location": "Carroll",
+ "haverhill": "Grafton",
+ "hebron": "Grafton",
+ "henniker": "Merrimack",
+ "hill": "Merrimack",
+ "hillsborough": "Hillsborough",
+ "hinsdale": "Cheshire",
+ "holderness": "Grafton",
+ "hollis": "Hillsborough",
+ "hooksett": "Merrimack",
+ "hopkinton": "Merrimack",
+ "hudson": "Hillsborough",
+ "jackson": "Carroll",
+ "jaffrey": "Cheshire",
+ "jefferson": "Coos",
+ "keene": "Cheshire",
+ "kensington": "Rockingham",
+ "kingston": "Rockingham",
+ "laconia": "Belknap",
+ "lancaster": "Coos",
+ "landaff": "Grafton",
+ "langdon": "Sullivan",
+ "lee": "Strafford",
+ "lempster": "Sullivan",
+ "lincoln": "Grafton",
+ "lisbon": "Grafton",
+ "litchfield": "Hillsborough",
+ "littleton": "Grafton",
+ "londonderry": "Rockingham",
+ "loudon": "Merrimack",
+ "lyme": "Grafton",
+ "lyndeborough": "Hillsborough",
+ "madbury": "Strafford",
+ "madison": "Carroll",
+ "manchester": "Hillsborough",
+ "marlborough": "Cheshire",
+ "marlow": "Cheshire",
+ "mason": "Hillsborough",
+ "meredith": "Belknap",
+ "merrimack": "Hillsborough",
+ "middleton": "Strafford",
+ "milan": "Coos",
+ "milford": "Hillsborough",
+ "millsfield": "Coos",
+ "milton": "Strafford",
+ "monroe": "Grafton",
+ "mont vernon": "Hillsborough",
+ "moultonborough": "Carroll",
+ "nashua": "Hillsborough",
+ "nelson": "Cheshire",
+ "new boston": "Hillsborough",
+ "new castle": "Rockingham",
+ "new durham": "Strafford",
+ "new hampton": "Belknap",
+ "new ipswich": "Hillsborough",
+ "new london": "Merrimack",
+ "newbury": "Merrimack",
+ "newfields": "Rockingham",
+ "newington": "Rockingham",
+ "newmarket": "Rockingham",
+ "newport": "Sullivan",
+ "newton": "Rockingham",
+ "north hampton": "Rockingham",
+ "northfield": "Merrimack",
+ "northumberland": "Coos",
+ "northwood": "Rockingham",
+ "nottingham": "Rockingham",
+ "orange": "Grafton",
+ "orford": "Grafton",
+ "ossipee": "Carroll",
+ "pelham": "Hillsborough",
+ "pembroke": "Merrimack",
+ "peterborough": "Hillsborough",
+ "piermont": "Grafton",
+ "pittsburg": "Coos",
+ "pittsfield": "Merrimack",
+ "plainfield": "Sullivan",
+ "plaistow": "Rockingham",
+ "plymouth": "Grafton",
+ "portsmouth": "Rockingham",
+ "randolph": "Coos",
+ "raymond": "Rockingham",
+ "richmond": "Cheshire",
+ "rindge": "Cheshire",
+ "rochester": "Strafford",
+ "rollinsford": "Strafford",
+ "roxbury": "Cheshire",
+ "rumney": "Grafton",
+ "rye": "Rockingham",
+ "salem": "Rockingham",
+ "salisbury": "Merrimack",
+ "sanbornton": "Belknap",
+ "sandown": "Rockingham",
+ "sandwich": "Carroll",
+ "seabrook": "Rockingham",
+ "sharon": "Hillsborough",
+ "shelburne": "Coos",
+ "somersworth": "Strafford",
+ "south hampton": "Rockingham",
+ "springfield": "Sullivan",
+ "stark": "Coos",
+ "stewartstown": "Coos",
+ "stoddard": "Cheshire",
+ "strafford": "Strafford",
+ "stratford": "Coos",
+ "stratham": "Rockingham",
+ "sugar hill": "Grafton",
+ "sunapee": "Sullivan",
+ "surry": "Cheshire",
+ "sutton": "Merrimack",
+ "swanzey": "Cheshire",
+ "tamworth": "Carroll",
+ "temple": "Hillsborough",
+ "thornton": "Grafton",
+ "tilton": "Belknap",
+ "troy": "Cheshire",
+ "tuftonboro": "Carroll",
+ "unity": "Sullivan",
+ "wakefield": "Carroll",
+ "walpole": "Cheshire",
+ "warner": "Merrimack",
+ "warren": "Grafton",
+ "washington": "Sullivan",
+ "waterville valley": "Grafton",
+ "weare": "Hillsborough",
+ "webster": "Merrimack",
+ "wentworth": "Grafton",
+ "westmoreland": "Cheshire",
+ "whitefield": "Coos",
+ "wilmot": "Merrimack",
+ "wilton": "Hillsborough",
+ "winchester": "Cheshire",
+ "windham": "Rockingham",
+ "windsor": "Hillsborough",
+ "wolfeboro": "Carroll",
+ "woodstock": "Grafton",
+ },
+ "RI": {
+ "barrington": "Bristol",
+ "bristol": "Bristol",
+ "warren": "Bristol",
+ "coventry": "Kent",
+ "east greenwich": "Kent",
+ "warwick": "Kent",
+ "west greenwich": "Kent",
+ "west warwick": "Kent",
+ "jamestown": "Newport",
+ "little compton": "Newport",
+ "middletown": "Newport",
+ "newport": "Newport",
+ "portsmouth": "Newport",
+ "tiverton": "Newport",
+ "burrillville": "Providence",
+ "central falls": "Providence",
+ "cranston": "Providence",
+ "cumberland": "Providence",
+ "east providence": "Providence",
+ "foster": "Providence",
+ "glocester": "Providence",
+ "johnston": "Providence",
+ "lincoln": "Providence",
+ "north providence": "Providence",
+ "north smithfield": "Providence",
+ "pawtucket": "Providence",
+ "providence": "Providence",
+ "scituate": "Providence",
+ "smithfield": "Providence",
+ "woonsocket": "Providence",
+ "charlestown": "Washington",
+ "exeter": "Washington",
+ "hopkinton": "Washington",
+ "narragansett": "Washington",
+ "new shoreham": "Washington",
+ "north kingstown": "Washington",
+ "richmond": "Washington",
+ "south kingstown": "Washington",
+ "westerly": "Washington",
+ },
+ "VT": {
+ "addison": "Addison",
+ "bridport": "Addison",
+ "bristol": "Addison",
+ "cornwall": "Addison",
+ "ferrisburgh": "Addison",
+ "goshen": "Addison",
+ "granville": "Addison",
+ "hancock": "Addison",
+ "leicester": "Addison",
+ "lincoln": "Addison",
+ "middlebury": "Addison",
+ "monkton": "Addison",
+ "new haven": "Addison",
+ "orwell": "Addison",
+ "panton": "Addison",
+ "ripton": "Addison",
+ "salisbury": "Addison",
+ "shoreham": "Addison",
+ "starksboro": "Addison",
+ "vergennes": "Addison",
+ "waltham": "Addison",
+ "weybridge": "Addison",
+ "whiting": "Addison",
+ "arlington": "Bennington",
+ "bennington": "Bennington",
+ "dorset": "Bennington",
+ "glastenbury": "Bennington",
+ "landgrove": "Bennington",
+ "manchester": "Bennington",
+ "peru": "Bennington",
+ "pownal": "Bennington",
+ "readsboro": "Bennington",
+ "rupert": "Bennington",
+ "sandgate": "Bennington",
+ "searsburg": "Bennington",
+ "shaftsbury": "Bennington",
+ "stamford": "Bennington",
+ "sunderland": "Bennington",
+ "winhall": "Bennington",
+ "woodford": "Bennington",
+ "barnet": "Caledonia",
+ "burke": "Caledonia",
+ "danville": "Caledonia",
+ "groton": "Caledonia",
+ "hardwick": "Caledonia",
+ "kirby": "Caledonia",
+ "lyndon": "Caledonia",
+ "newark": "Caledonia",
+ "peacham": "Caledonia",
+ "ryegate": "Caledonia",
+ "sheffield": "Caledonia",
+ "st. johnsbury": "Caledonia",
+ "st johnsbury": "Caledonia",
+ "stannard": "Caledonia",
+ "sutton": "Caledonia",
+ "walden": "Caledonia",
+ "waterford": "Caledonia",
+ "wheelock": "Caledonia",
+ "bolton": "Chittenden",
+ "burlington": "Chittenden",
+ "charlotte": "Chittenden",
+ "colchester": "Chittenden",
+ "essex": "Chittenden",
+ "essex junction": "Chittenden",
+ "hinesburg": "Chittenden",
+ "huntington": "Chittenden",
+ "jericho": "Chittenden",
+ "milton": "Chittenden",
+ "richmond": "Chittenden",
+ "shelburne": "Chittenden",
+ "south burlington": "Chittenden",
+ "underhill": "Chittenden",
+ "westford": "Chittenden",
+ "williston": "Chittenden",
+ "winooski": "Chittenden",
+ "averill": "Essex",
+ "bloomfield": "Essex",
+ "brighton": "Essex",
+ "brunswick": "Essex",
+ "canaan": "Essex",
+ "concord": "Essex",
+ "east haven": "Essex",
+ "ferdinand": "Essex",
+ "granby": "Essex",
+ "guildhall": "Essex",
+ "lemington": "Essex",
+ "lunenburg": "Essex",
+ "maidstone": "Essex",
+ "norton": "Essex",
+ "victory": "Essex",
+ "bakersfield": "Franklin",
+ "berkshire": "Franklin",
+ "enosburg": "Franklin",
+ "enosburg falls": "Franklin",
+ "fairfax": "Franklin",
+ "fairfield": "Franklin",
+ "fletcher": "Franklin",
+ "franklin": "Franklin",
+ "georgia": "Franklin",
+ "highgate": "Franklin",
+ "montgomery": "Franklin",
+ "richford": "Franklin",
+ "sheldon": "Franklin",
+ "st. albans": "Franklin",
+ "st albans": "Franklin",
+ "swanton": "Franklin",
+ "alburgh": "Grand Isle",
+ "grand isle": "Grand Isle",
+ "isle la motte": "Grand Isle",
+ "north hero": "Grand Isle",
+ "south hero": "Grand Isle",
+ "belvidere": "Lamoille",
+ "cambridge": "Lamoille",
+ "eden": "Lamoille",
+ "elmore": "Lamoille",
+ "hyde park": "Lamoille",
+ "johnson": "Lamoille",
+ "morristown": "Lamoille",
+ "morrisville": "Lamoille",
+ "stowe": "Lamoille",
+ "waterville": "Lamoille",
+ "wolcott": "Lamoille",
+ "bradford": "Orange",
+ "braintree": "Orange",
+ "brookfield": "Orange",
+ "chelsea": "Orange",
+ "corinth": "Orange",
+ "fairlee": "Orange",
+ "newbury": "Orange",
+ "orange": "Orange",
+ "randolph": "Orange",
+ "strafford": "Orange",
+ "thetford": "Orange",
+ "topsham": "Orange",
+ "tunbridge": "Orange",
+ "vershire": "Orange",
+ "washington": "Orange",
+ "west fairlee": "Orange",
+ "williamstown": "Orange",
+ "albany": "Orleans",
+ "barton": "Orleans",
+ "brownington": "Orleans",
+ "charleston": "Orleans",
+ "coventry": "Orleans",
+ "craftsbury": "Orleans",
+ "derby": "Orleans",
+ "glover": "Orleans",
+ "greensboro": "Orleans",
+ "holland": "Orleans",
+ "irasburgh": "Orleans",
+ "jay": "Orleans",
+ "lowell": "Orleans",
+ "morgan": "Orleans",
+ "newport": "Orleans",
+ "troy": "Orleans",
+ "westfield": "Orleans",
+ "westmore": "Orleans",
+ "benson": "Rutland",
+ "brandon": "Rutland",
+ "castleton": "Rutland",
+ "chittenden": "Rutland",
+ "clarendon": "Rutland",
+ "danby": "Rutland",
+ "fair haven": "Rutland",
+ "hubbardton": "Rutland",
+ "ira": "Rutland",
+ "killington": "Rutland",
+ "mendon": "Rutland",
+ "middletown springs": "Rutland",
+ "mount holly": "Rutland",
+ "mount tabor": "Rutland",
+ "pawlet": "Rutland",
+ "pittsfield": "Rutland",
+ "pittsford": "Rutland",
+ "poultney": "Rutland",
+ "proctor": "Rutland",
+ "rutland": "Rutland",
+ "shrewsbury": "Rutland",
+ "sudbury": "Rutland",
+ "tinmouth": "Rutland",
+ "wallingford": "Rutland",
+ "west haven": "Rutland",
+ "west rutland": "Rutland",
+ "barre": "Washington",
+ "berlin": "Washington",
+ "cabot": "Washington",
+ "calais": "Washington",
+ "duxbury": "Washington",
+ "east montpelier": "Washington",
+ "fayston": "Washington",
+ "marshfield": "Washington",
+ "middlesex": "Washington",
+ "montpelier": "Washington",
+ "moretown": "Washington",
+ "northfield": "Washington",
+ "plainfield": "Washington",
+ "roxbury": "Washington",
+ "waitsfield": "Washington",
+ "warren": "Washington",
+ "waterbury": "Washington",
+ "woodbury": "Washington",
+ "worcester": "Washington",
+ "athens": "Windham",
+ "brattleboro": "Windham",
+ "brookline": "Windham",
+ "dover": "Windham",
+ "dummerston": "Windham",
+ "grafton": "Windham",
+ "guilford": "Windham",
+ "halifax": "Windham",
+ "jamaica": "Windham",
+ "londonderry": "Windham",
+ "marlboro": "Windham",
+ "newfane": "Windham",
+ "putney": "Windham",
+ "rockingham": "Windham",
+ "somerset": "Windham",
+ "stratton": "Windham",
+ "townshend": "Windham",
+ "vernon": "Windham",
+ "wardsboro": "Windham",
+ "westminster": "Windham",
+ "whitingham": "Windham",
+ "wilmington": "Windham",
+ "windham": "Windham",
+ "andover": "Windsor",
+ "baltimore": "Windsor",
+ "barnard": "Windsor",
+ "bethel": "Windsor",
+ "bridgewater": "Windsor",
+ "cavendish": "Windsor",
+ "chester": "Windsor",
+ "hartford": "Windsor",
+ "hartland": "Windsor",
+ "ludlow": "Windsor",
+ "norwich": "Windsor",
+ "plymouth": "Windsor",
+ "pomfret": "Windsor",
+ "reading": "Windsor",
+ "rochester": "Windsor",
+ "royalton": "Windsor",
+ "sharon": "Windsor",
+ "springfield": "Windsor",
+ "stockbridge": "Windsor",
+ "weathersfield": "Windsor",
+ "weston": "Windsor",
+ "west windsor": "Windsor",
+ "windsor": "Windsor",
+ "woodstock": "Windsor",
+ },
+}
+
+
+def resolve_county_from_service_area(service_area: str, state_abbr: str) -> str | None:
+    """
+    Try to resolve a county name from a free-text service area string.
+
+    Strategy:
+    1. Check for whole-word county name mentions (e.g. "Throughout Rockingham County")
+    2. Tokenize and look each token up in TOWN_COUNTY_MAP, retrying with
+       filler prefixes ("greater ") and suffixes (" area") removed.
+
+    Args:
+        service_area: Free-text service area from CheapestOil
+        state_abbr: Two-letter state code
+
+    Returns:
+        County name string or None if no match found.
+    """
+    if not service_area or state_abbr not in TOWN_COUNTY_MAP:
+        return None
+
+    text_lower = service_area.strip().lower()
+
+    # 1. Direct county mentions. Match whole words only: a bare substring
+    # test lets "essex" match inside "middlesex".
+    for county in _NE_COUNTIES.get(state_abbr, []):
+        if re.search(r'\b' + re.escape(county.lower()) + r'\b', text_lower):
+            return county
+
+    # 2. Tokenize on commas, semicolons, slashes, ampersands and the word "and"
+    town_map = TOWN_COUNTY_MAP[state_abbr]
+    for token in re.split(r'[,;/&]+|\band\b', text_lower):
+        # Drop padding and a trailing period (also when a space precedes it)
+        token = token.strip().rstrip('.').rstrip()
+        if not token:
+            continue
+        if token in town_map:
+            return town_map[token]
+        # Retry without filler words, e.g. "greater nashua area" -> "nashua"
+        cleaned = token
+        for prefix in ("the ", "greater ", "town of ", "city of "):
+            cleaned = cleaned.removeprefix(prefix)
+        for suffix in (" surrounding towns", " surrounding area", " area", " region"):
+            cleaned = cleaned.removesuffix(suffix)
+        if cleaned != token and cleaned in town_map:
+            return town_map[cleaned]
+
+    return None
diff --git a/docker-compose.yml b/docker-compose.yml
index 83f9af3..1f526dc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,22 +1,11 @@
services:
app:
- build: . # Build the image from the Dockerfile in the current directory
+ build: .
container_name: fuel_scraper_app_service
+ ports:
+ - "9553:9553"
volumes:
- # Mount current directory for live code updates during development
- .:/app
- # If your app needs to connect to a DB on the host, and host.docker.internal
- # isn't working, you might need to add it to the host network (less secure, platform-dependent)
- # or use 'extra_hosts' on Linux.
- # For host.docker.internal to work on Linux, you might need:
extra_hosts:
- "host.docker.internal:host-gateway"
- #environment:
- # You can pass DATABASE_URL here to override database.py if needed
- # DATABASE_URL: "postgresql://your_user:your_password@host.docker.internal:5432/fuelprices"
- # PYTHONUNBUFFERED: 1 # Already in Dockerfile, but good practice
- # The default command comes from the Dockerfile's CMD
- # To keep the container running after the script finishes (for debugging or exec):
- # tty: true
- # stdin_open: true
\ No newline at end of file
diff --git a/fuel_scraper.py b/fuel_scraper.py
deleted file mode 100644
index 1193362..0000000
--- a/fuel_scraper.py
+++ /dev/null
@@ -1,360 +0,0 @@
-#!/usr/bin/env python3
-import requests
-from bs4 import BeautifulSoup
-from datetime import datetime
-import logging
-import os
-import re # For parsing zone number from slug
-
-from sqlalchemy.orm import Session
-from database import SessionLocal, init_db
-import models
-
-# --- SITES CONFIGURATION ---
-SITES_CONFIG = [
- {
- "site_name": "NewEnglandOil",
- "base_url": "https://www.newenglandoil.com",
- "url_template": "{base_url}/{state_slug}/{zone_slug}.asp?type={oil_type}",
- "oil_type": 0,
- "locations": {
- "connecticut": [
- "zone1", "zone2", "zone3", "zone4", "zone5", "zone6", "zone7",
- "zone8", "zone9", "zone10"
- ],
- "massachusetts": [
- "zone1", "zone2", "zone3", "zone4", "zone5", "zone6",
- "zone7", "zone8", "zone9", "zone10", "zone11", "zone12",
- "zone13","zone14","zone15"
- ],
- "newhampshire": [
- "zone1", "zone2", "zone3", "zone4", "zone5", "zone6"
- ],
- "rhodeisland": [
- "zone1", "zone2", "zone3", "zone4"
- ],
-
-
- }
- },
- {
- "site_name": "MaineOil",
- "base_url": "https://www.maineoil.com",
- "url_template": "{base_url}/{zone_slug}.asp?type={oil_type}",
- "oil_type": 0,
- "locations": {
- "maine": [
- "zone1", "zone2", "zone3", "zone4", "zone5",
- "zone6", "zone7"
- ]
- }
- }
-]
-
-# --- ZONE-TO-COUNTY MAPPING ---
-# Maps (state_key, zone_number) -> (state_abbrev, county_name)
-ZONE_COUNTY_MAP = {
- ("connecticut", 1): ("CT", "New London"),
- ("connecticut", 2): ("CT", "Windham"),
- ("connecticut", 3): ("CT", "New Haven"),
- ("connecticut", 4): ("CT", "Middlesex"),
- ("connecticut", 5): ("CT", "New Haven"),
- ("connecticut", 6): ("CT", "Hartford"),
- ("connecticut", 7): ("CT", "Litchfield"),
- ("connecticut", 8): ("CT", "Fairfield"),
- ("connecticut", 9): ("CT", "Tolland"),
- ("connecticut", 10): ("CT", "Litchfield"),
- ("massachusetts", 1): ("MA", "Suffolk"),
- ("massachusetts", 2): ("MA", "Middlesex"),
- ("massachusetts", 3): ("MA", "Norfolk"),
- ("massachusetts", 4): ("MA", "Plymouth"),
- ("massachusetts", 5): ("MA", "Middlesex"),
- ("massachusetts", 6): ("MA", "Bristol"),
- ("massachusetts", 7): ("MA", "Barnstable"),
- ("massachusetts", 8): ("MA", "Essex"),
- ("massachusetts", 9): ("MA", "Essex"),
- ("massachusetts", 10): ("MA", "Worcester"),
- ("massachusetts", 11): ("MA", "Worcester"),
- ("massachusetts", 12): ("MA", "Hampshire"),
- ("massachusetts", 13): ("MA", "Hampden"),
- ("massachusetts", 14): ("MA", "Franklin"),
- ("massachusetts", 15): ("MA", "Berkshire"),
- ("newhampshire", 1): ("NH", "Coos"),
- ("newhampshire", 2): ("NH", "Strafford"),
- ("newhampshire", 3): ("NH", "Merrimack"),
- ("newhampshire", 4): ("NH", "Grafton"),
- ("newhampshire", 5): ("NH", "Cheshire"),
- ("newhampshire", 6): ("NH", "Hillsborough"),
- ("rhodeisland", 1): ("RI", "Newport"),
- ("rhodeisland", 2): ("RI", "Providence"),
- ("rhodeisland", 3): ("RI", "Washington"),
- ("rhodeisland", 4): ("RI", "Kent"),
- ("maine", 1): ("ME", "Cumberland"),
- ("maine", 2): ("ME", "Kennebec"),
- ("maine", 3): ("ME", "Androscoggin"),
- ("maine", 4): ("ME", "York"),
- ("maine", 5): ("ME", "Knox"),
- ("maine", 6): ("ME", "Penobscot"),
- ("maine", 7): ("ME", "Washington"),
-}
-
-LOG_FILE = "oil_scraper.log"
-logging.basicConfig(
- filename=LOG_FILE,
- level=logging.INFO,
- format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
-)
-
-# --- Helper Functions ---
-def make_request(url):
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
- }
- try:
- response = requests.get(url, headers=headers, timeout=20)
- response.raise_for_status()
- return BeautifulSoup(response.content, 'html.parser')
- except requests.exceptions.RequestException as e:
- logging.error(f"Error fetching {url}: {e}")
- return None
-
-def parse_zone_slug_to_int(zone_slug_str):
- """Extracts the numeric part of a zone slug (e.g., "zone1" -> 1, "zonema5" -> 5)."""
- if not zone_slug_str: return None
- match = re.search(r'\d+$', zone_slug_str)
- if match:
- return int(match.group(0))
- logging.warning(f"Could not parse numeric zone from slug: '{zone_slug_str}'")
- return None
-
-def parse_price_table(soup, state_name_key, zone_slug_str):
- """Parses price tables. state_name_key is "connecticut", "maine", etc. zone_slug_str is "zone1", "zonema5", etc."""
- data_dicts = []
- all_tables_on_page = soup.find_all('table')
- logging.info(f"Found {len(all_tables_on_page)} table(s) on page for {state_name_key} - {zone_slug_str}.")
-
- if not all_tables_on_page:
- logging.warning(f"No HTML tables found at all for {state_name_key} - {zone_slug_str}.")
- return data_dicts
-
- # --- Convert zone_slug_str to integer ---
- zone_int = parse_zone_slug_to_int(zone_slug_str)
- if zone_int is None:
- logging.error(f"Cannot parse zone number for {state_name_key} - {zone_slug_str}. Skipping.")
- return data_dicts
-
- candidate_tables_found = 0
- for table_index, table in enumerate(all_tables_on_page):
- thead = table.find('thead')
- is_price_table = False
- actual_column_indices = {}
-
- if thead:
- headers_lower = [th.get_text(strip=True).lower() for th in thead.find_all('th')]
- logging.debug(f"Table {table_index} on {state_name_key}/{zone_slug_str} - headers: {headers_lower}")
- try:
- actual_column_indices['company'] = headers_lower.index('company name')
- price_col_name_part = 'price'
- actual_column_indices['price'] = next(i for i, header in enumerate(headers_lower) if price_col_name_part in header)
- actual_column_indices['date'] = headers_lower.index('date')
- is_price_table = True
- logging.debug(f"Table {table_index} identified as price table. Indices: {actual_column_indices}")
- except (ValueError, StopIteration):
- logging.debug(f"Table {table_index} headers do not contain all key columns.")
- else:
- logging.debug(f"Table {table_index} has no thead.")
-
- if not is_price_table:
- continue
-
- candidate_tables_found += 1
- tbody = table.find('tbody')
- if not tbody:
- logging.warning(f"Price table identified by headers has no tbody. Skipping. State: {state_name_key}, Zone: {zone_slug_str}")
- continue
- rows = tbody.find_all('tr')
- if not rows:
- logging.debug(f"No rows found in tbody for price table in {state_name_key}/{zone_slug_str}")
- continue
-
- for row_index, row in enumerate(rows):
- cells = row.find_all('td')
- max_required_index = max(actual_column_indices.values()) if actual_column_indices else -1
-
- if max_required_index == -1:
- logging.error(f"Logic error: is_price_table true but no column indices for {state_name_key}/{zone_slug_str}")
- continue
-
- if len(cells) > max_required_index:
- company_name_scraped = cells[actual_column_indices['company']].get_text(strip=True)
- price_str = cells[actual_column_indices['price']].get_text(strip=True)
- date_posted_str = cells[actual_column_indices['date']].get_text(strip=True)
-
- company_link = cells[actual_column_indices['company']].find('a')
- if company_link:
- company_name_scraped = company_link.get_text(strip=True)
-
- price_float = None
- try:
- cleaned_price_str = ''.join(filter(lambda x: x.isdigit() or x == '.', price_str))
- if cleaned_price_str:
- price_float = float(cleaned_price_str)
- except ValueError:
- logging.warning(f"Could not parse price: '{price_str}' for {company_name_scraped} in {state_name_key}/{zone_slug_str}.")
- except Exception as e:
- logging.error(f"Unexpected error parsing price: '{price_str}' for {company_name_scraped}. Error: {e}")
-
- data_dicts.append({
- "state": state_name_key.capitalize(), # Use the passed state_name_key
- "zone": zone_int, # Use the parsed integer zone
- "name": company_name_scraped,
- "price": price_float,
- "date": date_posted_str,
- })
- elif len(cells) > 0:
- logging.warning(f"Skipping row {row_index+1} with insufficient cells ({len(cells)}, need {max_required_index+1}) in {state_name_key}/{zone_slug_str}")
-
- if candidate_tables_found == 0:
- logging.warning(f"No tables matching expected price table structure found for {state_name_key} - {zone_slug_str}.")
- return data_dicts
-
-# --- Helper: Build county lookup ---
-def build_county_lookup(db_session):
- """Build (state_abbrev, county_name) -> county_id lookup from DB."""
- counties = db_session.query(models.County).all()
- lookup = {}
- for c in counties:
- lookup[(c.state, c.name)] = c.id
- logging.info(f"Built county lookup with {len(lookup)} entries")
- return lookup
-
-
-def resolve_county_id(state_key, zone_number, county_lookup):
- """Resolve county_id from ZONE_COUNTY_MAP and county lookup."""
- mapping = ZONE_COUNTY_MAP.get((state_key, zone_number))
- if not mapping:
- return None
- state_abbrev, county_name = mapping
- return county_lookup.get((state_abbrev, county_name))
-
-
-# --- Main Script ---
-def main():
- logging.info("Starting oil price scraper job.")
- try:
- init_db()
- logging.info("Database initialized/checked successfully.")
- except Exception as e:
- logging.error(f"Failed to initialize database: {e}", exc_info=True)
- return
-
- db_session: Session = SessionLocal()
- total_records_added_this_run = 0
-
- try:
- # Build county lookup at startup
- county_lookup = build_county_lookup(db_session)
-
- for site_config in SITES_CONFIG:
- site_name = site_config["site_name"]
- base_url = site_config["base_url"]
- url_template = site_config["url_template"]
- oil_type = site_config["oil_type"]
-
- logging.info(f"--- Processing site: {site_name} ---")
-
- for state_key_in_config, zone_slugs_list in site_config["locations"].items():
-
- for zone_slug_from_list in zone_slugs_list:
- format_params = {
- "base_url": base_url,
- "state_slug": state_key_in_config,
- "zone_slug": zone_slug_from_list,
- "oil_type": oil_type
- }
- target_url = url_template.format(**format_params)
-
- logging.info(f"Scraping: {target_url} (State: {state_key_in_config}, Zone Slug: {zone_slug_from_list})")
-
- soup = make_request(target_url)
- if soup:
- parsed_items = parse_price_table(soup, state_key_in_config, zone_slug_from_list)
-
- if parsed_items:
- # Resolve county_id for this zone
- zone_int = parse_zone_slug_to_int(zone_slug_from_list)
- county_id = None
- if zone_int is not None:
- county_id = resolve_county_id(state_key_in_config, zone_int, county_lookup)
-
- for item_dict in parsed_items:
- # Match by county_id when available to avoid duplicates
- # when multiple zones map to the same county
- if county_id is not None:
- existing_record = db_session.query(models.OilPrice).filter(
- models.OilPrice.name == item_dict["name"],
- models.OilPrice.state == item_dict["state"],
- models.OilPrice.county_id == county_id
- ).first()
- else:
- existing_record = db_session.query(models.OilPrice).filter(
- models.OilPrice.name == item_dict["name"],
- models.OilPrice.state == item_dict["state"],
- models.OilPrice.zone == item_dict["zone"]
- ).first()
-
- if existing_record:
- if existing_record.company_id is not None:
- logging.debug(f"Skipping update for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} due to non-null company_id")
- else:
- updated = False
- if county_id is not None and existing_record.county_id != county_id:
- existing_record.county_id = county_id
- updated = True
- if existing_record.price != item_dict["price"]:
- existing_record.price = item_dict["price"]
- existing_record.date = item_dict["date"]
- existing_record.scrapetimestamp = datetime.utcnow()
- logging.info(f"Updated price for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} to {item_dict['price']}")
- elif updated:
- existing_record.scrapetimestamp = datetime.utcnow()
- logging.info(f"Updated county_id for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} to {county_id}")
- else:
- logging.debug(f"Price unchanged for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}")
- else:
- oil_price_record = models.OilPrice(
- state=item_dict["state"],
- zone=item_dict["zone"],
- name=item_dict["name"],
- price=item_dict["price"],
- date=item_dict["date"],
- county_id=county_id,
- scrapetimestamp=datetime.utcnow()
- )
- db_session.add(oil_price_record)
- logging.info(f"Added new record for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} (county_id={county_id})")
- total_records_added_this_run += len(parsed_items)
- logging.info(f"Queued {len(parsed_items)} records from {site_name} - {state_key_in_config}/{zone_slug_from_list} for DB insertion.")
- else:
- logging.info(f"No data extracted from {target_url}")
- else:
- logging.warning(f"Failed to retrieve or parse {target_url}. Skipping.")
-
- if total_records_added_this_run > 0:
- db_session.commit()
- logging.info(f"Successfully committed {total_records_added_this_run} records to the database.")
- else:
- logging.info("No new records were queued for database insertion in this run.")
-
- except Exception as e:
- logging.error(f"An error occurred during scraping or DB operation: {e}", exc_info=True)
- db_session.rollback()
- logging.info("Database transaction rolled back due to error.")
- finally:
- db_session.close()
- logging.info("Database session closed.")
-
- logging.info("Oil price scraper job finished.")
-
-if __name__ == "__main__":
- main()
diff --git a/fuel_scraper/db_operations.py b/fuel_scraper/db_operations.py
deleted file mode 100644
index 030dce0..0000000
--- a/fuel_scraper/db_operations.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""
-Database operations module for oil price CRUD operations.
-"""
-import logging
-from datetime import datetime
-from sqlalchemy.orm import Session
-
-import sys
-import os
-# Add parent directory to path for imports
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-import models
-
-
-def upsert_oil_price(db_session: Session, item_dict: dict) -> bool:
- """
- Insert or update an oil price record.
-
- Logic:
- - Match by (name, state, county_id) when county_id is available to avoid
- duplicates when multiple zones map to the same county.
- - Fall back to (name, state, zone) when county_id is not available.
- - If record exists with non-null company_id: skip (vendor-managed price)
- - If record exists with null company_id and different price: update
- - If record exists with same price: skip (no change)
- - If no record exists: insert new
-
- Args:
- db_session: SQLAlchemy session
- item_dict: Dictionary with state, zone, name, price, date, county_id
-
- Returns:
- True if a record was inserted or updated, False otherwise
- """
- county_id = item_dict.get("county_id")
-
- # Check if record already exists - prefer matching by county_id to avoid
- # duplicates when multiple zones map to the same county
- if county_id is not None:
- existing_record = db_session.query(models.OilPrice).filter(
- models.OilPrice.name == item_dict["name"],
- models.OilPrice.state == item_dict["state"],
- models.OilPrice.county_id == county_id
- ).first()
- else:
- existing_record = db_session.query(models.OilPrice).filter(
- models.OilPrice.name == item_dict["name"],
- models.OilPrice.state == item_dict["state"],
- models.OilPrice.zone == item_dict["zone"]
- ).first()
-
- if existing_record:
- # Record exists - check if we should update
- if existing_record.company_id is not None:
- logging.debug(
- f"Skipping update for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
- "due to non-null company_id"
- )
- return False
-
- # Always update county_id if we have one and it differs
- updated = False
- if county_id is not None and existing_record.county_id != county_id:
- existing_record.county_id = county_id
- updated = True
-
- # Company ID is null - check if price changed
- if existing_record.price != item_dict["price"]:
- existing_record.price = item_dict["price"]
- existing_record.date = item_dict["date"]
- existing_record.scrapetimestamp = datetime.utcnow()
- logging.info(
- f"Updated price for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
- f"to {item_dict['price']}"
- )
- return True
- elif updated:
- existing_record.scrapetimestamp = datetime.utcnow()
- logging.info(
- f"Updated county_id for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
- f"to {county_id}"
- )
- return True
- else:
- logging.debug(
- f"Price unchanged for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']}"
- )
- return False
- else:
- # No record exists - create new
- oil_price_record = models.OilPrice(
- state=item_dict["state"],
- zone=item_dict["zone"],
- name=item_dict["name"],
- price=item_dict["price"],
- date=item_dict["date"],
- county_id=county_id,
- scrapetimestamp=datetime.utcnow()
- )
- db_session.add(oil_price_record)
- logging.info(
- f"Added new record for {item_dict['name']} in {item_dict['state']} zone {item_dict['zone']} "
- f"(county_id={county_id})"
- )
- return True
diff --git a/fuel_scraper/http_client.py b/fuel_scraper/http_client.py
deleted file mode 100644
index 4658518..0000000
--- a/fuel_scraper/http_client.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-HTTP client module for making web requests.
-"""
-import logging
-import requests
-from bs4 import BeautifulSoup
-
-# Default headers to mimic a browser
-DEFAULT_HEADERS = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-}
-
-REQUEST_TIMEOUT = 20
-
-
-def make_request(url: str) -> BeautifulSoup | None:
- """
- Fetch a URL and return a BeautifulSoup object.
-
- Args:
- url: The URL to fetch
-
- Returns:
- BeautifulSoup object if successful, None otherwise
- """
- try:
- response = requests.get(url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT)
- response.raise_for_status()
- return BeautifulSoup(response.content, 'html.parser')
- except requests.exceptions.RequestException as e:
- logging.error(f"Error fetching {url}: {e}")
- return None
diff --git a/fuel_scraper/scraper.py b/fuel_scraper/scraper.py
deleted file mode 100644
index 08b939b..0000000
--- a/fuel_scraper/scraper.py
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/usr/bin/env python3
-"""
-Main scraper orchestrator module.
-Coordinates fetching, parsing, and storing oil price data.
-"""
-import logging
-import sys
-import os
-
-# Add parent directory to path for imports
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from sqlalchemy.orm import Session
-from database import SessionLocal, init_db
-import models
-
-from .config import SITES_CONFIG, ZONE_COUNTY_MAP, setup_logging
-from .http_client import make_request
-from .parsers import parse_price_table, parse_zone_slug_to_int
-from .db_operations import upsert_oil_price
-
-
-def _build_county_lookup(db_session: Session) -> dict:
- """
- Build a lookup dict from (state_abbrev, county_name) -> county_id
- by querying the county table.
- """
- counties = db_session.query(models.County).all()
- lookup = {}
- for c in counties:
- lookup[(c.state, c.name)] = c.id
- logging.info(f"Built county lookup with {len(lookup)} entries")
- return lookup
-
-
-def _resolve_county_id(state_key: str, zone_number: int, county_lookup: dict) -> int | None:
- """
- Resolve a county_id from ZONE_COUNTY_MAP and the county lookup.
- Returns None if no mapping exists.
- """
- mapping = ZONE_COUNTY_MAP.get((state_key, zone_number))
- if not mapping:
- logging.debug(f"No zone-to-county mapping for ({state_key}, {zone_number})")
- return None
- state_abbrev, county_name = mapping
- county_id = county_lookup.get((state_abbrev, county_name))
- if county_id is None:
- logging.warning(f"County not found in DB: ({state_abbrev}, {county_name}) for zone ({state_key}, {zone_number})")
- return county_id
-
-
-def _scrape_zone(
- db_session: Session,
- site_name: str,
- url_template: str,
- base_url: str,
- oil_type: int,
- state_key: str,
- zone_slug: str,
- county_lookup: dict
-) -> int:
- """
- Scrape a single zone and store records.
-
- Returns:
- Number of records processed
- """
- format_params = {
- "base_url": base_url,
- "state_slug": state_key,
- "zone_slug": zone_slug,
- "oil_type": oil_type
- }
- target_url = url_template.format(**format_params)
-
- logging.info(f"Scraping: {target_url} (State: {state_key}, Zone Slug: {zone_slug})")
-
- soup = make_request(target_url)
- if not soup:
- logging.warning(f"Failed to retrieve or parse {target_url}. Skipping.")
- return 0
-
- parsed_items = parse_price_table(soup, state_key, zone_slug)
-
- if not parsed_items:
- logging.info(f"No data extracted from {target_url}")
- return 0
-
- # Resolve county_id for this zone
- zone_number = parse_zone_slug_to_int(zone_slug)
- county_id = None
- if zone_number is not None:
- county_id = _resolve_county_id(state_key, zone_number, county_lookup)
-
- records_processed = 0
- for item_dict in parsed_items:
- item_dict["county_id"] = county_id
- if upsert_oil_price(db_session, item_dict):
- records_processed += 1
-
- logging.info(
- f"Processed {len(parsed_items)} records from {site_name} - {state_key}/{zone_slug} "
- f"({records_processed} inserted/updated, county_id={county_id})"
- )
-
- return len(parsed_items)
-
-
-def _scrape_site(db_session: Session, site_config: dict, county_lookup: dict) -> int:
- """
- Scrape all zones for a single site.
-
- Returns:
- Total number of records processed
- """
- site_name = site_config["site_name"]
- base_url = site_config["base_url"]
- url_template = site_config["url_template"]
- oil_type = site_config["oil_type"]
-
- logging.info(f"--- Processing site: {site_name} ---")
-
- total_records = 0
-
- for state_key, zone_slugs in site_config["locations"].items():
- for zone_slug in zone_slugs:
- records = _scrape_zone(
- db_session=db_session,
- site_name=site_name,
- url_template=url_template,
- base_url=base_url,
- oil_type=oil_type,
- state_key=state_key,
- zone_slug=zone_slug,
- county_lookup=county_lookup
- )
- total_records += records
-
- return total_records
-
-
-def main():
- """
- Main entry point for the oil price scraper.
-
- Initializes database, iterates through all configured sites and zones,
- scrapes price data, and stores it in the database.
- """
- setup_logging()
- logging.info("Starting oil price scraper job.")
-
- # Initialize database
- try:
- init_db()
- logging.info("Database initialized/checked successfully.")
- except Exception as e:
- logging.error(f"Failed to initialize database: {e}", exc_info=True)
- return
-
- db_session: Session = SessionLocal()
- total_records = 0
-
- try:
- # Build county lookup at startup
- county_lookup = _build_county_lookup(db_session)
-
- # Process each configured site
- for site_config in SITES_CONFIG:
- records = _scrape_site(db_session, site_config, county_lookup)
- total_records += records
-
- # Commit all changes
- if total_records > 0:
- db_session.commit()
- logging.info(f"Successfully committed records to the database.")
- else:
- logging.info("No new records were queued for database insertion in this run.")
-
- except Exception as e:
- logging.error(f"An error occurred during scraping or DB operation: {e}", exc_info=True)
- db_session.rollback()
- logging.info("Database transaction rolled back due to error.")
- finally:
- db_session.close()
- logging.info("Database session closed.")
-
- logging.info("Oil price scraper job finished.")
-
-
-if __name__ == "__main__":
- main()
diff --git a/models.py b/models.py
index 4879848..ae7b643 100644
--- a/models.py
+++ b/models.py
@@ -25,6 +25,8 @@ class OilPrice(Base):
company_id = Column(Integer, ForeignKey("company.id"), nullable=True)
county_id = Column(Integer, nullable=True)
+ phone = Column(String(20), nullable=True)
+ url = Column(String(500), nullable=True)
def __repr__(self):
return (f""
\ No newline at end of file
+ return f""
+
+# --- StatsPrice Model ---
+class StatsPrice(Base):
+    __tablename__ = "stats_prices"
+    # Row of the stats_prices table: a price value stored for a (max 2-char) state code.
+    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
+    state = Column(String(2), nullable=False)
+    price = Column(Float, nullable=False)
+    created_at = Column(DateTime, default=datetime.utcnow)
+
+    def __repr__(self):
+        return f"<StatsPrice(id={self.id}, state='{self.state}', price={self.price})>"
\ No newline at end of file
diff --git a/fuel_scraper/__init__.py b/newenglandoil/__init__.py
similarity index 65%
rename from fuel_scraper/__init__.py
rename to newenglandoil/__init__.py
index 4bcddf1..612370b 100644
--- a/fuel_scraper/__init__.py
+++ b/newenglandoil/__init__.py
@@ -1,4 +1,4 @@
-# fuel_scraper package
+# newenglandoil package
from .scraper import main
__all__ = ["main"]
diff --git a/fuel_scraper/config.py b/newenglandoil/config.py
similarity index 94%
rename from fuel_scraper/config.py
rename to newenglandoil/config.py
index 2087045..c0c791b 100644
--- a/fuel_scraper/config.py
+++ b/newenglandoil/config.py
@@ -43,6 +43,17 @@ SITES_CONFIG = [
}
]
+# --- STATE ABBREVIATION MAP ---
+# Maps lowercase, space-free state keys (as used in SITES_CONFIG locations, e.g. "newhampshire") to 2-letter abbreviations
+STATE_ABBREV_MAP = {
+    "connecticut": "CT",
+    "massachusetts": "MA",
+    "maine": "ME",
+    "newhampshire": "NH",
+    "rhodeisland": "RI",
+    "vermont": "VT",
+}
+
# --- ZONE-TO-COUNTY MAPPING ---
# Maps (state_key, zone_number) -> (state_abbrev, county_name)
# state_key matches the keys in SITES_CONFIG locations (lowercase, no spaces)
diff --git a/newenglandoil/db_operations.py b/newenglandoil/db_operations.py
new file mode 100644
index 0000000..8ffacc1
--- /dev/null
+++ b/newenglandoil/db_operations.py
@@ -0,0 +1,131 @@
+"""
+Database operations module for oil price CRUD operations.
+"""
+import logging
+import sys
+import os
+from datetime import datetime
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from sqlalchemy.orm import Session
+from sqlalchemy import func
+import models
+
+
+def upsert_oil_price(db_session: Session, item_dict: dict, force_update_metadata: bool = False) -> bool:
+    """
+    Insert or update an oil price record.
+
+    Matching (name comparison is case-insensitive):
+    - county_id set: match on (name, state, county_id). If nothing matches, fall
+      back to a row with the same (name, state, zone) whose county_id is still
+      NULL, so a row stored before its county was resolved gets backfilled
+      instead of duplicated.
+    - county_id None: match on (name, state, zone).
+
+    On a match:
+    - If company_id is set: SKIP (vendor managed).
+    - Update name to the scraped spelling (e.g. "Leblanc Oil" vs "LEBLANC OIL").
+    - Backfill county_id.
+    - Update phone/url if missing OR force_update_metadata is True.
+    - Update price/date if the price differs by more than 0.001. A scraped
+      price of None never overwrites a stored price.
+    With no match a new row is added to the session.
+
+    Args:
+        db_session: SQLAlchemy session (rows are staged here, not committed)
+        item_dict: Dictionary with state, zone, name, price, date and optional
+            county_id, phone, url, site_name
+        force_update_metadata: If True, overwrite existing phone/url
+
+    Returns:
+        True if a row was inserted or changed, False if nothing was written.
+    """
+    county_id = item_dict.get("county_id")
+    site_name = item_dict.get("site_name", "NewEnglandOil")
+    name_clean = item_dict["name"].strip()
+    new_price = item_dict["price"]
+    new_phone = item_dict.get("phone")
+    new_url = item_dict.get("url")
+
+    # Candidate rows: same state, same name ignoring case
+    base_query = db_session.query(models.OilPrice).filter(
+        func.lower(models.OilPrice.name) == name_clean.lower(),
+        models.OilPrice.state == item_dict["state"]
+    )
+    same_zone = models.OilPrice.zone == item_dict["zone"]
+
+    if county_id is not None:
+        existing_record = base_query.filter(models.OilPrice.county_id == county_id).first()
+        if existing_record is None:
+            # Matching on county_id alone can never find a row whose county is
+            # still NULL, which left the county backfill below unreachable.
+            existing_record = base_query.filter(models.OilPrice.county_id.is_(None), same_zone).first()
+    else:
+        existing_record = base_query.filter(same_zone).first()
+
+    if existing_record is None:
+        # Create new
+        db_session.add(models.OilPrice(
+            state=item_dict["state"],
+            zone=item_dict["zone"],
+            name=name_clean,
+            price=new_price,
+            date=item_dict["date"],
+            county_id=county_id,
+            phone=new_phone,
+            url=new_url,
+            scrapetimestamp=datetime.utcnow()
+        ))
+        logging.info(
+            f"[{site_name}] Added new record for {name_clean} in {item_dict['state']} zone {item_dict['zone']} "
+            f"(county_id={county_id})"
+        )
+        return True
+
+    if existing_record.company_id is not None:
+        logging.debug(
+            f"[{site_name}] Skipping update for {name_clean} (ID={existing_record.id}) "
+            "due to non-null company_id"
+        )
+        return False
+
+    updated = False
+
+    # 1. Adopt the scraped spelling/casing of the name
+    if existing_record.name != name_clean:
+        existing_record.name = name_clean
+        updated = True
+
+    # 2. Backfill county_id (only differs when the NULL-county fallback matched)
+    if county_id is not None and existing_record.county_id != county_id:
+        existing_record.county_id = county_id
+        updated = True
+
+    # 3. Backfill or force-update phone/url; empty scraped values never erase data
+    for field, new_value in (("phone", new_phone), ("url", new_url)):
+        current = getattr(existing_record, field)
+        if new_value and (not current or (force_update_metadata and current != new_value)):
+            setattr(existing_record, field, new_value)
+            updated = True
+
+    # 4. Price change, with a tolerance because the values are compared as floats
+    price_changed = new_price is not None and (
+        existing_record.price is None or abs(existing_record.price - new_price) > 0.001
+    )
+    if price_changed:
+        existing_record.price = new_price
+        existing_record.date = item_dict["date"]
+        existing_record.scrapetimestamp = datetime.utcnow()
+        logging.info(
+            f"[{site_name}] Updated price for {name_clean} (ID={existing_record.id}) "
+            f"to {item_dict['price']}"
+        )
+        return True
+    if updated:
+        existing_record.scrapetimestamp = datetime.utcnow()
+        logging.info(f"[{site_name}] Updated metadata for {name_clean} (ID={existing_record.id})")
+        return True
+    logging.debug(f"[{site_name}] Price unchanged for {name_clean} in {item_dict['state']} zone {item_dict['zone']}")
+    return False
diff --git a/newenglandoil/http_client.py b/newenglandoil/http_client.py
new file mode 100644
index 0000000..168bdde
--- /dev/null
+++ b/newenglandoil/http_client.py
@@ -0,0 +1,111 @@
+"""
+HTTP client module for making web requests.
+"""
+import logging
+import re
+import time
+import requests
+from bs4 import BeautifulSoup
+
+# Default headers to mimic a browser
+DEFAULT_HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+}
+
+REQUEST_TIMEOUT = 20
+PHONE_FETCH_DELAY = 1 # seconds between phone page requests
+
+
+def make_request(url: str) -> BeautifulSoup | None:
+    """
+    GET a page with the module's default headers and parse it as HTML.
+
+    Args:
+        url: Address of the page to download
+    Returns:
+        Parsed BeautifulSoup document, or None if requests raised a RequestException
+    """
+    try:
+        resp = requests.get(url, timeout=REQUEST_TIMEOUT, headers=DEFAULT_HEADERS)
+        resp.raise_for_status()
+        document = BeautifulSoup(resp.content, 'html.parser')
+    except requests.exceptions.RequestException as exc:
+        logging.error(f"Error fetching {url}: {exc}")
+        document = None
+    return document
+
+
+def fetch_phone_number(base_url: str, phone_page_path: str, state_slug: str = "") -> str | None:
+    """
+    Fetch a phone number from a newenglandoil phones.asp page.
+
+    The page text is searched for a 10-digit US number written like
+    (508) 555-1234, 508-555-1234, 508.555.1234 or 5085551234. The first number
+    after a word starting with Phone/Tel/Call/Contact is preferred; otherwise
+    the first number on the page is used. Numbers starting with 000 are
+    treated as placeholders and skipped.
+
+    Args:
+        base_url: Site base URL (e.g. "https://www.newenglandoil.com")
+        phone_page_path: Relative path like "phones.asp?zone=1&ID=10&a=MA1";
+            a value starting with "http" is used as the full URL
+        state_slug: State slug for URL path (e.g. "massachusetts")
+
+    Returns:
+        Phone number formatted as "(508) 555-1234", or None if the page could
+        not be fetched or holds no usable number.
+    """
+    # Build full URL - phone_page_path may be relative
+    if phone_page_path.startswith('http'):
+        url = phone_page_path
+    elif state_slug:
+        url = f"{base_url}/{state_slug}/{phone_page_path}"
+    else:
+        url = f"{base_url}/{phone_page_path}"
+
+    # Pause before every phone page request
+    time.sleep(PHONE_FETCH_DELAY)
+
+    soup = make_request(url)
+    if not soup:
+        return None
+
+    # Look for phone number patterns in the page text
+    page_text = soup.get_text(" ", strip=True)
+
+    # The lookarounds keep a match from starting or ending inside a longer
+    # digit run (an 11+ digit ID would otherwise yield a bogus number).
+    number = (
+        r'(?<!\d)'       # not preceded by a digit
+        r'\(?\d{3}\)?'   # area code, optionally in parentheses
+        r'[\s.\-]?'      # optional separator (space, dot, dash)
+        r'\d{3}'         # prefix
+        r'[\s.\-]?'      # optional separator
+        r'\d{4}'         # line number
+        r'(?!\d)'        # not followed by a digit
+    )
+
+    # \b keeps the keywords from matching inside other words ("hotel", "recall")
+    keyword_pattern = re.compile(
+        r'\b(?:Phone|Tel|Call|Contact).*?(' + number + r')',
+        re.IGNORECASE,
+    )
+
+    # Candidates in order of preference: the number following a keyword first,
+    # then every number in page order.
+    candidates = []
+    keyword_match = keyword_pattern.search(page_text)
+    if keyword_match:
+        candidates.append(keyword_match.group(1))
+    candidates.extend(re.findall(number, page_text))
+
+    for candidate in candidates:
+        digits = re.sub(r'\D', '', candidate)
+        if digits.startswith('000'):
+            # Placeholder such as 000-000-0000, applies to keyword matches too
+            continue
+        # The pattern guarantees exactly 10 digits here
+        return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
+
+    logging.debug(f"No phone number found on {url}")
+    return None
diff --git a/fuel_scraper/parsers.py b/newenglandoil/parsers.py
similarity index 54%
rename from fuel_scraper/parsers.py
rename to newenglandoil/parsers.py
index 0fb4ccc..bab74c0 100644
--- a/fuel_scraper/parsers.py
+++ b/newenglandoil/parsers.py
@@ -3,8 +3,11 @@ HTML parsing module for extracting oil price data from web pages.
"""
import logging
import re
+from urllib.parse import urlparse, parse_qs
from bs4 import BeautifulSoup
+from .config import STATE_ABBREV_MAP
+
def parse_zone_slug_to_int(zone_slug_str: str) -> int | None:
"""
@@ -54,31 +57,132 @@ def _find_price_table_columns(thead) -> dict | None:
return None
+def _smart_title(name: str) -> str:
+    """
+    Title-case a company name, keeping known abbreviations (LLC, INC, HVAC, state codes) uppercase.
+
+    Abbreviations are recognized with punctuation attached ("LLC," / "(USA)"), and the possessive
+    's that str.title() would render as 'S stays lowercase ("JOE'S OIL" -> "Joe's Oil").
+    """
+    keep_upper = {"LLC", "INC", "LP", "HVAC", "II", "III", "IV", "USA", "CT", "MA", "NH", "ME", "RI", "VT"}
+    result = []
+    for word in name.title().split():
+        if word.strip(".,;:()").upper() in keep_upper:
+            word = word.upper()
+        else:
+            word = re.sub(r"(?<=\w)(['\u2019])S\b", r"\1s", word)
+        result.append(word)
+    return " ".join(result)
+
+
+def _extract_company_url(company_link) -> str | None:
+    """
+    Extract the actual company URL from a link.
+
+    Handles:
+    1. Redirects: click.asp?x=http://example.com&... -> http://example.com
+    2. Direct links: http://example.com -> http://example.com
+
+    Args:
+        company_link: Anchor element of the company cell (anything exposing
+            .get('href', '')), or None when the cell has no link
+
+    Returns:
+        The absolute http(s) URL, or None when the link is missing, relative,
+        malformed, or its host is one of the blocked listing sites.
+    """
+    # Links back to the listing site or to a competitor are not company homepages
+    blocked_hosts = ("newenglandoil.com", "cheapestoil.com")
+
+    if not company_link:
+        return None
+
+    href = (company_link.get('href', '') or '').strip()
+    if not href:
+        return None
+
+    try:
+        if 'click.asp' in href:
+            # The x query parameter carries the real destination
+            url_candidate = parse_qs(urlparse(href).query).get('x', [''])[0].strip()
+        else:
+            url_candidate = href
+
+        if not url_candidate.startswith(('http://', 'https://')):
+            return None
+
+        # Compare the host only: a tracking parameter such as
+        # ?utm_source=newenglandoil.com must not disqualify a company URL.
+        host = (urlparse(url_candidate).hostname or '').lower()
+    except ValueError:
+        # urlparse raises ValueError for malformed netlocs such as "http://[bad"
+        logging.debug(f"Ignoring malformed company link: {href!r}")
+        return None
+
+    if any(host == blocked or host.endswith('.' + blocked) for blocked in blocked_hosts):
+        return None
+
+    return url_candidate
+
+
+def _extract_phone_link(cells: list) -> dict | None:
+    """
+    Locate the first phones.asp anchor in a row and describe it.
+
+    Example href: phones.asp?zone=1&ID=10&a=MA1
+    Returns {"phone_page_path": <href>, "neo_id": <ID query value or None>},
+    or None when no cell yields a parseable phones.asp link.
+    """
+    def _is_phone_href(h):
+        return h and 'phones.asp' in h
+
+    for td in cells:
+        anchor = td.find('a', href=_is_phone_href)
+        if not anchor:
+            continue
+        target = anchor.get('href', '')
+        try:
+            id_values = parse_qs(urlparse(target).query).get('ID', [None])
+            return {"phone_page_path": target, "neo_id": id_values[0]}
+        except Exception:
+            continue
+    return None
+
+
def _parse_row(cells: list, column_indices: dict, state_name: str, zone: int) -> dict | None:
"""
Parse a single table row into a price record.
-
+
Args:
cells: List of td elements
column_indices: Dictionary mapping column names to indices
- state_name: State name string
+ state_name: State name string (lowercase key like "connecticut")
zone: Zone number
-
+
Returns:
Dictionary with parsed data or None if parsing fails
"""
max_required_index = max(column_indices.values())
-
+
if len(cells) <= max_required_index:
return None
-
+
# Extract company name (prefer link text if available)
company_cell = cells[column_indices['company']]
company_name = company_cell.get_text(strip=True)
company_link = company_cell.find('a')
if company_link:
company_name = company_link.get_text(strip=True)
-
+
+ # Apply title case normalization
+ company_name = _smart_title(company_name)
+
+ # Extract company URL from click.asp link
+ company_url = _extract_company_url(company_link)
+
+ # Extract phone page link info
+ phone_info = _extract_phone_link(cells)
+
# Extract and parse price
price_str = cells[column_indices['price']].get_text(strip=True)
price_float = None
@@ -90,20 +194,28 @@ def _parse_row(cells: list, column_indices: dict, state_name: str, zone: int) ->
logging.warning(f"Could not parse price: '{price_str}' for {company_name} in {state_name}/zone{zone}.")
except Exception as e:
logging.error(f"Unexpected error parsing price: '{price_str}' for {company_name}. Error: {e}")
-
+
# Extract date
date_posted_str = cells[column_indices['date']].get_text(strip=True)
-
+
+ # Convert state name to 2-letter abbreviation
+ state_abbr = STATE_ABBREV_MAP.get(state_name.lower())
+ if not state_abbr:
+ logging.warning(f"Unknown state key: {state_name}, using capitalized form")
+ state_abbr = state_name.capitalize()
+
return {
- "state": state_name.capitalize(),
+ "state": state_abbr,
"zone": zone,
"name": company_name,
"price": price_float,
"date": date_posted_str,
+ "url": company_url,
+ "phone_info": phone_info,
}
-def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: str) -> list[dict]:
+def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: str, site_name: str = "NewEnglandOil") -> list[dict]:
"""
Parse price tables from a BeautifulSoup page.
@@ -117,16 +229,16 @@ def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: s
"""
data_dicts = []
all_tables = soup.find_all('table')
- logging.info(f"Found {len(all_tables)} table(s) on page for {state_name_key} - {zone_slug_str}.")
+ logging.info(f"[{site_name}] Found {len(all_tables)} table(s) on page for {state_name_key} - {zone_slug_str}.")
if not all_tables:
- logging.warning(f"No HTML tables found at all for {state_name_key} - {zone_slug_str}.")
+ logging.warning(f"[{site_name}] No HTML tables found at all for {state_name_key} - {zone_slug_str}.")
return data_dicts
# Parse zone number from slug
zone_int = parse_zone_slug_to_int(zone_slug_str)
if zone_int is None:
- logging.error(f"Cannot parse zone number for {state_name_key} - {zone_slug_str}. Skipping.")
+ logging.error(f"[{site_name}] Cannot parse zone number for {state_name_key} - {zone_slug_str}. Skipping.")
return data_dicts
candidate_tables_found = 0
@@ -149,7 +261,7 @@ def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: s
# Parse table body
tbody = table.find('tbody')
if not tbody:
- logging.warning(f"Price table identified by headers has no tbody. Skipping. State: {state_name_key}, Zone: {zone_slug_str}")
+ logging.warning(f"[{site_name}] Price table identified by headers has no tbody. Skipping. State: {state_name_key}, Zone: {zone_slug_str}")
continue
rows = tbody.find_all('tr')
@@ -167,11 +279,11 @@ def parse_price_table(soup: BeautifulSoup, state_name_key: str, zone_slug_str: s
elif len(cells) > 0:
max_required = max(column_indices.values()) + 1
logging.warning(
- f"Skipping row {row_index+1} with insufficient cells ({len(cells)}, need {max_required}) "
+ f"[{site_name}] Skipping row {row_index+1} with insufficient cells ({len(cells)}, need {max_required}) "
f"in {state_name_key}/{zone_slug_str}"
)
if candidate_tables_found == 0:
- logging.warning(f"No tables matching expected price table structure found for {state_name_key} - {zone_slug_str}.")
+ logging.warning(f"[{site_name}] No tables matching expected price table structure found for {state_name_key} - {zone_slug_str}.")
return data_dicts
diff --git a/newenglandoil/scraper.py b/newenglandoil/scraper.py
new file mode 100644
index 0000000..2d72634
--- /dev/null
+++ b/newenglandoil/scraper.py
@@ -0,0 +1,266 @@
+#!/usr/bin/env python3
+"""
+Main scraper orchestrator module.
+Coordinates fetching, parsing, and storing oil price data.
+"""
+import logging
+import sys
+import os
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from sqlalchemy.orm import Session
+from database import SessionLocal, init_db
+import models
+
+from .config import SITES_CONFIG, ZONE_COUNTY_MAP, setup_logging, STATE_ABBREV_MAP
+from .http_client import make_request, fetch_phone_number
+from .parsers import parse_price_table, parse_zone_slug_to_int
+from .db_operations import upsert_oil_price
+
+
+def _build_county_lookup(db_session: Session) -> dict:
+    """
+    Index every row of the county table for county_id resolution.
+    Maps (state, name with surrounding whitespace removed) -> id; rows with an empty name are left out.
+    """
+    lookup = {
+        (county.state, county.name.strip()): county.id
+        for county in db_session.query(models.County).all()
+        if county.name
+    }
+    logging.info(f"Built county lookup with {len(lookup)} entries")
+    return lookup
+
+
+def _resolve_county_id(state_key: str, zone_number: int, county_lookup: dict) -> int | None:
+    """
+    Translate a (state_key, zone_number) pair into a county id.
+    Gives None when ZONE_COUNTY_MAP has no entry for the zone or the mapped county is not in county_lookup.
+    """
+    county_ref = ZONE_COUNTY_MAP.get((state_key, zone_number))
+    if county_ref:
+        state_abbrev, county_name = county_ref
+        resolved = county_lookup.get((state_abbrev, county_name))
+        if resolved is None:
+            logging.warning(f"County not found in DB: ({state_abbrev}, {county_name}) for zone ({state_key}, {zone_number})")
+        return resolved
+    logging.debug(f"No zone-to-county mapping for ({state_key}, {zone_number})")
+    return None
+
+
+def _scrape_zone(
+    db_session: Session,
+    site_name: str,
+    url_template: str,
+    base_url: str,
+    oil_type: int,
+    state_key: str,
+    zone_slug: str,
+    county_lookup: dict,
+    phone_cache: dict,
+    refresh_metadata: bool = False,
+) -> int:
+    """
+    Scrape a single zone and store records.
+
+    Fetches the zone page, parses its price table, resolves the zone's county,
+    attaches each company's phone number (looked up through phone_cache) and
+    upserts every parsed row.
+
+    Args:
+        db_session: SQLAlchemy session the upserts are staged in.
+        site_name: Site label used in log lines and stored on each item.
+        url_template: Format string filled with base_url, state_slug,
+            zone_slug and oil_type.
+        base_url: Site base URL, also used to build phone page URLs.
+        oil_type: Value substituted for {oil_type} in url_template.
+        state_key: State key, substituted for {state_slug}.
+        zone_slug: Zone slug, substituted for {zone_slug}.
+        county_lookup: (state_abbrev, county_name) -> county_id mapping.
+        phone_cache: Dict mapping phone page path -> phone string (None when
+            the lookup found nothing). Shared across zones so the same
+            phone page is requested only once.
+        refresh_metadata: Forwarded to upsert_oil_price as
+            force_update_metadata. It does not bypass phone_cache.
+
+    Returns:
+        Number of rows parsed from the zone page, whether or not they changed
+        the database; 0 when the page could not be fetched or had no rows.
+    """
+    target_url = url_template.format(
+        base_url=base_url,
+        state_slug=state_key,
+        zone_slug=zone_slug,
+        oil_type=oil_type,
+    )
+
+    logging.info(f"[{site_name}] Scraping: {target_url} (State: {state_key}, Zone Slug: {zone_slug})")
+
+    soup = make_request(target_url)
+    if not soup:
+        logging.warning(f"[{site_name}] Failed to retrieve or parse {target_url}. Skipping.")
+        return 0
+
+    parsed_items = parse_price_table(soup, state_key, zone_slug, site_name)
+    if not parsed_items:
+        logging.info(f"[{site_name}] No data extracted from {target_url}")
+        return 0
+
+    # Resolve county_id for this zone
+    zone_number = parse_zone_slug_to_int(zone_slug)
+    county_id = None
+    if zone_number is not None:
+        county_id = _resolve_county_id(state_key, zone_number, county_lookup)
+
+    # Only include state_slug in phone URL if the site uses it in its URL template
+    phone_slug = state_key if "{state_slug}" in url_template else ""
+
+    records_processed = 0
+    for item_dict in parsed_items:
+        item_dict["county_id"] = county_id
+        item_dict["site_name"] = site_name
+
+        # Take the parser's phone link info off the item before it is upserted
+        phone_info = item_dict.pop("phone_info", None)
+        # Use phone_page_path as the cache key because neo_id is only unique per zone.
+        phone_key = None
+        if phone_info:
+            phone_key = phone_info.get("phone_page_path")
+        if phone_key:
+            if phone_key not in phone_cache:
+                # First sighting in this run: fetch once and remember the
+                # result, including None, so failures are not retried.
+                phone = fetch_phone_number(base_url, phone_key, phone_slug)
+                phone_cache[phone_key] = phone
+                if phone:
+                    neo_id = phone_info.get("neo_id")
+                    logging.info(f"[{site_name}] Fetched phone for {item_dict['name']} (ID={neo_id}): {phone}")
+            item_dict["phone"] = phone_cache[phone_key]
+
+        if upsert_oil_price(db_session, item_dict, force_update_metadata=refresh_metadata):
+            records_processed += 1
+
+    # Summary line for this zone
+    logging.info(
+        f"[{site_name}] Processed {len(parsed_items)} records from {site_name} - {state_key}/{zone_slug} "
+        f"({records_processed} inserted/updated, county_id={county_id}) (Size: {len(parsed_items)})"
+    )
+
+    return len(parsed_items)
+
+
+def _scrape_site(db_session: Session, site_config: dict, county_lookup: dict, refresh_metadata: bool = False) -> int:
+    """
+    Run _scrape_zone for every state/zone pair listed in one site's config.
+
+    Args:
+        db_session: Session handed through to each zone scrape
+        site_config: Dict with site_name, base_url, url_template, oil_type
+            and a locations mapping of state key -> zone slugs
+        county_lookup: Mapping handed through to each zone scrape
+        refresh_metadata: Flag handed through to each zone scrape
+
+    Returns:
+        Sum of the counts returned by the individual zone scrapes
+    """
+    site_name = site_config["site_name"]
+    base_url = site_config["base_url"]
+    url_template = site_config["url_template"]
+    oil_type = site_config["oil_type"]
+    logging.info(f"--- Processing site: {site_name} ---")
+
+    # One cache per site, so a phone page shared by several zones is fetched once
+    phone_cache = {}
+    # Everything that is the same for each zone of this site
+    shared = dict(
+        db_session=db_session, site_name=site_name, url_template=url_template,
+        base_url=base_url, oil_type=oil_type, county_lookup=county_lookup,
+        phone_cache=phone_cache, refresh_metadata=refresh_metadata,
+    )
+
+    total_records = sum(
+        _scrape_zone(state_key=state_key, zone_slug=zone_slug, **shared)
+        for state_key, zone_slugs in site_config["locations"].items()
+        for zone_slug in zone_slugs
+    )
+    logging.info(f"Phone cache: fetched {len(phone_cache)} unique company phones for {site_name}")
+    return total_records
+
+
+def main(refresh_metadata: bool = False, target_state_abbr: str | None = None):
+    """
+    Run one complete scrape job: logging and DB setup, scraping, single commit.
+
+    All changes of a run share one session: it is committed at the end when
+    any site reported rows, and rolled back when scraping raises an exception.
+
+    Args:
+        refresh_metadata: Forwarded to every site scrape.
+        target_state_abbr: If set (e.g. "MA", any letter case), only that state
+            is scraped; sites that do not list it are skipped.
+    """
+    setup_logging()
+
+    state_msg = ""
+    if target_state_abbr:
+        state_msg = f" (State: {target_state_abbr})"
+    logging.info(f"Starting oil price scraper job.{state_msg} (Refresh Metadata: {refresh_metadata})")
+
+    # Initialize database
+    try:
+        init_db()
+        logging.info("Database initialized/checked successfully.")
+    except Exception as exc:
+        logging.error(f"Failed to initialize database: {exc}", exc_info=True)
+        return
+
+    db_session: Session = SessionLocal()
+    total_records = 0
+
+    try:
+        # Build county lookup at startup
+        county_lookup = _build_county_lookup(db_session)
+
+        # Map the requested abbreviation back to the state key used in the site configs
+        target_state_key = None
+        if target_state_abbr:
+            abbrev_to_state = {abbr: key for key, abbr in STATE_ABBREV_MAP.items()}
+            target_state_key = abbrev_to_state.get(target_state_abbr.upper())
+            if not target_state_key:
+                logging.error(f"Unknown state abbreviation: {target_state_abbr}")
+                return
+
+        for site_config in SITES_CONFIG:
+            config_to_use = site_config
+            if target_state_key:
+                site_locations = site_config["locations"]
+                if target_state_key not in site_locations:
+                    logging.info(f"Skipping {site_config['site_name']} (does not cover {target_state_abbr})")
+                    continue
+                # Narrowed shallow copy; the SITES_CONFIG entry itself is not modified
+                config_to_use = {**site_config, "locations": {target_state_key: site_locations[target_state_key]}}
+
+            total_records += _scrape_site(db_session, config_to_use, county_lookup, refresh_metadata=refresh_metadata)
+
+        # Commit all changes
+        if total_records > 0:
+            db_session.commit()
+            logging.info("Successfully committed records to the database.")
+        else:
+            logging.info("No new records were queued for database insertion in this run.")
+
+    except Exception as exc:
+        logging.error(f"An error occurred during scraping or DB operation: {exc}", exc_info=True)
+        db_session.rollback()
+        logging.info("Database transaction rolled back due to error.")
+    finally:
+        db_session.close()
+        logging.info("Database session closed.")
+
+    logging.info("Oil price scraper job finished.")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/oil_scraper.log b/oil_scraper.log
deleted file mode 100644
index 89d8c68..0000000
--- a/oil_scraper.log
+++ /dev/null
@@ -1,689 +0,0 @@
-2025-06-01 20:36:58,558 - INFO - [run.py:30] - Starting the fuel price scraper...
-2025-06-01 20:36:58,558 - INFO - [fuel_scraper.py:186] - Starting oil price scraper job.
-2025-06-01 20:36:58,576 - INFO - [fuel_scraper.py:189] - Database initialized/checked successfully.
-2025-06-01 20:36:58,576 - INFO - [fuel_scraper.py:204] - --- Processing site: NewEnglandOil ---
-2025-06-01 20:36:58,576 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone1.asp?type=0 (State: connecticut, Zone Slug: zone1)
-2025-06-01 20:36:58,790 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone1.
-2025-06-01 20:36:58,799 - INFO - [fuel_scraper.py:257] - Queued 5 records from NewEnglandOil - connecticut/zone1 for DB insertion.
-2025-06-01 20:36:58,799 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone2.asp?type=0 (State: connecticut, Zone Slug: zone2)
-2025-06-01 20:36:59,009 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone2.
-2025-06-01 20:36:59,018 - INFO - [fuel_scraper.py:257] - Queued 8 records from NewEnglandOil - connecticut/zone2 for DB insertion.
-2025-06-01 20:36:59,018 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone3.asp?type=0 (State: connecticut, Zone Slug: zone3)
-2025-06-01 20:36:59,253 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone3.
-2025-06-01 20:36:59,255 - INFO - [fuel_scraper.py:255] - Added new record for RESIDENTIAL FUEL SYSTEMS in Connecticut zone 3
-2025-06-01 20:36:59,256 - INFO - [fuel_scraper.py:255] - Added new record for CORPORAL HEATING, LLC in Connecticut zone 3
-2025-06-01 20:36:59,257 - INFO - [fuel_scraper.py:255] - Added new record for FORBES FUEL FUEL in Connecticut zone 3
-2025-06-01 20:36:59,258 - INFO - [fuel_scraper.py:255] - Added new record for CENTS-ABLE Oil in Connecticut zone 3
-2025-06-01 20:36:59,259 - INFO - [fuel_scraper.py:255] - Added new record for PURPLEFUELS, LLC in Connecticut zone 3
-2025-06-01 20:36:59,260 - INFO - [fuel_scraper.py:255] - Added new record for BLUE FLAME OIL in Connecticut zone 3
-2025-06-01 20:36:59,262 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN FUEL in Connecticut zone 3
-2025-06-01 20:36:59,263 - INFO - [fuel_scraper.py:255] - Added new record for POLAR ENERGY in Connecticut zone 3
-2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:255] - Added new record for HI-HO PETROLEUM in Connecticut zone 3
-2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:255] - Added new record for JOES FUEL CO in Connecticut zone 3
-2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:257] - Queued 10 records from NewEnglandOil - connecticut/zone3 for DB insertion.
-2025-06-01 20:36:59,264 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone4.asp?type=0 (State: connecticut, Zone Slug: zone4)
-2025-06-01 20:36:59,477 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone4.
-2025-06-01 20:36:59,478 - INFO - [fuel_scraper.py:255] - Added new record for CORPORAL HEATING, LLC in Connecticut zone 4
-2025-06-01 20:36:59,479 - INFO - [fuel_scraper.py:255] - Added new record for PURPLEFUELS, LLC in Connecticut zone 4
-2025-06-01 20:36:59,481 - INFO - [fuel_scraper.py:255] - Added new record for WESTBROOK OIL in Connecticut zone 4
-2025-06-01 20:36:59,481 - INFO - [fuel_scraper.py:255] - Added new record for J J SULLIVAN INC in Connecticut zone 4
-2025-06-01 20:36:59,483 - INFO - [fuel_scraper.py:255] - Added new record for BRAZOS OIL in Connecticut zone 4
-2025-06-01 20:36:59,484 - INFO - [fuel_scraper.py:255] - Added new record for MADISON OIL CO in Connecticut zone 4
-2025-06-01 20:36:59,484 - INFO - [fuel_scraper.py:257] - Queued 6 records from NewEnglandOil - connecticut/zone4 for DB insertion.
-2025-06-01 20:36:59,484 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone5.asp?type=0 (State: connecticut, Zone Slug: zone5)
-2025-06-01 20:36:59,701 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone5.
-2025-06-01 20:36:59,703 - INFO - [fuel_scraper.py:255] - Added new record for SIMPLY HEATING OIL in Connecticut zone 5
-2025-06-01 20:36:59,704 - INFO - [fuel_scraper.py:255] - Added new record for CORPORAL HEATING, LLC in Connecticut zone 5
-2025-06-01 20:36:59,705 - INFO - [fuel_scraper.py:255] - Added new record for RESIDENTIAL FUEL SYSTEMS in Connecticut zone 5
-2025-06-01 20:36:59,706 - INFO - [fuel_scraper.py:255] - Added new record for OMNI ENERGY in Connecticut zone 5
-2025-06-01 20:36:59,707 - INFO - [fuel_scraper.py:255] - Added new record for QUALITY OIL CO LLC in Connecticut zone 5
-2025-06-01 20:36:59,708 - INFO - [fuel_scraper.py:255] - Added new record for FIRST FUEL OIL in Connecticut zone 5
-2025-06-01 20:36:59,709 - INFO - [fuel_scraper.py:255] - Added new record for VADNEY FUEL CO in Connecticut zone 5
-2025-06-01 20:36:59,710 - INFO - [fuel_scraper.py:255] - Added new record for WESSON ENERGY INC in Connecticut zone 5
-2025-06-01 20:36:59,710 - INFO - [fuel_scraper.py:255] - Added new record for MANN FUEL OIL in Connecticut zone 5
-2025-06-01 20:36:59,711 - INFO - [fuel_scraper.py:255] - Added new record for DAVIS OIL CO in Connecticut zone 5
-2025-06-01 20:36:59,712 - INFO - [fuel_scraper.py:255] - Added new record for MIMS OIL LLC in Connecticut zone 5
-2025-06-01 20:36:59,713 - INFO - [fuel_scraper.py:255] - Added new record for MCKINLEY OIL LLC in Connecticut zone 5
-2025-06-01 20:36:59,713 - INFO - [fuel_scraper.py:257] - Queued 12 records from NewEnglandOil - connecticut/zone5 for DB insertion.
-2025-06-01 20:36:59,713 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone6.asp?type=0 (State: connecticut, Zone Slug: zone6)
-2025-06-01 20:36:59,915 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone6.
-2025-06-01 20:36:59,917 - INFO - [fuel_scraper.py:255] - Added new record for COST LESS OIL in Connecticut zone 6
-2025-06-01 20:36:59,918 - INFO - [fuel_scraper.py:255] - Added new record for BROTHERS OIL CO in Connecticut zone 6
-2025-06-01 20:36:59,919 - INFO - [fuel_scraper.py:255] - Added new record for SIMPLY HEATING OIL in Connecticut zone 6
-2025-06-01 20:36:59,920 - INFO - [fuel_scraper.py:255] - Added new record for FERGUSON OIL in Connecticut zone 6
-2025-06-01 20:36:59,921 - INFO - [fuel_scraper.py:255] - Added new record for TOWN OIL CO in Connecticut zone 6
-2025-06-01 20:36:59,923 - INFO - [fuel_scraper.py:255] - Added new record for OMNI ENERGY in Connecticut zone 6
-2025-06-01 20:36:59,924 - INFO - [fuel_scraper.py:255] - Added new record for SPRINGERS OIL SERVICE in Connecticut zone 6
-2025-06-01 20:36:59,924 - INFO - [fuel_scraper.py:257] - Queued 7 records from NewEnglandOil - connecticut/zone6 for DB insertion.
-2025-06-01 20:36:59,924 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone7.asp?type=0 (State: connecticut, Zone Slug: zone7)
-2025-06-01 20:37:00,151 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone7.
-2025-06-01 20:37:00,152 - INFO - [fuel_scraper.py:255] - Added new record for OMNI ENERGY in Connecticut zone 7
-2025-06-01 20:37:00,153 - INFO - [fuel_scraper.py:255] - Added new record for DIME OIL COMPANY in Connecticut zone 7
-2025-06-01 20:37:00,155 - INFO - [fuel_scraper.py:255] - Added new record for 24 7 OIL in Connecticut zone 7
-2025-06-01 20:37:00,156 - INFO - [fuel_scraper.py:255] - Added new record for PRICERITE OIL in Connecticut zone 7
-2025-06-01 20:37:00,157 - INFO - [fuel_scraper.py:255] - Added new record for PLYMOUTH OIL SERVICES in Connecticut zone 7
-2025-06-01 20:37:00,158 - INFO - [fuel_scraper.py:255] - Added new record for THOMASTON OIL & PROPANE in Connecticut zone 7
-2025-06-01 20:37:00,159 - INFO - [fuel_scraper.py:255] - Added new record for CT OIL DIRECT in Connecticut zone 7
-2025-06-01 20:37:00,160 - INFO - [fuel_scraper.py:255] - Added new record for ANYTIME OIL in Connecticut zone 7
-2025-06-01 20:37:00,160 - INFO - [fuel_scraper.py:255] - Added new record for THURSTON ENERGY in Connecticut zone 7
-2025-06-01 20:37:00,161 - INFO - [fuel_scraper.py:255] - Added new record for JENNINGS OIL CO in Connecticut zone 7
-2025-06-01 20:37:00,161 - INFO - [fuel_scraper.py:257] - Queued 10 records from NewEnglandOil - connecticut/zone7 for DB insertion.
-2025-06-01 20:37:00,161 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone8.asp?type=0 (State: connecticut, Zone Slug: zone8)
-2025-06-01 20:37:00,384 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone8.
-2025-06-01 20:37:00,385 - INFO - [fuel_scraper.py:255] - Added new record for FIORILLA HEATING OIL in Connecticut zone 8
-2025-06-01 20:37:00,386 - INFO - [fuel_scraper.py:255] - Added new record for PARK CITY FUEL in Connecticut zone 8
-2025-06-01 20:37:00,387 - INFO - [fuel_scraper.py:255] - Added new record for WESTMORE OIL EXPRESS in Connecticut zone 8
-2025-06-01 20:37:00,388 - INFO - [fuel_scraper.py:255] - Added new record for COASTAL ENERGY CT in Connecticut zone 8
-2025-06-01 20:37:00,389 - INFO - [fuel_scraper.py:255] - Added new record for PIRO PETROLEUM in Connecticut zone 8
-2025-06-01 20:37:00,389 - INFO - [fuel_scraper.py:257] - Queued 5 records from NewEnglandOil - connecticut/zone8 for DB insertion.
-2025-06-01 20:37:00,389 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone9.asp?type=0 (State: connecticut, Zone Slug: zone9)
-2025-06-01 20:37:00,627 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone9.
-2025-06-01 20:37:00,629 - INFO - [fuel_scraper.py:255] - Added new record for CASHWAY OIL in Connecticut zone 9
-2025-06-01 20:37:00,630 - INFO - [fuel_scraper.py:255] - Added new record for CT VALLEY OIL in Connecticut zone 9
-2025-06-01 20:37:00,631 - INFO - [fuel_scraper.py:255] - Added new record for E-Z OIL CO in Connecticut zone 9
-2025-06-01 20:37:00,632 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN FUEL OIL INC in Connecticut zone 9
-2025-06-01 20:37:00,633 - INFO - [fuel_scraper.py:255] - Added new record for A1 Oil in Connecticut zone 9
-2025-06-01 20:37:00,634 - INFO - [fuel_scraper.py:255] - Added new record for FERGUSON OIL in Connecticut zone 9
-2025-06-01 20:37:00,634 - INFO - [fuel_scraper.py:257] - Queued 6 records from NewEnglandOil - connecticut/zone9 for DB insertion.
-2025-06-01 20:37:00,635 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone10.asp?type=0 (State: connecticut, Zone Slug: zone10)
-2025-06-01 20:37:00,876 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for connecticut - zone10.
-2025-06-01 20:37:00,878 - INFO - [fuel_scraper.py:255] - Added new record for ENERGY DIRECT LLC in Connecticut zone 10
-2025-06-01 20:37:00,879 - INFO - [fuel_scraper.py:255] - Added new record for PLAINVILLE OIL CO in Connecticut zone 10
-2025-06-01 20:37:00,881 - INFO - [fuel_scraper.py:255] - Added new record for ROBERTS DISCOUNT FUEL CO in Connecticut zone 10
-2025-06-01 20:37:00,882 - INFO - [fuel_scraper.py:255] - Added new record for TOWER ENERGY in Connecticut zone 10
-2025-06-01 20:37:00,882 - INFO - [fuel_scraper.py:257] - Queued 4 records from NewEnglandOil - connecticut/zone10 for DB insertion.
-2025-06-01 20:37:00,882 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone11.asp?type=0 (State: connecticut, Zone Slug: zone11)
-2025-06-01 20:37:01,041 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone11.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone11.asp?type=0
-2025-06-01 20:37:01,041 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone11.asp?type=0. Skipping.
-2025-06-01 20:37:01,041 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone12.asp?type=0 (State: connecticut, Zone Slug: zone12)
-2025-06-01 20:37:01,220 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone12.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone12.asp?type=0
-2025-06-01 20:37:01,221 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone12.asp?type=0. Skipping.
-2025-06-01 20:37:01,221 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone13.asp?type=0 (State: connecticut, Zone Slug: zone13)
-2025-06-01 20:37:01,382 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone13.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone13.asp?type=0
-2025-06-01 20:37:01,382 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone13.asp?type=0. Skipping.
-2025-06-01 20:37:01,382 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone14.asp?type=0 (State: connecticut, Zone Slug: zone14)
-2025-06-01 20:37:01,545 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone14.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone14.asp?type=0
-2025-06-01 20:37:01,545 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone14.asp?type=0. Skipping.
-2025-06-01 20:37:01,545 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone15.asp?type=0 (State: connecticut, Zone Slug: zone15)
-2025-06-01 20:37:01,705 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone15.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone15.asp?type=0
-2025-06-01 20:37:01,705 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone15.asp?type=0. Skipping.
-2025-06-01 20:37:01,705 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/connecticut/zone16.asp?type=0 (State: connecticut, Zone Slug: zone16)
-2025-06-01 20:37:01,833 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/connecticut/zone16.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/connecticut/zone16.asp?type=0
-2025-06-01 20:37:01,834 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/connecticut/zone16.asp?type=0. Skipping.
-2025-06-01 20:37:01,834 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone1.asp?type=0 (State: massachusetts, Zone Slug: zone1)
-2025-06-01 20:37:02,148 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone1.
-2025-06-01 20:37:02,151 - INFO - [fuel_scraper.py:255] - Added new record for OILMAN INC. in Massachusetts zone 1
-2025-06-01 20:37:02,152 - INFO - [fuel_scraper.py:255] - Added new record for GUARANTEE FUEL in Massachusetts zone 1
-2025-06-01 20:37:02,152 - INFO - [fuel_scraper.py:255] - Added new record for SWEET HEAT in Massachusetts zone 1
-2025-06-01 20:37:02,153 - INFO - [fuel_scraper.py:255] - Added new record for BRIDGEWATER FUEL in Massachusetts zone 1
-2025-06-01 20:37:02,154 - INFO - [fuel_scraper.py:255] - Added new record for LAPUMA FUEL in Massachusetts zone 1
-2025-06-01 20:37:02,154 - INFO - [fuel_scraper.py:255] - Added new record for CAREYS DISCOUNT OIL in Massachusetts zone 1
-2025-06-01 20:37:02,155 - INFO - [fuel_scraper.py:255] - Added new record for FOSSIL FUEL ENTERPRISES in Massachusetts zone 1
-2025-06-01 20:37:02,156 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 1
-2025-06-01 20:37:02,157 - INFO - [fuel_scraper.py:255] - Added new record for G&G FUEL INC in Massachusetts zone 1
-2025-06-01 20:37:02,158 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 1
-2025-06-01 20:37:02,158 - INFO - [fuel_scraper.py:255] - Added new record for OHARA FUEL in Massachusetts zone 1
-2025-06-01 20:37:02,159 - INFO - [fuel_scraper.py:255] - Added new record for HIGHWAY FUEL in Massachusetts zone 1
-2025-06-01 20:37:02,160 - INFO - [fuel_scraper.py:255] - Added new record for BURKE OIL in Massachusetts zone 1
-2025-06-01 20:37:02,160 - INFO - [fuel_scraper.py:257] - Queued 13 records from NewEnglandOil - massachusetts/zone1 for DB insertion.
-2025-06-01 20:37:02,160 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone2.asp?type=0 (State: massachusetts, Zone Slug: zone2)
-2025-06-01 20:37:02,461 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone2.
-2025-06-01 20:37:02,463 - INFO - [fuel_scraper.py:255] - Added new record for BOBS OIL COMPANY in Massachusetts zone 2
-2025-06-01 20:37:02,464 - INFO - [fuel_scraper.py:255] - Added new record for FIREMANS FUEL in Massachusetts zone 2
-2025-06-01 20:37:02,465 - INFO - [fuel_scraper.py:255] - Added new record for NARDONE OIL in Massachusetts zone 2
-2025-06-01 20:37:02,466 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 2
-2025-06-01 20:37:02,467 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Massachusetts zone 2
-2025-06-01 20:37:02,468 - INFO - [fuel_scraper.py:255] - Added new record for ARLINGTON ENERGY in Massachusetts zone 2
-2025-06-01 20:37:02,469 - INFO - [fuel_scraper.py:255] - Added new record for NORTHEAST OIL DELIVERY in Massachusetts zone 2
-2025-06-01 20:37:02,469 - INFO - [fuel_scraper.py:255] - Added new record for SAVINO & SONS OIL in Massachusetts zone 2
-2025-06-01 20:37:02,470 - INFO - [fuel_scraper.py:255] - Added new record for GO GREEN OIL in Massachusetts zone 2
-2025-06-01 20:37:02,471 - INFO - [fuel_scraper.py:255] - Added new record for JOHNSON FUEL CO in Massachusetts zone 2
-2025-06-01 20:37:02,472 - INFO - [fuel_scraper.py:255] - Added new record for S&D OIL CO in Massachusetts zone 2
-2025-06-01 20:37:02,473 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 2
-2025-06-01 20:37:02,474 - INFO - [fuel_scraper.py:255] - Added new record for MARCHETTI COMMERCIAL FUELS INC. in Massachusetts zone 2
-2025-06-01 20:37:02,475 - INFO - [fuel_scraper.py:255] - Added new record for KATIES DISCOUNT OIL in Massachusetts zone 2
-2025-06-01 20:37:02,475 - INFO - [fuel_scraper.py:257] - Queued 14 records from NewEnglandOil - massachusetts/zone2 for DB insertion.
-2025-06-01 20:37:02,475 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone3.asp?type=0 (State: massachusetts, Zone Slug: zone3)
-2025-06-01 20:37:02,778 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone3.
-2025-06-01 20:37:02,781 - INFO - [fuel_scraper.py:255] - Added new record for ARROW FUEL in Massachusetts zone 3
-2025-06-01 20:37:02,782 - INFO - [fuel_scraper.py:255] - Added new record for OILMAN INC. in Massachusetts zone 3
-2025-06-01 20:37:02,783 - INFO - [fuel_scraper.py:255] - Added new record for NICCOLI OIL & ENERGY in Massachusetts zone 3
-2025-06-01 20:37:02,784 - INFO - [fuel_scraper.py:255] - Added new record for LAW FUEL AND ENERGY in Massachusetts zone 3
-2025-06-01 20:37:02,785 - INFO - [fuel_scraper.py:255] - Added new record for BLACKSTONE VALLEY OIL in Massachusetts zone 3
-2025-06-01 20:37:02,786 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 3
-2025-06-01 20:37:02,787 - INFO - [fuel_scraper.py:255] - Added new record for OIL ONLY in Massachusetts zone 3
-2025-06-01 20:37:02,788 - INFO - [fuel_scraper.py:255] - Added new record for GUARANTEE FUEL in Massachusetts zone 3
-2025-06-01 20:37:02,789 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 3
-2025-06-01 20:37:02,790 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Massachusetts zone 3
-2025-06-01 20:37:02,791 - INFO - [fuel_scraper.py:255] - Added new record for GEORGES OIL CO in Massachusetts zone 3
-2025-06-01 20:37:02,792 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Massachusetts zone 3
-2025-06-01 20:37:02,793 - INFO - [fuel_scraper.py:255] - Added new record for PLAINVILLE OIL in Massachusetts zone 3
-2025-06-01 20:37:02,794 - INFO - [fuel_scraper.py:255] - Added new record for 4 SEASONS TRANSPORT LLC in Massachusetts zone 3
-2025-06-01 20:37:02,795 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Massachusetts zone 3
-2025-06-01 20:37:02,795 - INFO - [fuel_scraper.py:257] - Queued 15 records from NewEnglandOil - massachusetts/zone3 for DB insertion.
-2025-06-01 20:37:02,795 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone4.asp?type=0 (State: massachusetts, Zone Slug: zone4)
-2025-06-01 20:37:03,106 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone4.
-2025-06-01 20:37:03,109 - INFO - [fuel_scraper.py:255] - Added new record for NICCOLI OIL & ENERGY in Massachusetts zone 4
-2025-06-01 20:37:03,110 - INFO - [fuel_scraper.py:255] - Added new record for BRIDGEWATER FUEL in Massachusetts zone 4
-2025-06-01 20:37:03,111 - INFO - [fuel_scraper.py:255] - Added new record for KEN DUVAL OIL in Massachusetts zone 4
-2025-06-01 20:37:03,112 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN FUEL OIL CO in Massachusetts zone 4
-2025-06-01 20:37:03,113 - INFO - [fuel_scraper.py:255] - Added new record for CAREYS DISCOUNT OIL in Massachusetts zone 4
-2025-06-01 20:37:03,114 - INFO - [fuel_scraper.py:255] - Added new record for CURTIN BROS OIL in Massachusetts zone 4
-2025-06-01 20:37:03,115 - INFO - [fuel_scraper.py:255] - Added new record for SWEET HEAT in Massachusetts zone 4
-2025-06-01 20:37:03,116 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 4
-2025-06-01 20:37:03,117 - INFO - [fuel_scraper.py:255] - Added new record for GUARANTEE FUEL in Massachusetts zone 4
-2025-06-01 20:37:03,118 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT DISCOUNT FUEL in Massachusetts zone 4
-2025-06-01 20:37:03,119 - INFO - [fuel_scraper.py:255] - Added new record for C.O.D. PETRO in Massachusetts zone 4
-2025-06-01 20:37:03,120 - INFO - [fuel_scraper.py:255] - Added new record for YANKEE FUEL in Massachusetts zone 4
-2025-06-01 20:37:03,121 - INFO - [fuel_scraper.py:255] - Added new record for FORNI BROTHERS OIL CO in Massachusetts zone 4
-2025-06-01 20:37:03,122 - INFO - [fuel_scraper.py:255] - Added new record for HIGHWAY FUEL in Massachusetts zone 4
-2025-06-01 20:37:03,123 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 4
-2025-06-01 20:37:03,124 - INFO - [fuel_scraper.py:255] - Added new record for BURKE OIL in Massachusetts zone 4
-2025-06-01 20:37:03,125 - INFO - [fuel_scraper.py:255] - Added new record for OHARA FUEL in Massachusetts zone 4
-2025-06-01 20:37:03,126 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 4
-2025-06-01 20:37:03,127 - INFO - [fuel_scraper.py:255] - Added new record for CESARS OIL in Massachusetts zone 4
-2025-06-01 20:37:03,128 - INFO - [fuel_scraper.py:255] - Added new record for G&G FUEL INC in Massachusetts zone 4
-2025-06-01 20:37:03,129 - INFO - [fuel_scraper.py:255] - Added new record for RAYNARD BROTHERS OIL in Massachusetts zone 4
-2025-06-01 20:37:03,129 - INFO - [fuel_scraper.py:257] - Queued 21 records from NewEnglandOil - massachusetts/zone4 for DB insertion.
-2025-06-01 20:37:03,129 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone5.asp?type=0 (State: massachusetts, Zone Slug: zone5)
-2025-06-01 20:37:03,423 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone5.
-2025-06-01 20:37:03,425 - INFO - [fuel_scraper.py:255] - Added new record for FIREMANS FUEL in Massachusetts zone 5
-2025-06-01 20:37:03,426 - INFO - [fuel_scraper.py:255] - Added new record for LAW FUEL AND ENERGY in Massachusetts zone 5
-2025-06-01 20:37:03,428 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 5
-2025-06-01 20:37:03,428 - INFO - [fuel_scraper.py:255] - Added new record for SAVINO & SONS OIL in Massachusetts zone 5
-2025-06-01 20:37:03,429 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 5
-2025-06-01 20:37:03,430 - INFO - [fuel_scraper.py:255] - Added new record for ARLINGTON ENERGY in Massachusetts zone 5
-2025-06-01 20:37:03,431 - INFO - [fuel_scraper.py:255] - Added new record for JOHNSON FUEL CO in Massachusetts zone 5
-2025-06-01 20:37:03,432 - INFO - [fuel_scraper.py:255] - Added new record for S&D OIL CO in Massachusetts zone 5
-2025-06-01 20:37:03,433 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 5
-2025-06-01 20:37:03,434 - INFO - [fuel_scraper.py:255] - Added new record for 4 SEASONS TRANSPORT LLC in Massachusetts zone 5
-2025-06-01 20:37:03,434 - INFO - [fuel_scraper.py:257] - Queued 10 records from NewEnglandOil - massachusetts/zone5 for DB insertion.
-2025-06-01 20:37:03,434 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone6.asp?type=0 (State: massachusetts, Zone Slug: zone6)
-2025-06-01 20:37:03,700 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone6.
-2025-06-01 20:37:03,703 - INFO - [fuel_scraper.py:255] - Added new record for ARROW FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,704 - INFO - [fuel_scraper.py:255] - Added new record for PRICERITE OIL INC in Massachusetts zone 6
-2025-06-01 20:37:03,705 - INFO - [fuel_scraper.py:255] - Added new record for NICCOLI OIL & ENERGY in Massachusetts zone 6
-2025-06-01 20:37:03,706 - INFO - [fuel_scraper.py:255] - Added new record for LUZO FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,707 - INFO - [fuel_scraper.py:255] - Added new record for BRODEUR & SONS INC in Massachusetts zone 6
-2025-06-01 20:37:03,708 - INFO - [fuel_scraper.py:255] - Added new record for FUEL MAN LLC in Massachusetts zone 6
-2025-06-01 20:37:03,709 - INFO - [fuel_scraper.py:255] - Added new record for AFFORDABLE FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,710 - INFO - [fuel_scraper.py:255] - Added new record for PAPAS FUELS in Massachusetts zone 6
-2025-06-01 20:37:03,710 - INFO - [fuel_scraper.py:255] - Added new record for MIAMI HEAT DISCOUNT FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,711 - INFO - [fuel_scraper.py:255] - Added new record for SAV-ON OIL in Massachusetts zone 6
-2025-06-01 20:37:03,712 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 6
-2025-06-01 20:37:03,713 - INFO - [fuel_scraper.py:255] - Added new record for NITE OIL CO., INC. in Massachusetts zone 6
-2025-06-01 20:37:03,714 - INFO - [fuel_scraper.py:255] - Added new record for GEORGES OIL in Massachusetts zone 6
-2025-06-01 20:37:03,715 - INFO - [fuel_scraper.py:255] - Added new record for CHARLIES OIL COMPANY in Massachusetts zone 6
-2025-06-01 20:37:03,716 - INFO - [fuel_scraper.py:255] - Added new record for OIL ONLY in Massachusetts zone 6
-2025-06-01 20:37:03,717 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Massachusetts zone 6
-2025-06-01 20:37:03,718 - INFO - [fuel_scraper.py:255] - Added new record for GUARD OIL in Massachusetts zone 6
-2025-06-01 20:37:03,719 - INFO - [fuel_scraper.py:255] - Added new record for BUTCHIE OIL in Massachusetts zone 6
-2025-06-01 20:37:03,719 - INFO - [fuel_scraper.py:255] - Added new record for PAQUETTES FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,720 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Massachusetts zone 6
-2025-06-01 20:37:03,721 - INFO - [fuel_scraper.py:255] - Added new record for T & M FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,722 - INFO - [fuel_scraper.py:255] - Added new record for ELITE OIL HEATING & AIR CONDITIONING in Massachusetts zone 6
-2025-06-01 20:37:03,723 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 6
-2025-06-01 20:37:03,724 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Massachusetts zone 6
-2025-06-01 20:37:03,724 - INFO - [fuel_scraper.py:257] - Queued 24 records from NewEnglandOil - massachusetts/zone6 for DB insertion.
-2025-06-01 20:37:03,724 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone7.asp?type=0 (State: massachusetts, Zone Slug: zone7)
-2025-06-01 20:37:04,018 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone7.
-2025-06-01 20:37:04,020 - INFO - [fuel_scraper.py:255] - Added new record for RED WING OIL CO in Massachusetts zone 7
-2025-06-01 20:37:04,021 - INFO - [fuel_scraper.py:255] - Added new record for MID CAPE DISCOUNT OIL in Massachusetts zone 7
-2025-06-01 20:37:04,022 - INFO - [fuel_scraper.py:255] - Added new record for CAPE DISCOUNT FUEL in Massachusetts zone 7
-2025-06-01 20:37:04,023 - INFO - [fuel_scraper.py:255] - Added new record for COD DISCOUNT FUEL in Massachusetts zone 7
-2025-06-01 20:37:04,024 - INFO - [fuel_scraper.py:255] - Added new record for PILGRIM DISCOUNT OIL in Massachusetts zone 7
-2025-06-01 20:37:04,025 - INFO - [fuel_scraper.py:255] - Added new record for EASTERN PETROLEUM in Massachusetts zone 7
-2025-06-01 20:37:04,026 - INFO - [fuel_scraper.py:255] - Added new record for PAPAS FUELS in Massachusetts zone 7
-2025-06-01 20:37:04,027 - INFO - [fuel_scraper.py:255] - Added new record for MARKET PRICE OIL in Massachusetts zone 7
-2025-06-01 20:37:04,028 - INFO - [fuel_scraper.py:255] - Added new record for CAPE COD BIOFUELS in Massachusetts zone 7
-2025-06-01 20:37:04,029 - INFO - [fuel_scraper.py:255] - Added new record for THE OIL PEDDLER in Massachusetts zone 7
-2025-06-01 20:37:04,030 - INFO - [fuel_scraper.py:255] - Added new record for GUARD OIL in Massachusetts zone 7
-2025-06-01 20:37:04,031 - INFO - [fuel_scraper.py:255] - Added new record for YOUNGMANS OIL in Massachusetts zone 7
-2025-06-01 20:37:04,031 - INFO - [fuel_scraper.py:257] - Queued 12 records from NewEnglandOil - massachusetts/zone7 for DB insertion.
-2025-06-01 20:37:04,031 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone8.asp?type=0 (State: massachusetts, Zone Slug: zone8)
-2025-06-01 20:37:04,309 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone8.
-2025-06-01 20:37:04,312 - INFO - [fuel_scraper.py:255] - Added new record for NARDONE OIL in Massachusetts zone 8
-2025-06-01 20:37:04,313 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Massachusetts zone 8
-2025-06-01 20:37:04,314 - INFO - [fuel_scraper.py:255] - Added new record for S&D OIL CO in Massachusetts zone 8
-2025-06-01 20:37:04,315 - INFO - [fuel_scraper.py:255] - Added new record for COUNTY ENERGY in Massachusetts zone 8
-2025-06-01 20:37:04,316 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 8
-2025-06-01 20:37:04,317 - INFO - [fuel_scraper.py:255] - Added new record for MAHONEY OIL CO in Massachusetts zone 8
-2025-06-01 20:37:04,318 - INFO - [fuel_scraper.py:255] - Added new record for JOHNSON FUEL CO in Massachusetts zone 8
-2025-06-01 20:37:04,319 - INFO - [fuel_scraper.py:255] - Added new record for COLONIAL OIL CO in Massachusetts zone 8
-2025-06-01 20:37:04,320 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 8
-2025-06-01 20:37:04,321 - INFO - [fuel_scraper.py:255] - Added new record for GO GREEN OIL in Massachusetts zone 8
-2025-06-01 20:37:04,322 - INFO - [fuel_scraper.py:255] - Added new record for J A HEALY & SONS OIL CO in Massachusetts zone 8
-2025-06-01 20:37:04,323 - INFO - [fuel_scraper.py:255] - Added new record for BOBS OIL COMPANY in Massachusetts zone 8
-2025-06-01 20:37:04,324 - INFO - [fuel_scraper.py:255] - Added new record for KATIES DISCOUNT OIL in Massachusetts zone 8
-2025-06-01 20:37:04,324 - INFO - [fuel_scraper.py:257] - Queued 13 records from NewEnglandOil - massachusetts/zone8 for DB insertion.
-2025-06-01 20:37:04,324 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone9.asp?type=0 (State: massachusetts, Zone Slug: zone9)
-2025-06-01 20:37:04,653 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone9.
-2025-06-01 20:37:04,655 - INFO - [fuel_scraper.py:255] - Added new record for EATON OIL CO. in Massachusetts zone 9
-2025-06-01 20:37:04,656 - INFO - [fuel_scraper.py:255] - Added new record for DIRECT FUEL in Massachusetts zone 9
-2025-06-01 20:37:04,657 - INFO - [fuel_scraper.py:255] - Added new record for FIREMANS FUEL in Massachusetts zone 9
-2025-06-01 20:37:04,659 - INFO - [fuel_scraper.py:255] - Added new record for YNOT OIL in Massachusetts zone 9
-2025-06-01 20:37:04,660 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Massachusetts zone 9
-2025-06-01 20:37:04,661 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Massachusetts zone 9
-2025-06-01 20:37:04,662 - INFO - [fuel_scraper.py:255] - Added new record for SOLS FUEL CO in Massachusetts zone 9
-2025-06-01 20:37:04,663 - INFO - [fuel_scraper.py:255] - Added new record for NORTHEAST OIL DELIVERY in Massachusetts zone 9
-2025-06-01 20:37:04,664 - INFO - [fuel_scraper.py:255] - Added new record for GO GREEN OIL in Massachusetts zone 9
-2025-06-01 20:37:04,665 - INFO - [fuel_scraper.py:255] - Added new record for LEIGHTONS HEATING & COOLING INC. in Massachusetts zone 9
-2025-06-01 20:37:04,666 - INFO - [fuel_scraper.py:255] - Added new record for ATLANTIC OIL in Massachusetts zone 9
-2025-06-01 20:37:04,667 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Massachusetts zone 9
-2025-06-01 20:37:04,668 - INFO - [fuel_scraper.py:255] - Added new record for EDGEMONT OIL LLC in Massachusetts zone 9
-2025-06-01 20:37:04,669 - INFO - [fuel_scraper.py:255] - Added new record for SENIOR CITIZENS HEATING OIL in Massachusetts zone 9
-2025-06-01 20:37:04,669 - INFO - [fuel_scraper.py:255] - Added new record for SPARTAN OIL in Massachusetts zone 9
-2025-06-01 20:37:04,670 - INFO - [fuel_scraper.py:255] - Added new record for MARCHETTI COMMERCIAL FUELS INC. in Massachusetts zone 9
-2025-06-01 20:37:04,671 - INFO - [fuel_scraper.py:255] - Added new record for KATIES DISCOUNT OIL in Massachusetts zone 9
-2025-06-01 20:37:04,672 - INFO - [fuel_scraper.py:255] - Added new record for SAVINO & SONS OIL in Massachusetts zone 9
-2025-06-01 20:37:04,673 - INFO - [fuel_scraper.py:257] - Queued 18 records from NewEnglandOil - massachusetts/zone9 for DB insertion.
-2025-06-01 20:37:04,673 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone10.asp?type=0 (State: massachusetts, Zone Slug: zone10)
-2025-06-01 20:37:04,977 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone10.
-2025-06-01 20:37:04,980 - INFO - [fuel_scraper.py:255] - Added new record for CHARLTON OIL & PROPANE in Massachusetts zone 10
-2025-06-01 20:37:04,981 - INFO - [fuel_scraper.py:255] - Added new record for LEBLANC OIL LLC in Massachusetts zone 10
-2025-06-01 20:37:04,982 - INFO - [fuel_scraper.py:255] - Added new record for RED STAR OIL CO. in Massachusetts zone 10
-2025-06-01 20:37:04,983 - INFO - [fuel_scraper.py:255] - Added new record for NYDAM OIL SVC in Massachusetts zone 10
-2025-06-01 20:37:04,984 - INFO - [fuel_scraper.py:255] - Added new record for PETERSON OIL SVC in Massachusetts zone 10
-2025-06-01 20:37:04,985 - INFO - [fuel_scraper.py:255] - Added new record for HARRIS OIL CO in Massachusetts zone 10
-2025-06-01 20:37:04,986 - INFO - [fuel_scraper.py:255] - Added new record for KENS OIL & HEATING INC in Massachusetts zone 10
-2025-06-01 20:37:04,988 - INFO - [fuel_scraper.py:255] - Added new record for NALA INDUSTRIES INC in Massachusetts zone 10
-2025-06-01 20:37:04,989 - INFO - [fuel_scraper.py:255] - Added new record for HELLEN FUELS CORP in Massachusetts zone 10
-2025-06-01 20:37:04,989 - INFO - [fuel_scraper.py:255] - Added new record for BLACKSTONE VALLEY OIL in Massachusetts zone 10
-2025-06-01 20:37:04,990 - INFO - [fuel_scraper.py:255] - Added new record for OLD MAN OIL in Massachusetts zone 10
-2025-06-01 20:37:04,991 - INFO - [fuel_scraper.py:255] - Added new record for ALS OIL SERVICE in Massachusetts zone 10
-2025-06-01 20:37:04,992 - INFO - [fuel_scraper.py:255] - Added new record for ENDICOTT OIL SERVICE in Massachusetts zone 10
-2025-06-01 20:37:04,993 - INFO - [fuel_scraper.py:255] - Added new record for JUST OIL INC in Massachusetts zone 10
-2025-06-01 20:37:04,994 - INFO - [fuel_scraper.py:255] - Added new record for SOUTHBRIDGE TIRE CO in Massachusetts zone 10
-2025-06-01 20:37:04,995 - INFO - [fuel_scraper.py:255] - Added new record for AUBURN OIL in Massachusetts zone 10
-2025-06-01 20:37:04,996 - INFO - [fuel_scraper.py:255] - Added new record for LMT Oil, Inc. in Massachusetts zone 10
-2025-06-01 20:37:04,997 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 10
-2025-06-01 20:37:04,998 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Massachusetts zone 10
-2025-06-01 20:37:04,999 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Massachusetts zone 10
-2025-06-01 20:37:05,000 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Massachusetts zone 10
-2025-06-01 20:37:05,001 - INFO - [fuel_scraper.py:255] - Added new record for SHERMAN OIL in Massachusetts zone 10
-2025-06-01 20:37:05,002 - INFO - [fuel_scraper.py:255] - Added new record for CAMS OIL SERVICE in Massachusetts zone 10
-2025-06-01 20:37:05,003 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN DISCOUNT OIL & PROPANE in Massachusetts zone 10
-2025-06-01 20:37:05,004 - INFO - [fuel_scraper.py:255] - Added new record for RADIO OIL CO in Massachusetts zone 10
-2025-06-01 20:37:05,005 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT OIL SERVICE in Massachusetts zone 10
-2025-06-01 20:37:05,006 - INFO - [fuel_scraper.py:255] - Added new record for VALUE OIL INC in Massachusetts zone 10
-2025-06-01 20:37:05,007 - INFO - [fuel_scraper.py:255] - Added new record for DADDYS OIL in Massachusetts zone 10
-2025-06-01 20:37:05,008 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Massachusetts zone 10
-2025-06-01 20:37:05,009 - INFO - [fuel_scraper.py:255] - Added new record for FAIAS OIL in Massachusetts zone 10
-2025-06-01 20:37:05,010 - INFO - [fuel_scraper.py:255] - Added new record for PIONEER VALLEY OIL & PROPANE in Massachusetts zone 10
-2025-06-01 20:37:05,011 - INFO - [fuel_scraper.py:255] - Added new record for OIL4LESS & PROPANE in Massachusetts zone 10
-2025-06-01 20:37:05,011 - INFO - [fuel_scraper.py:257] - Queued 32 records from NewEnglandOil - massachusetts/zone10 for DB insertion.
-2025-06-01 20:37:05,011 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone11.asp?type=0 (State: massachusetts, Zone Slug: zone11)
-2025-06-01 20:37:05,338 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone11.
-2025-06-01 20:37:05,340 - INFO - [fuel_scraper.py:255] - Added new record for NALA INDUSTRIES INC in Massachusetts zone 11
-2025-06-01 20:37:05,341 - INFO - [fuel_scraper.py:255] - Added new record for ORLANDO FUEL SERVICE in Massachusetts zone 11
-2025-06-01 20:37:05,342 - INFO - [fuel_scraper.py:255] - Added new record for LOW COST FUEL in Massachusetts zone 11
-2025-06-01 20:37:05,343 - INFO - [fuel_scraper.py:255] - Added new record for J A HEALY & SONS OIL CO in Massachusetts zone 11
-2025-06-01 20:37:05,344 - INFO - [fuel_scraper.py:255] - Added new record for DORTENZIO OIL COMPANY in Massachusetts zone 11
-2025-06-01 20:37:05,345 - INFO - [fuel_scraper.py:255] - Added new record for AMERICAN DISCOUNT OIL & PROPANE in Massachusetts zone 11
-2025-06-01 20:37:05,346 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT OIL SERVICE in Massachusetts zone 11
-2025-06-01 20:37:05,347 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT LIQUID ENERGY in Massachusetts zone 11
-2025-06-01 20:37:05,348 - INFO - [fuel_scraper.py:255] - Added new record for BLACKSTONE VALLEY OIL in Massachusetts zone 11
-2025-06-01 20:37:05,349 - INFO - [fuel_scraper.py:255] - Added new record for WILL & SON TRUCKING INC in Massachusetts zone 11
-2025-06-01 20:37:05,350 - INFO - [fuel_scraper.py:255] - Added new record for PIONEER VALLEY OIL & PROPANE in Massachusetts zone 11
-2025-06-01 20:37:05,351 - INFO - [fuel_scraper.py:255] - Added new record for JUST OIL INC in Massachusetts zone 11
-2025-06-01 20:37:05,352 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Massachusetts zone 11
-2025-06-01 20:37:05,353 - INFO - [fuel_scraper.py:255] - Added new record for OIL4LESS & PROPANE in Massachusetts zone 11
-2025-06-01 20:37:05,354 - INFO - [fuel_scraper.py:255] - Added new record for VALUE OIL INC in Massachusetts zone 11
-2025-06-01 20:37:05,354 - INFO - [fuel_scraper.py:255] - Added new record for DADDYS OIL in Massachusetts zone 11
-2025-06-01 20:37:05,355 - INFO - [fuel_scraper.py:257] - Queued 16 records from NewEnglandOil - massachusetts/zone11 for DB insertion.
-2025-06-01 20:37:05,355 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/massachusetts/zone12.asp?type=0 (State: massachusetts, Zone Slug: zone12)
-2025-06-01 20:37:05,667 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for massachusetts - zone12.
-2025-06-01 20:37:05,669 - INFO - [fuel_scraper.py:255] - Added new record for KIERAS OIL INC in Massachusetts zone 12
-2025-06-01 20:37:05,670 - INFO - [fuel_scraper.py:255] - Added new record for SURNER DISCOUNT OIL in Massachusetts zone 12
-2025-06-01 20:37:05,672 - INFO - [fuel_scraper.py:255] - Added new record for FUELCO in Massachusetts zone 12
-2025-06-01 20:37:05,673 - INFO - [fuel_scraper.py:255] - Added new record for FAST FILL OIL in Massachusetts zone 12
-2025-06-01 20:37:05,674 - INFO - [fuel_scraper.py:255] - Added new record for RICHARDS FUEL INC in Massachusetts zone 12
-2025-06-01 20:37:05,675 - INFO - [fuel_scraper.py:255] - Added new record for DONOVAN OIL CO in Massachusetts zone 12
-2025-06-01 20:37:05,676 - INFO - [fuel_scraper.py:255] - Added new record for U S OIL CO in Massachusetts zone 12
-2025-06-01 20:37:05,677 - INFO - [fuel_scraper.py:255] - Added new record for BOTTOM LINE OIL in Massachusetts zone 12
-2025-06-01 20:37:05,678 - INFO - [fuel_scraper.py:255] - Added new record for PIONEER VALLEY OIL & PROPANE in Massachusetts zone 12
-2025-06-01 20:37:05,679 - INFO - [fuel_scraper.py:255] - Added new record for DANS OIL CO in Massachusetts zone 12
-2025-06-01 20:37:05,680 - INFO - [fuel_scraper.py:255] - Added new record for FRASCO FUEL OIL in Massachusetts zone 12
-2025-06-01 20:37:05,680 - INFO - [fuel_scraper.py:257] - Queued 11 records from NewEnglandOil - massachusetts/zone12 for DB insertion.
-2025-06-01 20:37:05,680 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone1.asp?type=0 (State: newhampshire, Zone Slug: zone1)
-2025-06-01 20:37:06,017 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone1.
-2025-06-01 20:37:06,019 - INFO - [fuel_scraper.py:255] - Added new record for HARRIS ENERGY in Newhampshire zone 1
-2025-06-01 20:37:06,021 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 1
-2025-06-01 20:37:06,022 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 1
-2025-06-01 20:37:06,023 - INFO - [fuel_scraper.py:255] - Added new record for PRESBY OIL in Newhampshire zone 1
-2025-06-01 20:37:06,024 - INFO - [fuel_scraper.py:255] - Added new record for AL'S PLUMBING HEATING & FUELS in Newhampshire zone 1
-2025-06-01 20:37:06,025 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 1
-2025-06-01 20:37:06,026 - INFO - [fuel_scraper.py:255] - Added new record for FITCH FUEL CO in Newhampshire zone 1
-2025-06-01 20:37:06,026 - INFO - [fuel_scraper.py:257] - Queued 7 records from NewEnglandOil - newhampshire/zone1 for DB insertion.
-2025-06-01 20:37:06,026 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone2.asp?type=0 (State: newhampshire, Zone Slug: zone2)
-2025-06-01 20:37:06,280 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone2.
-2025-06-01 20:37:06,283 - INFO - [fuel_scraper.py:255] - Added new record for NEIGHBORS OIL in Newhampshire zone 2
-2025-06-01 20:37:06,284 - INFO - [fuel_scraper.py:255] - Added new record for FIELDINGS OIL & PROPANE in Newhampshire zone 2
-2025-06-01 20:37:06,285 - INFO - [fuel_scraper.py:255] - Added new record for GRANITE STATE OIL in Newhampshire zone 2
-2025-06-01 20:37:06,286 - INFO - [fuel_scraper.py:255] - Added new record for QUALITY FUELS LLC in Newhampshire zone 2
-2025-06-01 20:37:06,287 - INFO - [fuel_scraper.py:255] - Added new record for NIBROC OIL in Newhampshire zone 2
-2025-06-01 20:37:06,288 - INFO - [fuel_scraper.py:255] - Added new record for WELCH OIL in Newhampshire zone 2
-2025-06-01 20:37:06,289 - INFO - [fuel_scraper.py:255] - Added new record for CARDINAL & GLIDDEN OIL CO., INC. in Newhampshire zone 2
-2025-06-01 20:37:06,290 - INFO - [fuel_scraper.py:255] - Added new record for ATLANTC OIL in Newhampshire zone 2
-2025-06-01 20:37:06,291 - INFO - [fuel_scraper.py:255] - Added new record for REED FAMILY ENERGY in Newhampshire zone 2
-2025-06-01 20:37:06,292 - INFO - [fuel_scraper.py:255] - Added new record for LEOS FUEL in Newhampshire zone 2
-2025-06-01 20:37:06,293 - INFO - [fuel_scraper.py:255] - Added new record for BROCO ENERGY in Newhampshire zone 2
-2025-06-01 20:37:06,294 - INFO - [fuel_scraper.py:255] - Added new record for 603 OIL CO. in Newhampshire zone 2
-2025-06-01 20:37:06,295 - INFO - [fuel_scraper.py:255] - Added new record for NOBLE FUELS in Newhampshire zone 2
-2025-06-01 20:37:06,296 - INFO - [fuel_scraper.py:255] - Added new record for ONLINE FUEL CO in Newhampshire zone 2
-2025-06-01 20:37:06,297 - INFO - [fuel_scraper.py:255] - Added new record for RC NIGHELLI HEATING SERVICES, LLC in Newhampshire zone 2
-2025-06-01 20:37:06,298 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Newhampshire zone 2
-2025-06-01 20:37:06,299 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 2
-2025-06-01 20:37:06,300 - INFO - [fuel_scraper.py:255] - Added new record for DEKES FUEL, LLC in Newhampshire zone 2
-2025-06-01 20:37:06,301 - INFO - [fuel_scraper.py:255] - Added new record for LOCAL PRIDE HEATING OIL in Newhampshire zone 2
-2025-06-01 20:37:06,302 - INFO - [fuel_scraper.py:255] - Added new record for HOMETOWN OIL in Newhampshire zone 2
-2025-06-01 20:37:06,303 - INFO - [fuel_scraper.py:255] - Added new record for SNH CLEAN ENERGY in Newhampshire zone 2
-2025-06-01 20:37:06,304 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT ENERGY in Newhampshire zone 2
-2025-06-01 20:37:06,304 - INFO - [fuel_scraper.py:257] - Queued 22 records from NewEnglandOil - newhampshire/zone2 for DB insertion.
-2025-06-01 20:37:06,304 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone3.asp?type=0 (State: newhampshire, Zone Slug: zone3)
-2025-06-01 20:37:06,664 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone3.
-2025-06-01 20:37:06,666 - INFO - [fuel_scraper.py:255] - Added new record for HEBERT FUEL CO in Newhampshire zone 3
-2025-06-01 20:37:06,667 - INFO - [fuel_scraper.py:255] - Added new record for CONTOOCOOK VALLEY FUEL SVC in Newhampshire zone 3
-2025-06-01 20:37:06,669 - INFO - [fuel_scraper.py:255] - Added new record for 603 OIL CO. in Newhampshire zone 3
-2025-06-01 20:37:06,669 - INFO - [fuel_scraper.py:255] - Added new record for JOELS OIL in Newhampshire zone 3
-2025-06-01 20:37:06,670 - INFO - [fuel_scraper.py:255] - Added new record for DUTILE & SONS INC in Newhampshire zone 3
-2025-06-01 20:37:06,671 - INFO - [fuel_scraper.py:255] - Added new record for FOLEY OIL CO in Newhampshire zone 3
-2025-06-01 20:37:06,672 - INFO - [fuel_scraper.py:255] - Added new record for CN BROWN ENERGY in Newhampshire zone 3
-2025-06-01 20:37:06,672 - INFO - [fuel_scraper.py:257] - Queued 7 records from NewEnglandOil - newhampshire/zone3 for DB insertion.
-2025-06-01 20:37:06,672 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone4.asp?type=0 (State: newhampshire, Zone Slug: zone4)
-2025-06-01 20:37:07,022 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone4.
-2025-06-01 20:37:07,024 - INFO - [fuel_scraper.py:255] - Added new record for R E HINKLEY CO in Newhampshire zone 4
-2025-06-01 20:37:07,024 - INFO - [fuel_scraper.py:257] - Queued 1 records from NewEnglandOil - newhampshire/zone4 for DB insertion.
-2025-06-01 20:37:07,024 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone5.asp?type=0 (State: newhampshire, Zone Slug: zone5)
-2025-06-01 20:37:07,369 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone5.
-2025-06-01 20:37:07,371 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL OF KEENE in Newhampshire zone 5
-2025-06-01 20:37:07,372 - INFO - [fuel_scraper.py:255] - Added new record for DAVIS OIL CO in Newhampshire zone 5
-2025-06-01 20:37:07,373 - INFO - [fuel_scraper.py:255] - Added new record for REDS OF JAFFREY LLC in Newhampshire zone 5
-2025-06-01 20:37:07,375 - INFO - [fuel_scraper.py:255] - Added new record for SWANZEY OIL in Newhampshire zone 5
-2025-06-01 20:37:07,376 - INFO - [fuel_scraper.py:255] - Added new record for BOBS FUEL COMPANY in Newhampshire zone 5
-2025-06-01 20:37:07,376 - INFO - [fuel_scraper.py:257] - Queued 5 records from NewEnglandOil - newhampshire/zone5 for DB insertion.
-2025-06-01 20:37:07,376 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newhampshire/zone6.asp?type=0 (State: newhampshire, Zone Slug: zone6)
-2025-06-01 20:37:07,620 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for newhampshire - zone6.
-2025-06-01 20:37:07,623 - INFO - [fuel_scraper.py:255] - Added new record for HEBERT FUEL CO in Newhampshire zone 6
-2025-06-01 20:37:07,624 - INFO - [fuel_scraper.py:255] - Added new record for NASHUA FUEL in Newhampshire zone 6
-2025-06-01 20:37:07,625 - INFO - [fuel_scraper.py:255] - Added new record for COUNTY ENERGY in Newhampshire zone 6
-2025-06-01 20:37:07,626 - INFO - [fuel_scraper.py:255] - Added new record for MY EASY OIL in Newhampshire zone 6
-2025-06-01 20:37:07,627 - INFO - [fuel_scraper.py:255] - Added new record for FUEL NRG in Newhampshire zone 6
-2025-06-01 20:37:07,628 - INFO - [fuel_scraper.py:255] - Added new record for SOUTHERN NEW HAMPSHIRE ENERGY in Newhampshire zone 6
-2025-06-01 20:37:07,629 - INFO - [fuel_scraper.py:255] - Added new record for DEEP DISCOUNT OIL in Newhampshire zone 6
-2025-06-01 20:37:07,630 - INFO - [fuel_scraper.py:255] - Added new record for SNH CLEAN ENERGY in Newhampshire zone 6
-2025-06-01 20:37:07,630 - INFO - [fuel_scraper.py:257] - Queued 8 records from NewEnglandOil - newhampshire/zone6 for DB insertion.
-2025-06-01 20:37:07,630 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone1.asp?type=0 (State: rhodeisland, Zone Slug: zone1)
-2025-06-01 20:37:07,860 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone1.
-2025-06-01 20:37:07,862 - INFO - [fuel_scraper.py:255] - Added new record for AFFORDABLE FUEL in Rhodeisland zone 1
-2025-06-01 20:37:07,864 - INFO - [fuel_scraper.py:255] - Added new record for NITE OIL CO., INC. in Rhodeisland zone 1
-2025-06-01 20:37:07,865 - INFO - [fuel_scraper.py:255] - Added new record for CHARLIES OIL COMPANY in Rhodeisland zone 1
-2025-06-01 20:37:07,866 - INFO - [fuel_scraper.py:255] - Added new record for DUDEK OIL CO in Rhodeisland zone 1
-2025-06-01 20:37:07,867 - INFO - [fuel_scraper.py:255] - Added new record for THE OIL MAN in Rhodeisland zone 1
-2025-06-01 20:37:07,868 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Rhodeisland zone 1
-2025-06-01 20:37:07,869 - INFO - [fuel_scraper.py:255] - Added new record for ELITE OIL HEATING & AIR CONDITIONING in Rhodeisland zone 1
-2025-06-01 20:37:07,870 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 1
-2025-06-01 20:37:07,871 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 1
-2025-06-01 20:37:07,871 - INFO - [fuel_scraper.py:257] - Queued 9 records from NewEnglandOil - rhodeisland/zone1 for DB insertion.
-2025-06-01 20:37:07,871 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone2.asp?type=0 (State: rhodeisland, Zone Slug: zone2)
-2025-06-01 20:37:08,151 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone2.
-2025-06-01 20:37:08,154 - INFO - [fuel_scraper.py:255] - Added new record for PRICERITE OIL INC in Rhodeisland zone 2
-2025-06-01 20:37:08,155 - INFO - [fuel_scraper.py:255] - Added new record for PROFESSIONAL HEATING/SAVE-ON OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,156 - INFO - [fuel_scraper.py:255] - Added new record for A-STAR OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,157 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Rhodeisland zone 2
-2025-06-01 20:37:08,157 - INFO - [fuel_scraper.py:255] - Added new record for AFFORDABLE FUEL in Rhodeisland zone 2
-2025-06-01 20:37:08,158 - INFO - [fuel_scraper.py:255] - Added new record for RAMBONE & SPRAQUE OIL SERVICE INC. in Rhodeisland zone 2
-2025-06-01 20:37:08,159 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,160 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Rhodeisland zone 2
-2025-06-01 20:37:08,161 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Rhodeisland zone 2
-2025-06-01 20:37:08,162 - INFO - [fuel_scraper.py:255] - Added new record for HENRY OIL COMPANY in Rhodeisland zone 2
-2025-06-01 20:37:08,163 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,164 - INFO - [fuel_scraper.py:255] - Added new record for ANTHONYS OIL & WATER, LLC in Rhodeisland zone 2
-2025-06-01 20:37:08,165 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Rhodeisland zone 2
-2025-06-01 20:37:08,166 - INFO - [fuel_scraper.py:255] - Added new record for M.J. MEEHAN EXCAVATING in Rhodeisland zone 2
-2025-06-01 20:37:08,166 - INFO - [fuel_scraper.py:255] - Added new record for BUTCHIE OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,168 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT FUEL OIL & Propane in Rhodeisland zone 2
-2025-06-01 20:37:08,168 - INFO - [fuel_scraper.py:255] - Added new record for MAJOR OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,169 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 2
-2025-06-01 20:37:08,170 - INFO - [fuel_scraper.py:255] - Added new record for WICKED WARM OIL in Rhodeisland zone 2
-2025-06-01 20:37:08,171 - INFO - [fuel_scraper.py:257] - Queued 19 records from NewEnglandOil - rhodeisland/zone2 for DB insertion.
-2025-06-01 20:37:08,171 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone3.asp?type=0 (State: rhodeisland, Zone Slug: zone3)
-2025-06-01 20:37:08,430 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone3.
-2025-06-01 20:37:08,433 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Rhodeisland zone 3
-2025-06-01 20:37:08,434 - INFO - [fuel_scraper.py:255] - Added new record for GUARDIAN FUEL ONLINE in Rhodeisland zone 3
-2025-06-01 20:37:08,435 - INFO - [fuel_scraper.py:255] - Added new record for A-STAR OIL in Rhodeisland zone 3
-2025-06-01 20:37:08,436 - INFO - [fuel_scraper.py:255] - Added new record for HENRY OIL COMPANY in Rhodeisland zone 3
-2025-06-01 20:37:08,437 - INFO - [fuel_scraper.py:255] - Added new record for PROFESSIONAL HEATING/SAVE-ON OIL in Rhodeisland zone 3
-2025-06-01 20:37:08,438 - INFO - [fuel_scraper.py:255] - Added new record for VALLEY FUEL in Rhodeisland zone 3
-2025-06-01 20:37:08,439 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 3
-2025-06-01 20:37:08,440 - INFO - [fuel_scraper.py:255] - Added new record for NET FUELS in Rhodeisland zone 3
-2025-06-01 20:37:08,441 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT FUEL OIL & Propane in Rhodeisland zone 3
-2025-06-01 20:37:08,442 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Rhodeisland zone 3
-2025-06-01 20:37:08,443 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Rhodeisland zone 3
-2025-06-01 20:37:08,444 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 3
-2025-06-01 20:37:08,445 - INFO - [fuel_scraper.py:255] - Added new record for PATRIOT OIL in Rhodeisland zone 3
-2025-06-01 20:37:08,446 - INFO - [fuel_scraper.py:255] - Added new record for MAJOR OIL in Rhodeisland zone 3
-2025-06-01 20:37:08,446 - INFO - [fuel_scraper.py:257] - Queued 14 records from NewEnglandOil - rhodeisland/zone3 for DB insertion.
-2025-06-01 20:37:08,446 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone4.asp?type=0 (State: rhodeisland, Zone Slug: zone4)
-2025-06-01 20:37:08,691 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for rhodeisland - zone4.
-2025-06-01 20:37:08,694 - INFO - [fuel_scraper.py:255] - Added new record for UNIVERSAL OIL COMPANY in Rhodeisland zone 4
-2025-06-01 20:37:08,695 - INFO - [fuel_scraper.py:255] - Added new record for A-STAR OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,696 - INFO - [fuel_scraper.py:255] - Added new record for SPEEDY OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,697 - INFO - [fuel_scraper.py:255] - Added new record for HENRY OIL COMPANY in Rhodeisland zone 4
-2025-06-01 20:37:08,698 - INFO - [fuel_scraper.py:255] - Added new record for GLOW OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,699 - INFO - [fuel_scraper.py:255] - Added new record for MAJOR OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,700 - INFO - [fuel_scraper.py:255] - Added new record for PROFESSIONAL HEATING/SAVE-ON OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,701 - INFO - [fuel_scraper.py:255] - Added new record for COD OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,702 - INFO - [fuel_scraper.py:255] - Added new record for ELITE OIL HEATING & AIR CONDITIONING in Rhodeisland zone 4
-2025-06-01 20:37:08,703 - INFO - [fuel_scraper.py:255] - Added new record for NORTHERN ENERGY LLC in Rhodeisland zone 4
-2025-06-01 20:37:08,704 - INFO - [fuel_scraper.py:255] - Added new record for ANTHONYS OIL & WATER, LLC in Rhodeisland zone 4
-2025-06-01 20:37:08,705 - INFO - [fuel_scraper.py:255] - Added new record for NET FUELS in Rhodeisland zone 4
-2025-06-01 20:37:08,706 - INFO - [fuel_scraper.py:255] - Added new record for RAMBONE & SPRAQUE OIL SERVICE INC in Rhodeisland zone 4
-2025-06-01 20:37:08,707 - INFO - [fuel_scraper.py:255] - Added new record for MIDNIGHT FUEL OIL & PROPANE in Rhodeisland zone 4
-2025-06-01 20:37:08,708 - INFO - [fuel_scraper.py:255] - Added new record for PEREZ OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,709 - INFO - [fuel_scraper.py:255] - Added new record for ADAMS FAMILY OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,710 - INFO - [fuel_scraper.py:255] - Added new record for 1ST CHOICE FUEL in Rhodeisland zone 4
-2025-06-01 20:37:08,711 - INFO - [fuel_scraper.py:255] - Added new record for AZOREAN OIL in Rhodeisland zone 4
-2025-06-01 20:37:08,712 - INFO - [fuel_scraper.py:255] - Added new record for THE HEATING OIL LADY in Rhodeisland zone 4
-2025-06-01 20:37:08,713 - INFO - [fuel_scraper.py:255] - Added new record for DISCOUNT OIL BROKERS in Rhodeisland zone 4
-2025-06-01 20:37:08,713 - INFO - [fuel_scraper.py:257] - Queued 20 records from NewEnglandOil - rhodeisland/zone4 for DB insertion.
-2025-06-01 20:37:08,713 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0 (State: rhodeisland, Zone Slug: zone5)
-2025-06-01 20:37:08,838 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0
-2025-06-01 20:37:08,839 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/rhodeisland/zone5.asp?type=0. Skipping.
-2025-06-01 20:37:08,839 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone1.asp?type=0 (State: vermont, Zone Slug: zone1)
-2025-06-01 20:37:09,047 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone1.
-2025-06-01 20:37:09,048 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone1.
-2025-06-01 20:37:09,048 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone1.asp?type=0
-2025-06-01 20:37:09,048 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone2.asp?type=0 (State: vermont, Zone Slug: zone2)
-2025-06-01 20:37:09,465 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone2.
-2025-06-01 20:37:09,466 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone2.
-2025-06-01 20:37:09,466 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone2.asp?type=0
-2025-06-01 20:37:09,466 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone3.asp?type=0 (State: vermont, Zone Slug: zone3)
-2025-06-01 20:37:09,840 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone3.
-2025-06-01 20:37:09,841 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone3.
-2025-06-01 20:37:09,841 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone3.asp?type=0
-2025-06-01 20:37:09,841 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone4.asp?type=0 (State: vermont, Zone Slug: zone4)
-2025-06-01 20:37:10,228 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone4.
-2025-06-01 20:37:10,229 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone4.
-2025-06-01 20:37:10,229 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone4.asp?type=0
-2025-06-01 20:37:10,229 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone5.asp?type=0 (State: vermont, Zone Slug: zone5)
-2025-06-01 20:37:10,603 - INFO - [fuel_scraper.py:97] - Found 2 table(s) on page for vermont - zone5.
-2025-06-01 20:37:10,603 - WARNING - [fuel_scraper.py:181] - No tables matching expected price table structure found for vermont - zone5.
-2025-06-01 20:37:10,603 - INFO - [fuel_scraper.py:259] - No data extracted from https://www.newenglandoil.com/vermont/zone5.asp?type=0
-2025-06-01 20:37:10,603 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/vermont/zone6.asp?type=0 (State: vermont, Zone Slug: zone6)
-2025-06-01 20:37:10,760 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/vermont/zone6.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/vermont/zone6.asp?type=0
-2025-06-01 20:37:10,760 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/vermont/zone6.asp?type=0. Skipping.
-2025-06-01 20:37:10,760 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone1.asp?type=0 (State: newyork, Zone Slug: zone1)
-2025-06-01 20:37:10,888 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone1.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone1.asp?type=0
-2025-06-01 20:37:10,888 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone1.asp?type=0. Skipping.
-2025-06-01 20:37:10,888 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone2.asp?type=0 (State: newyork, Zone Slug: zone2)
-2025-06-01 20:37:11,036 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone2.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone2.asp?type=0
-2025-06-01 20:37:11,036 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone2.asp?type=0. Skipping.
-2025-06-01 20:37:11,036 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone3.asp?type=0 (State: newyork, Zone Slug: zone3)
-2025-06-01 20:37:11,193 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone3.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone3.asp?type=0
-2025-06-01 20:37:11,193 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone3.asp?type=0. Skipping.
-2025-06-01 20:37:11,193 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone4.asp?type=0 (State: newyork, Zone Slug: zone4)
-2025-06-01 20:37:11,364 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone4.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone4.asp?type=0
-2025-06-01 20:37:11,364 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone4.asp?type=0. Skipping.
-2025-06-01 20:37:11,364 - INFO - [fuel_scraper.py:218] - Scraping: https://www.newenglandoil.com/newyork/zone5.asp?type=0 (State: newyork, Zone Slug: zone5)
-2025-06-01 20:37:11,523 - ERROR - [fuel_scraper.py:81] - Error fetching https://www.newenglandoil.com/newyork/zone5.asp?type=0: 404 Client Error: Not Found for url: https://www.newenglandoil.com/newyork/zone5.asp?type=0
-2025-06-01 20:37:11,523 - WARNING - [fuel_scraper.py:261] - Failed to retrieve or parse https://www.newenglandoil.com/newyork/zone5.asp?type=0. Skipping.
-2025-06-01 20:37:11,523 - INFO - [fuel_scraper.py:204] - --- Processing site: MaineOil ---
-2025-06-01 20:37:11,523 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone1.asp?type=0 (State: maine, Zone Slug: zone1)
-2025-06-01 20:37:11,799 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone1.
-2025-06-01 20:37:11,801 - INFO - [fuel_scraper.py:255] - Added new record for AJs Discount Oil in Maine zone 1
-2025-06-01 20:37:11,802 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 1
-2025-06-01 20:37:11,803 - INFO - [fuel_scraper.py:255] - Added new record for Pit Stop Fuels in Maine zone 1
-2025-06-01 20:37:11,804 - INFO - [fuel_scraper.py:255] - Added new record for Sea Land Energy in Maine zone 1
-2025-06-01 20:37:11,805 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 1
-2025-06-01 20:37:11,806 - INFO - [fuel_scraper.py:255] - Added new record for Pauls Oil Service in Maine zone 1
-2025-06-01 20:37:11,807 - INFO - [fuel_scraper.py:255] - Added new record for Higgins Energy in Maine zone 1
-2025-06-01 20:37:11,808 - INFO - [fuel_scraper.py:255] - Added new record for Willow Creek Fuel in Maine zone 1
-2025-06-01 20:37:11,809 - INFO - [fuel_scraper.py:255] - Added new record for Maine Heating Solutions in Maine zone 1
-2025-06-01 20:37:11,810 - INFO - [fuel_scraper.py:255] - Added new record for Atlantic Heating Company Inc in Maine zone 1
-2025-06-01 20:37:11,811 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 1
-2025-06-01 20:37:11,812 - INFO - [fuel_scraper.py:255] - Added new record for Conroys Oil in Maine zone 1
-2025-06-01 20:37:11,812 - INFO - [fuel_scraper.py:255] - Added new record for Dales Cash Fuel in Maine zone 1
-2025-06-01 20:37:11,813 - INFO - [fuel_scraper.py:255] - Added new record for Maine Standard Biofuels in Maine zone 1
-2025-06-01 20:37:11,814 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 1
-2025-06-01 20:37:11,815 - INFO - [fuel_scraper.py:255] - Added new record for Lowest Price Oil in Maine zone 1
-2025-06-01 20:37:11,816 - INFO - [fuel_scraper.py:255] - Added new record for Ace Oil in Maine zone 1
-2025-06-01 20:37:11,817 - INFO - [fuel_scraper.py:255] - Added new record for Northeast Fuels in Maine zone 1
-2025-06-01 20:37:11,818 - INFO - [fuel_scraper.py:255] - Added new record for Desrochers Oil in Maine zone 1
-2025-06-01 20:37:11,819 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 1
-2025-06-01 20:37:11,820 - INFO - [fuel_scraper.py:255] - Added new record for Rama Oil in Maine zone 1
-2025-06-01 20:37:11,821 - INFO - [fuel_scraper.py:255] - Added new record for Rinaldi Energy in Maine zone 1
-2025-06-01 20:37:11,822 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 1
-2025-06-01 20:37:11,822 - INFO - [fuel_scraper.py:255] - Added new record for Vic & Sons Fuel Co. in Maine zone 1
-2025-06-01 20:37:11,823 - INFO - [fuel_scraper.py:255] - Added new record for Atlantic Heating Company Inc in Maine zone 1
-2025-06-01 20:37:11,824 - INFO - [fuel_scraper.py:255] - Added new record for Cleaves Energy in Maine zone 1
-2025-06-01 20:37:11,825 - INFO - [fuel_scraper.py:255] - Added new record for Coastline Energy LLC in Maine zone 1
-2025-06-01 20:37:11,826 - INFO - [fuel_scraper.py:255] - Added new record for Daves Oil in Maine zone 1
-2025-06-01 20:37:11,827 - INFO - [fuel_scraper.py:255] - Added new record for SoPo Fuel in Maine zone 1
-2025-06-01 20:37:11,828 - INFO - [fuel_scraper.py:255] - Added new record for Order Oil Online in Maine zone 1
-2025-06-01 20:37:11,829 - INFO - [fuel_scraper.py:255] - Added new record for Maine-Ly Heating Online in Maine zone 1
-2025-06-01 20:37:11,830 - INFO - [fuel_scraper.py:255] - Added new record for Cash Energy in Maine zone 1
-2025-06-01 20:37:11,831 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 1
-2025-06-01 20:37:11,831 - INFO - [fuel_scraper.py:257] - Queued 33 records from MaineOil - maine/zone1 for DB insertion.
-2025-06-01 20:37:11,831 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone2.asp?type=0 (State: maine, Zone Slug: zone2)
-2025-06-01 20:37:12,123 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone2.
-2025-06-01 20:37:12,126 - INFO - [fuel_scraper.py:255] - Added new record for Bobs Cash Fuel in Maine zone 2
-2025-06-01 20:37:12,127 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 2
-2025-06-01 20:37:12,128 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 2
-2025-06-01 20:37:12,129 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 2
-2025-06-01 20:37:12,131 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2
-2025-06-01 20:37:12,132 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2
-2025-06-01 20:37:12,133 - INFO - [fuel_scraper.py:255] - Added new record for C.O.D. Cash Fuel in Maine zone 2
-2025-06-01 20:37:12,134 - INFO - [fuel_scraper.py:255] - Added new record for M.A. Haskell Fuel Company, LLC. in Maine zone 2
-2025-06-01 20:37:12,135 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2
-2025-06-01 20:37:12,136 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 2
-2025-06-01 20:37:12,137 - INFO - [fuel_scraper.py:255] - Added new record for C.B. Haskell Fuel Co. in Maine zone 2
-2025-06-01 20:37:12,138 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2
-2025-06-01 20:37:12,139 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 2
-2025-06-01 20:37:12,140 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 2
-2025-06-01 20:37:12,141 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 2
-2025-06-01 20:37:12,142 - INFO - [fuel_scraper.py:255] - Added new record for G & G Cash Fuel in Maine zone 2
-2025-06-01 20:37:12,143 - INFO - [fuel_scraper.py:255] - Added new record for Lisbon Fuel Co in Maine zone 2
-2025-06-01 20:37:12,144 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 2
-2025-06-01 20:37:12,144 - INFO - [fuel_scraper.py:257] - Queued 18 records from MaineOil - maine/zone2 for DB insertion.
-2025-06-01 20:37:12,144 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone3.asp?type=0 (State: maine, Zone Slug: zone3)
-2025-06-01 20:37:12,439 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone3.
-2025-06-01 20:37:12,441 - INFO - [fuel_scraper.py:255] - Added new record for Lisbon Fuel Co in Maine zone 3
-2025-06-01 20:37:12,443 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 3
-2025-06-01 20:37:12,444 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 3
-2025-06-01 20:37:12,445 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3
-2025-06-01 20:37:12,446 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 3
-2025-06-01 20:37:12,446 - INFO - [fuel_scraper.py:255] - Added new record for G & G Cash Fuel in Maine zone 3
-2025-06-01 20:37:12,447 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3
-2025-06-01 20:37:12,448 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3
-2025-06-01 20:37:12,449 - INFO - [fuel_scraper.py:255] - Added new record for Maine Heating Solutions in Maine zone 3
-2025-06-01 20:37:12,450 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 3
-2025-06-01 20:37:12,451 - INFO - [fuel_scraper.py:255] - Added new record for Rinaldi Energy in Maine zone 3
-2025-06-01 20:37:12,452 - INFO - [fuel_scraper.py:255] - Added new record for S K Fuel in Maine zone 3
-2025-06-01 20:37:12,453 - INFO - [fuel_scraper.py:255] - Added new record for Luckys Cash Fuel in Maine zone 3
-2025-06-01 20:37:12,454 - INFO - [fuel_scraper.py:255] - Added new record for Maine-Ly Heating Online in Maine zone 3
-2025-06-01 20:37:12,455 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3
-2025-06-01 20:37:12,456 - INFO - [fuel_scraper.py:255] - Added new record for Lake Region Energy in Maine zone 3
-2025-06-01 20:37:12,457 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 3
-2025-06-01 20:37:12,458 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 3
-2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:255] - Added new record for Big G Heating Fuel in Maine zone 3
-2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 3
-2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:257] - Queued 20 records from MaineOil - maine/zone3 for DB insertion.
-2025-06-01 20:37:12,459 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone4.asp?type=0 (State: maine, Zone Slug: zone4)
-2025-06-01 20:37:12,758 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone4.
-2025-06-01 20:37:12,761 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 4
-2025-06-01 20:37:12,762 - INFO - [fuel_scraper.py:255] - Added new record for Alfred Oil in Maine zone 4
-2025-06-01 20:37:12,763 - INFO - [fuel_scraper.py:255] - Added new record for Willow Creek Fuel in Maine zone 4
-2025-06-01 20:37:12,764 - INFO - [fuel_scraper.py:255] - Added new record for Maine Heating Solutions in Maine zone 4
-2025-06-01 20:37:12,765 - INFO - [fuel_scraper.py:255] - Added new record for Quality Fuels, LLC in Maine zone 4
-2025-06-01 20:37:12,766 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 4
-2025-06-01 20:37:12,767 - INFO - [fuel_scraper.py:255] - Added new record for Welch Oil in Maine zone 4
-2025-06-01 20:37:12,768 - INFO - [fuel_scraper.py:255] - Added new record for Ace Oil in Maine zone 4
-2025-06-01 20:37:12,769 - INFO - [fuel_scraper.py:255] - Added new record for Top It Off Oil in Maine zone 4
-2025-06-01 20:37:12,770 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 4
-2025-06-01 20:37:12,771 - INFO - [fuel_scraper.py:255] - Added new record for Garrett Pillsbury - Fleurent Fuel in Maine zone 4
-2025-06-01 20:37:12,772 - INFO - [fuel_scraper.py:255] - Added new record for Noble Fuels in Maine zone 4
-2025-06-01 20:37:12,773 - INFO - [fuel_scraper.py:255] - Added new record for Gils Oil Service, Inc. in Maine zone 4
-2025-06-01 20:37:12,774 - INFO - [fuel_scraper.py:255] - Added new record for Seacoast Energy, Inc. in Maine zone 4
-2025-06-01 20:37:12,774 - INFO - [fuel_scraper.py:255] - Added new record for Winterwood Fuel in Maine zone 4
-2025-06-01 20:37:12,775 - INFO - [fuel_scraper.py:255] - Added new record for Roberge Energy in Maine zone 4
-2025-06-01 20:37:12,776 - INFO - [fuel_scraper.py:255] - Added new record for Bargain Fuel in Maine zone 4
-2025-06-01 20:37:12,777 - INFO - [fuel_scraper.py:255] - Added new record for Branch Brook Fuels in Maine zone 4
-2025-06-01 20:37:12,778 - INFO - [fuel_scraper.py:255] - Added new record for Desrochers Oil in Maine zone 4
-2025-06-01 20:37:12,779 - INFO - [fuel_scraper.py:255] - Added new record for Rinaldi Energy in Maine zone 4
-2025-06-01 20:37:12,780 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 4
-2025-06-01 20:37:12,781 - INFO - [fuel_scraper.py:255] - Added new record for Rama Oil in Maine zone 4
-2025-06-01 20:37:12,782 - INFO - [fuel_scraper.py:255] - Added new record for Arrow Oil Co in Maine zone 4
-2025-06-01 20:37:12,783 - INFO - [fuel_scraper.py:255] - Added new record for My Easy Oil in Maine zone 4
-2025-06-01 20:37:12,784 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 4
-2025-06-01 20:37:12,785 - INFO - [fuel_scraper.py:255] - Added new record for Estes Oil Online in Maine zone 4
-2025-06-01 20:37:12,786 - INFO - [fuel_scraper.py:255] - Added new record for Double E Oil in Maine zone 4
-2025-06-01 20:37:12,787 - INFO - [fuel_scraper.py:255] - Added new record for R & R OIL in Maine zone 4
-2025-06-01 20:37:12,788 - INFO - [fuel_scraper.py:255] - Added new record for Cleaves Energy in Maine zone 4
-2025-06-01 20:37:12,789 - INFO - [fuel_scraper.py:255] - Added new record for Eagle Oil in Maine zone 4
-2025-06-01 20:37:12,790 - INFO - [fuel_scraper.py:255] - Added new record for Vadnais Oil in Maine zone 4
-2025-06-01 20:37:12,791 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 4
-2025-06-01 20:37:12,791 - INFO - [fuel_scraper.py:257] - Queued 32 records from MaineOil - maine/zone4 for DB insertion.
-2025-06-01 20:37:12,791 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone5.asp?type=0 (State: maine, Zone Slug: zone5)
-2025-06-01 20:37:13,076 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone5.
-2025-06-01 20:37:13,079 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 5
-2025-06-01 20:37:13,080 - INFO - [fuel_scraper.py:255] - Added new record for Crowley Energy in Maine zone 5
-2025-06-01 20:37:13,081 - INFO - [fuel_scraper.py:255] - Added new record for Country Fuel LLC in Maine zone 5
-2025-06-01 20:37:13,082 - INFO - [fuel_scraper.py:255] - Added new record for OFarrell Energy in Maine zone 5
-2025-06-01 20:37:13,083 - INFO - [fuel_scraper.py:255] - Added new record for M.A. Haskell Fuel Company, LLC. in Maine zone 5
-2025-06-01 20:37:13,084 - INFO - [fuel_scraper.py:255] - Added new record for Dales Cash Fuel in Maine zone 5
-2025-06-01 20:37:13,085 - INFO - [fuel_scraper.py:255] - Added new record for Online Fuel Co. in Maine zone 5
-2025-06-01 20:37:13,086 - INFO - [fuel_scraper.py:255] - Added new record for Kaler Oil Co., Inc. in Maine zone 5
-2025-06-01 20:37:13,087 - INFO - [fuel_scraper.py:255] - Added new record for Lisbon Fuel Co in Maine zone 5
-2025-06-01 20:37:13,088 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 5
-2025-06-01 20:37:13,089 - INFO - [fuel_scraper.py:255] - Added new record for Coastline Energy LLC in Maine zone 5
-2025-06-01 20:37:13,090 - INFO - [fuel_scraper.py:255] - Added new record for C.B. Haskell Fuel Co. in Maine zone 5
-2025-06-01 20:37:13,091 - INFO - [fuel_scraper.py:255] - Added new record for Discount Energy in Maine zone 5
-2025-06-01 20:37:13,091 - INFO - [fuel_scraper.py:257] - Queued 13 records from MaineOil - maine/zone5 for DB insertion.
-2025-06-01 20:37:13,091 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone6.asp?type=0 (State: maine, Zone Slug: zone6)
-2025-06-01 20:37:13,387 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone6.
-2025-06-01 20:37:13,389 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6
-2025-06-01 20:37:13,390 - INFO - [fuel_scraper.py:255] - Added new record for Pushaw Energy in Maine zone 6
-2025-06-01 20:37:13,391 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6
-2025-06-01 20:37:13,392 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6
-2025-06-01 20:37:13,394 - INFO - [fuel_scraper.py:255] - Added new record for Kennebec Energy in Maine zone 6
-2025-06-01 20:37:13,395 - INFO - [fuel_scraper.py:255] - Added new record for Hopkins Energy in Maine zone 6
-2025-06-01 20:37:13,396 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6
-2025-06-01 20:37:13,397 - INFO - [fuel_scraper.py:255] - Added new record for Pine Tree Oil in Maine zone 6
-2025-06-01 20:37:13,398 - INFO - [fuel_scraper.py:255] - Added new record for CN Brown Energy in Maine zone 6
-2025-06-01 20:37:13,398 - INFO - [fuel_scraper.py:255] - Added new record for Morin Fuel in Maine zone 6
-2025-06-01 20:37:13,399 - INFO - [fuel_scraper.py:255] - Added new record for Fettinger Fuels in Maine zone 6
-2025-06-01 20:37:13,400 - INFO - [fuel_scraper.py:255] - Added new record for Dysarts Fuel in Maine zone 6
-2025-06-01 20:37:13,401 - INFO - [fuel_scraper.py:255] - Added new record for Fieldings Oil & Propane in Maine zone 6
-2025-06-01 20:37:13,401 - INFO - [fuel_scraper.py:257] - Queued 13 records from MaineOil - maine/zone6 for DB insertion.
-2025-06-01 20:37:13,401 - INFO - [fuel_scraper.py:218] - Scraping: https://www.maineoil.com/zone7.asp?type=0 (State: maine, Zone Slug: zone7)
-2025-06-01 20:37:13,652 - INFO - [fuel_scraper.py:97] - Found 1 table(s) on page for maine - zone7.
-2025-06-01 20:37:13,654 - INFO - [fuel_scraper.py:255] - Added new record for Eastern Plumbing & Heating in Maine zone 7
-2025-06-01 20:37:13,655 - INFO - [fuel_scraper.py:255] - Added new record for Hometown Fuel in Maine zone 7
-2025-06-01 20:37:13,656 - INFO - [fuel_scraper.py:255] - Added new record for Huntley Plumbing & Heating in Maine zone 7
-2025-06-01 20:37:13,657 - INFO - [fuel_scraper.py:255] - Added new record for Kelley Oil in Maine zone 7
-2025-06-01 20:37:13,657 - INFO - [fuel_scraper.py:257] - Queued 4 records from MaineOil - maine/zone7 for DB insertion.
-2025-06-01 20:37:13,694 - INFO - [fuel_scraper.py:265] - Successfully committed 517 records to the database.
-2025-06-01 20:37:13,694 - INFO - [fuel_scraper.py:275] - Database session closed.
-2025-06-01 20:37:13,694 - INFO - [fuel_scraper.py:277] - Oil price scraper job finished.
-2025-06-01 20:37:13,694 - INFO - [run.py:33] - Fuel price scraper finished.
diff --git a/requirements.txt b/requirements.txt
index dd7af0f..614f8b7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
requests
beautifulsoup4
sqlalchemy
-psycopg2-binary
\ No newline at end of file
+psycopg2-binary
+fastapi
+uvicorn[standard]
diff --git a/run.py b/run.py
index 5e4bacc..429fa2d 100644
--- a/run.py
+++ b/run.py
@@ -2,44 +2,100 @@
import argparse
import logging
-# Import necessary functions/modules from your project
-# The 'import models' is crucial for init_db to know about the tables
import models
from database import init_db, SessionLocal
-from fuel_scraper import main as run_scraper_main # Import from modular package
+from newenglandoil import main as run_scraper_main
-# Configure basic logging for the run.py script itself if needed
-# Your other modules (fuel_scraper, database) will have their own logging
-# or you might centralize logging configuration further.
-# For simplicity, we'll let fuel_scraper handle its detailed logging.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
+
def initialize_database():
"""Initializes the database by creating tables based on models."""
logger.info("Attempting to initialize database...")
try:
- init_db() # This function is imported from database.py
- # It relies on models being imported so Base.metadata is populated
+ init_db()
logger.info("Database initialization process completed.")
except Exception as e:
logger.error(f"Error during database initialization: {e}", exc_info=True)
-def scrape_data():
- """Runs the fuel price scraper."""
- logger.info("Starting the fuel price scraper...")
+
+def scrape_data(state_abbr: "str | None" = None, refresh_metadata: bool = False):
+ """Runs the NewEnglandOil scraper, forwarding state_abbr and refresh_metadata; errors are logged, not re-raised."""
+ logger.info("Starting the NewEnglandOil scraper...")
+ if refresh_metadata:
+ logger.info("Metadata refresh enabled: Existing phone/URL data may be overwritten.")
+ if state_abbr:
+ logger.info(f"Scraping restricted to state: {state_abbr}")
+
try:
- run_scraper_main() # This is the main function from fuel_scraper.py
- logger.info("Fuel price scraper finished.")
+ run_scraper_main(refresh_metadata=refresh_metadata, target_state_abbr=state_abbr)
+ logger.info("NewEnglandOil scraper finished.")
except Exception as e:
logger.error(f"Error during scraping process: {e}", exc_info=True)
+
+def scrape_cheapest(state_abbr: str, refresh_metadata: bool = False):
+    """Scrape CheapestOil for one state; failures are rolled back and logged, not re-raised."""
+    from cheapestoil import scrape_state  # imported at call time, not at module load
+
+    logger.info(f"Starting CheapestOil scrape for {state_abbr}...")
+    if refresh_metadata:
+        logger.info("Metadata refresh enabled: Existing phone/URL data may be overwritten.")
+
+    session = SessionLocal()
+    try:
+        # Key each county id by its whitespace-trimmed (state, name) pair.
+        county_ids = {(row.state.strip(), row.name.strip()): row.id for row in session.query(models.County).all()}
+        outcome = scrape_state(state_abbr, session, county_ids, refresh_metadata=refresh_metadata)
+        logger.info(f"CheapestOil result: {outcome}")
+    except Exception as exc:
+        session.rollback()
+        logger.error(f"Error during CheapestOil scrape: {exc}", exc_info=True)
+    finally:
+        session.close()
+
+
+def run_migration():
+    """Run the migrate_normalize entry point; any exception is logged, not re-raised."""
+    from migrate_normalize import main as normalize_data
+    logger.info("Running data normalization migration...")
+    try:
+        normalize_data()
+        logger.info("Migration completed.")
+    except Exception as exc:
+        logger.error(f"Error during migration: {exc}", exc_info=True)
+
+
+def start_server():
+    """Start uvicorn serving the "app:app" application on 0.0.0.0, port 9553."""
+    from uvicorn import run as serve
+    logger.info("Starting FastAPI crawler server on port 9553...")
+    serve("app:app", host="0.0.0.0", port=9553)
+
+
def main():
parser = argparse.ArgumentParser(description="Fuel Price Scraper Control Script")
parser.add_argument(
"action",
- choices=["initdb", "scrape"],
- help="The action to perform: 'initdb' to initialize the database, 'scrape' to run the scraper."
+ choices=["initdb", "scrape", "scrape-cheapest", "migrate", "server"],
+ help=(
+ "'initdb' to initialize the database, "
+ "'scrape' to run NewEnglandOil scraper, "
+ "'scrape-cheapest' to run CheapestOil scraper, "
+ "'migrate' to run data normalization migration, "
+ "'server' to start the FastAPI server."
+ ),
+ )
+ parser.add_argument(
+ "--state",
+ default=None,
+ help="State abbreviation (MA, CT, ME, NH, RI, VT).",
+ )
+ parser.add_argument(
+ "--refresh-metadata",
+ action="store_true",
+ help="Force refresh phone numbers and URLs, overwriting existing data.",
)
args = parser.parse_args()
@@ -47,10 +103,18 @@ def main():
if args.action == "initdb":
initialize_database()
elif args.action == "scrape":
- scrape_data()
- else:
- logger.error(f"Unknown action: {args.action}")
- parser.print_help()
+ scrape_data(state_abbr=args.state, refresh_metadata=args.refresh_metadata)
+ elif args.action == "scrape-cheapest":
+ if not args.state:
+ # Usage error: parser.error() prints usage and this message to stderr and
+ # exits with status 2, so a scheduler or container runtime sees the failure.
+ parser.error("--state is required for scrape-cheapest action")
+ scrape_cheapest(args.state.upper(), refresh_metadata=args.refresh_metadata)
+ elif args.action == "migrate":
+ run_migration()
+ elif args.action == "server":
+ start_server()
+
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()
diff --git a/test.py b/test.py
deleted file mode 100644
index 2ab49c3..0000000
--- a/test.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-
-url = "https://www.newenglandoil.com/connecticut/zone1.asp?type=0"
-headers_req = { # Renamed to avoid conflict with 'headers' variable later
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-}
-
-try:
- response = requests.get(url, headers=headers_req, timeout=10)
- response.raise_for_status()
- soup = BeautifulSoup(response.content, 'html.parser')
-
- all_tables = soup.find_all('table')
- print(f"Found {len(all_tables)} table(s) in total.")
-
- if all_tables:
- table = all_tables[0] # Assuming it's the first (and only) table
- thead = table.find('thead')
- if thead:
- # Get the exact header texts
- actual_headers = [th.get_text(strip=True) for th in thead.find_all('th')]
- print(f"Actual headers found in the first table's thead: {actual_headers}")
- # Get the lowercased versions for easy comparison
- actual_headers_lower = [th.get_text(strip=True).lower() for th in thead.find_all('th')]
- print(f"Actual headers (lowercase): {actual_headers_lower}")
-
- else:
- print("The first table found does not have a element.")
- else:
- print("No tables found on the page.")
-
-except requests.exceptions.RequestException as e:
- print(f"Error fetching page: {e}")
\ No newline at end of file