feat(CRIT-010): add zone-to-county mapping and county_id to oil_prices

Add ZONE_COUNTY_MAP for all 5 scraped states (42 zone-to-county entries).
Scraper now resolves county_id at startup and assigns it to each record.
Upsert logic deduplicates by (name, state, county_id) to prevent duplicates
when multiple zones map to the same county. Also adds County model for
DB lookups and fixes Rhode Island zone count (4, not 5).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 18:31:46 -05:00
parent 6daa706e5a
commit 8f45f4c209
9 changed files with 746 additions and 34 deletions

114
fuel_scraper/config.py Normal file
View File

@@ -0,0 +1,114 @@
"""
Configuration module for the fuel scraper.
Contains site definitions, zone-to-county mapping, and logging setup.
"""
import logging
# --- SITES CONFIGURATION ---
def _zone_slugs(count):
    """Return the zone slugs ``zone1`` .. ``zone<count>`` in ascending order."""
    return [f"zone{number}" for number in range(1, count + 1)]


# One dict per scraped site:
#   site_name    - label for the site
#   base_url     - scheme + host, referenced by the {base_url} placeholder
#   url_template - str.format-style pattern; note that the MaineOil template
#                  has no {state_slug} segment
#   oil_type     - value for the {oil_type} placeholder
#   locations    - state key -> list of zone slugs available for that state
SITES_CONFIG = [
    {
        "site_name": "NewEnglandOil",
        "base_url": "https://www.newenglandoil.com",
        "url_template": "{base_url}/{state_slug}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "connecticut": _zone_slugs(10),
            "massachusetts": _zone_slugs(15),
            "newhampshire": _zone_slugs(6),
            "rhodeisland": _zone_slugs(4),
        },
    },
    {
        "site_name": "MaineOil",
        "base_url": "https://www.maineoil.com",
        "url_template": "{base_url}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "maine": _zone_slugs(7),
        },
    },
]
# --- ZONE-TO-COUNTY MAPPING ---
# Per-state source table: state_key -> (state_abbrev, {zone_number: county_name}).
# state_key matches the keys in SITES_CONFIG locations (lowercase, no spaces).
# county_name must match the county.name in the database exactly.
# Several zones may point at the same county; the "of N" figures in the
# per-state notes are the state's total county count.
_COUNTIES_BY_STATE = {
    # Connecticut: 10 zones -> 8 distinct counties (of 8)
    "connecticut": ("CT", {
        1: "New London",    # Southeast CT
        2: "Windham",       # Northeast CT
        3: "New Haven",     # New Haven, Bridgeport
        4: "Middlesex",     # Southeast Central CT
        5: "New Haven",     # Southwest Central CT
        6: "Hartford",      # Greater Hartford
        7: "Litchfield",    # West CT
        8: "Fairfield",     # Southwest CT
        9: "Tolland",       # Northeast Central CT
        10: "Litchfield",   # Northwest CT
    }),
    # Massachusetts: 15 zones -> 12 distinct counties (of 14)
    "massachusetts": ("MA", {
        1: "Suffolk",       # South Boston
        2: "Middlesex",     # North Boston
        3: "Norfolk",       # Southwest of Boston
        4: "Plymouth",      # South of Boston
        5: "Middlesex",     # West of Boston
        6: "Bristol",       # Southern Massachusetts
        7: "Barnstable",    # Cape Cod & Islands
        8: "Essex",         # Northwest of Boston
        9: "Essex",         # North of Boston
        10: "Worcester",    # Central Massachusetts
        11: "Worcester",    # East Central Massachusetts
        12: "Hampshire",    # West Central Massachusetts
        13: "Hampden",      # Springfield Area
        14: "Franklin",     # Northwestern Massachusetts
        15: "Berkshire",    # Western Massachusetts
    }),
    # New Hampshire: 6 zones -> 6 distinct counties (of 10)
    "newhampshire": ("NH", {
        1: "Coos",          # Northern NH
        2: "Strafford",     # Eastern NH
        3: "Merrimack",     # Central NH
        4: "Grafton",       # West Central NH
        5: "Cheshire",      # Southwest NH
        6: "Hillsborough",  # South Central NH
    }),
    # Rhode Island: 4 zones -> 4 distinct counties (of 5)
    "rhodeisland": ("RI", {
        1: "Newport",       # Southeast RI
        2: "Providence",    # Northern RI
        3: "Washington",    # Southwest RI
        4: "Kent",          # Central RI
    }),
    # Maine: 7 zones -> 7 distinct counties (of 16), via MaineOil.com
    "maine": ("ME", {
        1: "Cumberland",    # Greater Portland
        2: "Kennebec",      # Augusta/Waterville
        3: "Androscoggin",  # Auburn/Lewiston/Western
        4: "York",          # Southern Maine
        5: "Knox",          # Mid-Coast
        6: "Penobscot",     # Bangor West
        7: "Washington",    # Downeast
    }),
}

# Flattened lookup: (state_key, zone_number) -> (state_abbrev, county_name).
# Entries keep the order of the table above (state by state, ascending zone).
ZONE_COUNTY_MAP = {
    (state_key, zone_number): (state_abbrev, county_name)
    for state_key, (state_abbrev, zone_counties) in _COUNTIES_BY_STATE.items()
    for zone_number, county_name in zone_counties.items()
}
# --- LOGGING CONFIGURATION ---
LOG_FILE = "oil_scraper.log"


def setup_logging(log_file=None, level=logging.INFO):
    """Configure root logging for the scraper.

    Calling this with no arguments logs to ``LOG_FILE`` at INFO level.

    Args:
        log_file: Path of the file that receives log records. ``None``
            (the default) selects the module-level ``LOG_FILE``, read at
            call time so a reassigned ``LOG_FILE`` is still honoured.
        level: Threshold for the root logger. Defaults to ``logging.INFO``.

    Note:
        ``logging.basicConfig`` does nothing when the root logger already
        has handlers, so only the first effective call takes hold.
    """
    logging.basicConfig(
        filename=LOG_FILE if log_file is None else log_file,
        level=level,
        format="%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
    )