feat(CRIT-010): add zone-to-county mapping and county_id to oil_prices
Add ZONE_COUNTY_MAP for all 5 scraped states (42 zone-to-county entries). Scraper now resolves county_id at startup and assigns it to each record. Upsert logic deduplicates by (name, state, county_id) to prevent duplicates when multiple zones map to the same county. Also adds County model for DB lookups and fixes Rhode Island zone count (4, not 5). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
114
fuel_scraper/config.py
Normal file
114
fuel_scraper/config.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""
|
||||
Configuration module for the fuel scraper.
|
||||
Contains site definitions, zone-to-county mapping, and logging setup.
|
||||
"""
|
||||
import logging
|
||||
|
||||
# --- SITES CONFIGURATION ---
|
||||
# One dict per scraped site. Each entry carries the pieces needed to build a
# zone page URL ("base_url", "url_template", "oil_type") plus "locations",
# which maps a state key to the list of zone slugs ("zone1".."zoneN") that
# exist for that state on that site.
SITES_CONFIG = [
    {
        "site_name": "NewEnglandOil",
        "base_url": "https://www.newenglandoil.com",
        # This site's template includes a {state_slug} path segment.
        "url_template": "{base_url}/{state_slug}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "connecticut": [f"zone{n}" for n in range(1, 11)],    # zone1..zone10
            "massachusetts": [f"zone{n}" for n in range(1, 16)],  # zone1..zone15
            "newhampshire": [f"zone{n}" for n in range(1, 7)],    # zone1..zone6
            "rhodeisland": [f"zone{n}" for n in range(1, 5)],     # zone1..zone4
        },
    },
    {
        "site_name": "MaineOil",
        "base_url": "https://www.maineoil.com",
        # This site's template has no state segment; the zone slug follows
        # the base URL directly.
        "url_template": "{base_url}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "maine": [f"zone{n}" for n in range(1, 8)],           # zone1..zone7
        },
    },
]
|
||||
|
||||
# --- ZONE-TO-COUNTY MAPPING ---
|
||||
# Maps (state_key, zone_number) -> (state_abbrev, county_name)
|
||||
# state_key matches the keys in SITES_CONFIG locations (lowercase, no spaces)
|
||||
# county_name must match the county.name in the database exactly
|
||||
# Per-state source table: state_key -> (state_abbrev, county names by zone).
# The position of a county name in its tuple is the zone number (1-based), so
# the first entry is zone 1, the second is zone 2, and so on. Each zone maps
# to exactly one county; the same county may appear under several zones.
#
# NOTE(review): the "N zones -> M counties" figures are carried over from the
# original headers. M does not always equal the number of distinct counties
# listed (e.g. Massachusetts lists 12 distinct names); it presumably is the
# state's total county count -- confirm.
_ZONE_COUNTIES_BY_STATE = {
    # Connecticut (10 zones -> 8 counties)
    "connecticut": ("CT", (
        "New London",    # zone 1: Southeast CT
        "Windham",       # zone 2: Northeast CT
        "New Haven",     # zone 3: New Haven, Bridgeport
        "Middlesex",     # zone 4: Southeast Central CT
        "New Haven",     # zone 5: Southwest Central CT
        "Hartford",      # zone 6: Greater Hartford
        "Litchfield",    # zone 7: West CT
        "Fairfield",     # zone 8: Southwest CT
        "Tolland",       # zone 9: Northeast Central CT
        "Litchfield",    # zone 10: Northwest CT
    )),
    # Massachusetts (15 zones -> 14 counties)
    "massachusetts": ("MA", (
        "Suffolk",       # zone 1: South Boston
        "Middlesex",     # zone 2: North Boston
        "Norfolk",       # zone 3: Southwest of Boston
        "Plymouth",      # zone 4: South of Boston
        "Middlesex",     # zone 5: West of Boston
        "Bristol",       # zone 6: Southern Massachusetts
        "Barnstable",    # zone 7: Cape Cod & Islands
        "Essex",         # zone 8: Northwest of Boston
        "Essex",         # zone 9: North of Boston
        "Worcester",     # zone 10: Central Massachusetts
        "Worcester",     # zone 11: East Central Massachusetts
        "Hampshire",     # zone 12: West Central Massachusetts
        "Hampden",       # zone 13: Springfield Area
        "Franklin",      # zone 14: Northwestern Massachusetts
        "Berkshire",     # zone 15: Western Massachusetts
    )),
    # New Hampshire (6 zones -> 10 counties)
    "newhampshire": ("NH", (
        "Coos",          # zone 1: Northern NH
        "Strafford",     # zone 2: Eastern NH
        "Merrimack",     # zone 3: Central NH
        "Grafton",       # zone 4: West Central NH
        "Cheshire",      # zone 5: Southwest NH
        "Hillsborough",  # zone 6: South Central NH
    )),
    # Rhode Island (4 zones -> 5 counties)
    "rhodeisland": ("RI", (
        "Newport",       # zone 1: Southeast RI
        "Providence",    # zone 2: Northern RI
        "Washington",    # zone 3: Southwest RI
        "Kent",          # zone 4: Central RI
    )),
    # Maine (7 zones -> 16 counties, via MaineOil.com)
    "maine": ("ME", (
        "Cumberland",    # zone 1: Greater Portland
        "Kennebec",      # zone 2: Augusta/Waterville
        "Androscoggin",  # zone 3: Auburn/Lewiston/Western
        "York",          # zone 4: Southern Maine
        "Knox",          # zone 5: Mid-Coast
        "Penobscot",     # zone 6: Bangor West
        "Washington",    # zone 7: Downeast
    )),
}

# Flattened lookup: (state_key, zone_number) -> (state_abbrev, county_name).
# Built in table order, so iteration order is state by state, zones ascending.
ZONE_COUNTY_MAP = {
    (state_key, zone_number): (state_abbrev, county_name)
    for state_key, (state_abbrev, county_names) in _ZONE_COUNTIES_BY_STATE.items()
    for zone_number, county_name in enumerate(county_names, start=1)
}
|
||||
|
||||
# --- LOGGING CONFIGURATION ---
|
||||
# Path of the file that setup_logging() directs log output to.
LOG_FILE = "oil_scraper.log"


def setup_logging():
    """Set up file-based logging via ``logging.basicConfig``.

    Records at INFO level and above are written to ``LOG_FILE``. Each record
    is formatted as: timestamp, level name, ``[source file:line number]``,
    then the message, separated by `` - ``.
    """
    # Assembled from adjacent literals; the resulting string is
    # '%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'.
    record_format = (
        '%(asctime)s - %(levelname)s - '
        '[%(filename)s:%(lineno)d] - %(message)s'
    )
    basic_config_options = {
        "filename": LOG_FILE,
        "level": logging.INFO,
        "format": record_format,
    }
    logging.basicConfig(**basic_config_options)
|
||||
Reference in New Issue
Block a user