- Add newenglandoil/ package as the primary scraper (replaces fuel_scraper) - Add cheapestoil/ package as a secondary market price scraper - Add app.py entry point for direct execution - Update run.py: new scrape_cheapest(), migrate command, --state filter, --refresh-metadata flag for overwriting existing phone/URL data - Update models.py with latest schema fields - Update requirements.txt dependencies - Update Dockerfile and docker-compose.yml for new structure - Remove deprecated fuel_scraper module, test.py, and log file Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
126 lines
5.4 KiB
Python
126 lines
5.4 KiB
Python
"""
|
|
Configuration module for the fuel scraper.
|
|
Contains site definitions, zone-to-county mapping, and logging setup.
|
|
"""
|
|
import logging
|
|
|
|
# --- SITES CONFIGURATION ---
# One dict per site. Keys:
#   site_name    - display name of the site
#   base_url     - scheme + host, no trailing slash
#   url_template - template with {base_url}, {zone_slug}, {oil_type} and
#                  (NewEnglandOil only) {state_slug} placeholders; presumably
#                  filled with str.format by the scraper - confirm against caller
#   oil_type     - value substituted into the "type" query parameter
#   locations    - state key (lowercase, no spaces) -> list of zone slugs
SITES_CONFIG = [
    {
        "site_name": "NewEnglandOil",
        "base_url": "https://www.newenglandoil.com",
        "url_template": "{base_url}/{state_slug}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "connecticut": [
                "zone1", "zone2", "zone3", "zone4", "zone5", "zone6", "zone7",
                "zone8", "zone9", "zone10",
            ],
            "massachusetts": [
                "zone1", "zone2", "zone3", "zone4", "zone5", "zone6",
                "zone7", "zone8", "zone9", "zone10", "zone11", "zone12",
                "zone13", "zone14", "zone15",
            ],
            "newhampshire": [
                "zone1", "zone2", "zone3", "zone4", "zone5", "zone6",
            ],
            "rhodeisland": [
                "zone1", "zone2", "zone3", "zone4",
            ],
        },
    },
    {
        "site_name": "MaineOil",
        "base_url": "https://www.maineoil.com",
        # Unlike NewEnglandOil, this template has no {state_slug} path segment.
        "url_template": "{base_url}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "maine": [
                "zone1", "zone2", "zone3", "zone4", "zone5",
                "zone6", "zone7",
            ],
        },
    },
]
|
|
|
|
# --- STATE ABBREVIATION MAP ---
# Maps lowercase state keys (as used in SITES_CONFIG locations) to 2-letter abbreviations
STATE_ABBREV_MAP = {
    "connecticut": "CT",
    "massachusetts": "MA",
    "maine": "ME",
    "newhampshire": "NH",
    "rhodeisland": "RI",
    "vermont": "VT",
}
|
|
|
|
# --- ZONE-TO-COUNTY MAPPING ---
# Maps (state_key, zone_number) -> (state_abbrev, county_name)
# state_key matches the keys in SITES_CONFIG locations (lowercase, no spaces)
# county_name must match the county.name in the database exactly
# Each zone maps to exactly one county, so when a section header lists more
# counties than zones, the remaining counties have no entry in this map.
ZONE_COUNTY_MAP = {
    # Connecticut (10 zones -> 8 counties)
    ("connecticut", 1): ("CT", "New London"),  # Southeast CT
    ("connecticut", 2): ("CT", "Windham"),  # Northeast CT
    ("connecticut", 3): ("CT", "New Haven"),  # New Haven, Bridgeport
    ("connecticut", 4): ("CT", "Middlesex"),  # Southeast Central CT
    ("connecticut", 5): ("CT", "New Haven"),  # Southwest Central CT
    ("connecticut", 6): ("CT", "Hartford"),  # Greater Hartford
    ("connecticut", 7): ("CT", "Litchfield"),  # West CT
    ("connecticut", 8): ("CT", "Fairfield"),  # Southwest CT
    ("connecticut", 9): ("CT", "Tolland"),  # Northeast Central CT
    ("connecticut", 10): ("CT", "Litchfield"),  # Northwest CT

    # Massachusetts (15 zones -> 14 counties)
    ("massachusetts", 1): ("MA", "Suffolk"),  # South Boston
    ("massachusetts", 2): ("MA", "Middlesex"),  # North Boston
    ("massachusetts", 3): ("MA", "Norfolk"),  # Southwest of Boston
    ("massachusetts", 4): ("MA", "Plymouth"),  # South of Boston
    ("massachusetts", 5): ("MA", "Middlesex"),  # West of Boston
    ("massachusetts", 6): ("MA", "Bristol"),  # Southern Massachusetts
    ("massachusetts", 7): ("MA", "Barnstable"),  # Cape Cod & Islands
    ("massachusetts", 8): ("MA", "Essex"),  # Northwest of Boston
    ("massachusetts", 9): ("MA", "Essex"),  # North of Boston
    ("massachusetts", 10): ("MA", "Worcester"),  # Central Massachusetts
    ("massachusetts", 11): ("MA", "Worcester"),  # East Central Massachusetts
    ("massachusetts", 12): ("MA", "Hampshire"),  # West Central Massachusetts
    ("massachusetts", 13): ("MA", "Hampden"),  # Springfield Area
    ("massachusetts", 14): ("MA", "Franklin"),  # Northwestern Massachusetts
    ("massachusetts", 15): ("MA", "Berkshire"),  # Western Massachusetts

    # New Hampshire (6 zones -> 10 counties)
    ("newhampshire", 1): ("NH", "Coos"),  # Northern NH
    ("newhampshire", 2): ("NH", "Strafford"),  # Eastern NH
    ("newhampshire", 3): ("NH", "Merrimack"),  # Central NH
    ("newhampshire", 4): ("NH", "Grafton"),  # West Central NH
    ("newhampshire", 5): ("NH", "Cheshire"),  # Southwest NH
    ("newhampshire", 6): ("NH", "Hillsborough"),  # South Central NH

    # Rhode Island (4 zones -> 5 counties)
    ("rhodeisland", 1): ("RI", "Newport"),  # Southeast RI
    ("rhodeisland", 2): ("RI", "Providence"),  # Northern RI
    ("rhodeisland", 3): ("RI", "Washington"),  # Southwest RI
    ("rhodeisland", 4): ("RI", "Kent"),  # Central RI

    # Maine (7 zones -> 16 counties, via MaineOil.com)
    ("maine", 1): ("ME", "Cumberland"),  # Greater Portland
    ("maine", 2): ("ME", "Kennebec"),  # Augusta/Waterville
    ("maine", 3): ("ME", "Androscoggin"),  # Auburn/Lewiston/Western
    ("maine", 4): ("ME", "York"),  # Southern Maine
    ("maine", 5): ("ME", "Knox"),  # Mid-Coast
    ("maine", 6): ("ME", "Penobscot"),  # Bangor West
    ("maine", 7): ("ME", "Washington"),  # Downeast
}
|
|
|
|
# --- LOGGING CONFIGURATION ---
LOG_FILE = "oil_scraper.log"


def setup_logging() -> None:
    """Configure logging for the scraper.

    Calls ``logging.basicConfig`` so that records at INFO level and above are
    written to ``LOG_FILE``, each prefixed with the timestamp, level name, and
    the source file name and line number of the logging call.

    Note: per the standard library documentation, ``basicConfig`` does nothing
    if the root logger already has handlers, so this should run before any
    other logging configuration.
    """
    logging.basicConfig(
        filename=LOG_FILE,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
    )
|