Files
crawler/newenglandoil/config.py
Edwin Eames 1592e6d685 refactor: replace fuel_scraper with newenglandoil + cheapestoil scrapers
- Add newenglandoil/ package as the primary scraper (replaces fuel_scraper)
- Add cheapestoil/ package as a secondary market price scraper
- Add app.py entry point for direct execution
- Update run.py: new scrape_cheapest(), migrate command, --state filter,
  --refresh-metadata flag for overwriting existing phone/URL data
- Update models.py with latest schema fields
- Update requirements.txt dependencies
- Update Dockerfile and docker-compose.yml for new structure
- Remove deprecated fuel_scraper module, test.py, and log file

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-06 11:34:21 -05:00

126 lines
5.4 KiB
Python

"""
Configuration module for the NewEnglandOil / MaineOil scraper.
Contains site definitions, the state abbreviation map, the zone-to-county
mapping, and logging setup.
"""
import logging
# --- SITES CONFIGURATION ---
def _zone_slugs(count):
    """Return the zone slugs "zone1" .. "zone<count>" in ascending order."""
    return [f"zone{number}" for number in range(1, count + 1)]


# One dict per scraped site. "locations" maps a state key to the list of zone
# slugs available for that state; every state's zones are numbered
# contiguously starting at 1.
# NOTE(review): "url_template" is presumably expanded with str.format() by the
# scraper, and the meaning of oil_type 0 is not visible here -- confirm both
# against the caller.
SITES_CONFIG = [
    {
        "site_name": "NewEnglandOil",
        "base_url": "https://www.newenglandoil.com",
        # This site's template includes a per-state path segment.
        "url_template": "{base_url}/{state_slug}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "connecticut": _zone_slugs(10),
            "massachusetts": _zone_slugs(15),
            "newhampshire": _zone_slugs(6),
            "rhodeisland": _zone_slugs(4),
        },
    },
    {
        "site_name": "MaineOil",
        "base_url": "https://www.maineoil.com",
        # This site's template has no {state_slug} placeholder.
        "url_template": "{base_url}/{zone_slug}.asp?type={oil_type}",
        "oil_type": 0,
        "locations": {
            "maine": _zone_slugs(7),
        },
    },
]
# --- STATE ABBREVIATION MAP ---
# Lookup from a lowercase, space-free state key (the same spelling used for the
# keys of SITES_CONFIG "locations") to its 2-letter abbreviation.
# NOTE(review): "vermont" is listed here but no site in SITES_CONFIG has a
# "vermont" location -- confirm whether it is used by another caller.
STATE_ABBREV_MAP = {
    "connecticut": "CT",    # Connecticut
    "massachusetts": "MA",  # Massachusetts
    "maine": "ME",          # Maine
    "newhampshire": "NH",   # New Hampshire
    "rhodeisland": "RI",    # Rhode Island
    "vermont": "VT",        # Vermont
}
# --- ZONE-TO-COUNTY MAPPING ---
# Source table: state_key -> (state_abbrev, county names listed in zone order,
# zone 1 first). state_key matches the keys in SITES_CONFIG locations
# (lowercase, no spaces). county names must match the county.name in the
# database exactly. Trailing comments give "<zone number>: <region>".
# NOTE(review): the "N zones -> M counties" counts below do not equal the
# number of distinct counties mapped (e.g. Massachusetts maps to 12 distinct
# counties); M looks like each state's total county count -- confirm.
_COUNTIES_BY_ZONE = {
    # Connecticut (10 zones -> 8 counties)
    "connecticut": ("CT", (
        "New London",    # 1: Southeast CT
        "Windham",       # 2: Northeast CT
        "New Haven",     # 3: New Haven, Bridgeport
        "Middlesex",     # 4: Southeast Central CT
        "New Haven",     # 5: Southwest Central CT
        "Hartford",      # 6: Greater Hartford
        "Litchfield",    # 7: West CT
        "Fairfield",     # 8: Southwest CT
        "Tolland",       # 9: Northeast Central CT
        "Litchfield",    # 10: Northwest CT
    )),
    # Massachusetts (15 zones -> 14 counties)
    "massachusetts": ("MA", (
        "Suffolk",       # 1: South Boston
        "Middlesex",     # 2: North Boston
        "Norfolk",       # 3: Southwest of Boston
        "Plymouth",      # 4: South of Boston
        "Middlesex",     # 5: West of Boston
        "Bristol",       # 6: Southern Massachusetts
        "Barnstable",    # 7: Cape Cod & Islands
        "Essex",         # 8: Northwest of Boston
        "Essex",         # 9: North of Boston
        "Worcester",     # 10: Central Massachusetts
        "Worcester",     # 11: East Central Massachusetts
        "Hampshire",     # 12: West Central Massachusetts
        "Hampden",       # 13: Springfield Area
        "Franklin",      # 14: Northwestern Massachusetts
        "Berkshire",     # 15: Western Massachusetts
    )),
    # New Hampshire (6 zones -> 10 counties)
    "newhampshire": ("NH", (
        "Coos",          # 1: Northern NH
        "Strafford",     # 2: Eastern NH
        "Merrimack",     # 3: Central NH
        "Grafton",       # 4: West Central NH
        "Cheshire",      # 5: Southwest NH
        "Hillsborough",  # 6: South Central NH
    )),
    # Rhode Island (4 zones -> 5 counties)
    "rhodeisland": ("RI", (
        "Newport",       # 1: Southeast RI
        "Providence",    # 2: Northern RI
        "Washington",    # 3: Southwest RI
        "Kent",          # 4: Central RI
    )),
    # Maine (7 zones -> 16 counties, via MaineOil.com)
    "maine": ("ME", (
        "Cumberland",    # 1: Greater Portland
        "Kennebec",      # 2: Augusta/Waterville
        "Androscoggin",  # 3: Auburn/Lewiston/Western
        "York",          # 4: Southern Maine
        "Knox",          # 5: Mid-Coast
        "Penobscot",     # 6: Bangor West
        "Washington",    # 7: Downeast
    )),
}

# Maps (state_key, zone_number) -> (state_abbrev, county_name).
# Flattened from the table above; zone numbers are 1-based positions in each
# state's county tuple, so every zone resolves to exactly one county.
ZONE_COUNTY_MAP = {
    (state_key, zone_number): (state_abbrev, county_name)
    for state_key, (state_abbrev, county_names) in _COUNTIES_BY_ZONE.items()
    for zone_number, county_name in enumerate(county_names, start=1)
}
# --- LOGGING CONFIGURATION ---
# Relative path: the log file is created in the process's current working
# directory.
LOG_FILE = "oil_scraper.log"


def setup_logging():
    """Configure root logging for the scraper.

    Routes records at INFO level and above to ``LOG_FILE``, each line carrying
    the timestamp, level name, originating file name and line number, and the
    message.

    Note: ``logging.basicConfig`` is a no-op when the root logger already has
    handlers, so call this before anything else configures logging.

    Returns:
        None.
    """
    logging.basicConfig(
        filename=LOG_FILE,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
    )