Add ZONE_COUNTY_MAP for all 5 scraped states (42 zone-to-county entries). Scraper now resolves county_id at startup and assigns it to each record. Upsert logic deduplicates by (name, state, county_id) to prevent duplicates when multiple zones map to the same county. Also adds County model for DB lookups and fixes Rhode Island zone count (4, not 5). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
33 lines
898 B
Python
33 lines
898 B
Python
"""
HTTP client module for making web requests.
"""
|
|
import logging
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Default headers to mimic a browser
DEFAULT_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

# Passed as the ``timeout`` argument of ``requests.get`` (seconds, per the
# requests API) so a stalled server cannot block a fetch indefinitely.
REQUEST_TIMEOUT = 20
|
|
|
|
|
|
def make_request(url: str) -> BeautifulSoup | None:
    """
    Fetch a URL and return a BeautifulSoup object.

    Sends a GET request using ``DEFAULT_HEADERS`` and ``REQUEST_TIMEOUT``.
    Any ``requests`` failure, including an HTTP error status raised by
    ``raise_for_status()``, is logged at ERROR level and reported to the
    caller as ``None`` instead of being propagated.

    Args:
        url: The URL to fetch

    Returns:
        BeautifulSoup object (parsed with 'html.parser') if successful,
        None otherwise
    """
    try:
        response = requests.get(url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT)
        # Turn 4xx/5xx responses into an exception so they take the same
        # logged-and-None path as connection errors and timeouts.
        response.raise_for_status()
        # The parser is given the raw response bytes, not decoded text.
        return BeautifulSoup(response.content, 'html.parser')
    except requests.exceptions.RequestException as e:
        logging.error(f"Error fetching {url}: {e}")
        return None
|