crawler/test.py

import requests
from bs4 import BeautifulSoup
url = "https://www.newenglandoil.com/connecticut/zone1.asp?type=0"
headers_req = {  # Named headers_req to avoid shadowing the headers= keyword argument passed to requests.get() below
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
try:
    response = requests.get(url, headers=headers_req, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    all_tables = soup.find_all('table')
    print(f"Found {len(all_tables)} table(s) in total.")

    if all_tables:
        table = all_tables[0]  # Assuming it's the first (and only) table
        thead = table.find('thead')
        if thead:
            # Get the exact header texts
            actual_headers = [th.get_text(strip=True) for th in thead.find_all('th')]
            print(f"Actual headers found in the first table's thead: {actual_headers}")
            # Get the lowercased versions for easy comparison
            actual_headers_lower = [th.get_text(strip=True).lower() for th in thead.find_all('th')]
            print(f"Actual headers (lowercase): {actual_headers_lower}")
        else:
            print("The first table found does not have a <thead> element.")
    else:
        print("No tables found on the page.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching page: {e}")