34 lines
1.4 KiB
Python
34 lines
1.4 KiB
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Page whose first table's header row is inspected.
url = "https://www.newenglandoil.com/connecticut/zone1.asp?type=0"

# Request headers: a desktop-Chrome User-Agent is sent in place of the
# requests default.
headers_req = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

try:
    # Fetch the page (10 s timeout); raise_for_status() turns a 4xx/5xx
    # response into an exception handled by the except clause below.
    response = requests.get(url, timeout=10, headers=headers_req)
    response.raise_for_status()

    # Parse the raw response bytes with the built-in HTML parser.
    soup = BeautifulSoup(response.content, 'html.parser')

    all_tables = soup.find_all('table')
    print(f"Found {len(all_tables)} table(s) in total.")

    if not all_tables:
        print("No tables found on the page.")
    else:
        # Only the first table is examined.
        table = all_tables[0]
        thead = table.find('thead')

        if not thead:
            print("The first table found does not have a <thead> element.")
        else:
            # Header texts exactly as they appear (whitespace-stripped).
            actual_headers = [
                cell.get_text(strip=True) for cell in thead.find_all('th')
            ]
            print(f"Actual headers found in the first table's thead: {actual_headers}")

            # Lowercased copies of the same texts, for easy comparison.
            actual_headers_lower = [text.lower() for text in actual_headers]
            print(f"Actual headers (lowercase): {actual_headers_lower}")

except requests.exceptions.RequestException as err:
    print(f"Error fetching page: {err}")