Files
crawler/models.py
Edwin Eames 1592e6d685 refactor: replace fuel_scraper with newenglandoil + cheapestoil scrapers
- Add newenglandoil/ package as the primary scraper (replaces fuel_scraper)
- Add cheapestoil/ package as a secondary market price scraper
- Add app.py entry point for direct execution
- Update run.py: new scrape_cheapest(), migrate command, --state filter,
  --refresh-metadata flag for overwriting existing phone/URL data
- Update models.py with latest schema fields
- Update requirements.txt dependencies
- Update Dockerfile and docker-compose.yml for new structure
- Remove deprecated fuel_scraper module, test.py, and log file

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-06 11:34:21 -05:00

74 lines
3.0 KiB
Python

# models.py
from datetime import date, datetime, timezone

from sqlalchemy import Column, Integer, String, Float, Date, Boolean, BigInteger, ForeignKey, DateTime
from sqlalchemy.orm import relationship

# Import Base from our database.py
from database import Base
# --- OilPrice Model ---
class OilPrice(Base):
    """A single oil price record stored in the ``oil_prices`` table.

    Each row holds the price listed under ``name`` for a ``state``/``zone``
    pair, the 'Date Posted' text taken from the website, and the UTC moment
    at which the row was scraped.
    """

    __tablename__ = "oil_prices"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    state = Column(String(100), index=True)
    zone = Column(Integer, index=True)
    name = Column(String(255), index=True)
    price = Column(Float, nullable=True)
    # 'Date Posted' from the website, kept as text rather than a Date.
    # NOTE: within this class body the attribute name shadows the imported
    # `date` type; do not rely on `date.today` etc. below this line.
    date = Column(String(20))
    # Date and time of scraping, stored as a naive UTC datetime. The default
    # is evaluated when a new record is created and the field is not set
    # explicitly. `datetime.utcnow` is deprecated since Python 3.12, so the
    # equivalent naive-UTC value is derived from an aware `now()` instead.
    scrapetimestamp = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        index=True,
    )
    company_id = Column(Integer, ForeignKey("company.id"), nullable=True)
    # Plain integer: no ForeignKey constraint is declared for this column.
    # Presumably it refers to county.id -- confirm against the callers.
    county_id = Column(Integer, nullable=True)
    phone = Column(String(20), nullable=True)
    url = Column(String(500), nullable=True)

    def __repr__(self):
        """Return a debug representation with the key identifying fields."""
        return (f"<OilPrice(id={self.id}, state='{self.state}', zone='{self.zone}', "
                f"name='{self.name}', price={self.price}, date='{self.date}', "
                f"county_id={self.county_id}, scraped_at='{self.scrapetimestamp}')>")
# --- County Model (read-only, for lookups) ---
class County(Base):
    """A row of the ``county`` table.

    The file's section comment describes this model as read-only lookup
    data; nothing in this class enforces that.
    """

    __tablename__ = "county"

    id = Column(Integer, primary_key=True)
    name = Column(String(255))
    # Limited to 2 characters -- presumably a state abbreviation; confirm.
    state = Column(String(2))

    def __repr__(self):
        """Return a debug representation, matching the other models' style."""
        return f"<County(id={self.id}, name='{self.name}', state='{self.state}')>"
# --- Company Model ---
class Company(Base):
    """A company record stored in the ``company`` table.

    ``name`` and ``email`` are each declared unique. ``user_id`` is a
    foreign key to ``users.id``; that table is not defined in this module.
    """

    __tablename__ = "company"

    # Identity and bookkeeping.
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    active = Column(Boolean, nullable=False, default=True)
    # Creation date of the company record itself; `date.today` is passed as
    # a callable, so it is evaluated per insert rather than at import time.
    created = Column(Date, nullable=False, default=date.today)

    # Company details.
    name = Column(String(255), nullable=False, index=True, unique=True)
    address = Column(String(500), nullable=True)
    town = Column(String(100), nullable=True)
    state = Column(String(50), nullable=True)
    phone = Column(String(20), nullable=True)

    # Owner / contact details.
    owner_name = Column(String(255), nullable=True)
    owner_phone_number = Column(String(20), nullable=True)
    email = Column(String(255), nullable=True, unique=True)
    user_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)

    def __repr__(self):
        """Return a short debug representation: id, name and active flag."""
        ident, label, is_active = self.id, self.name, self.active
        return f"<Company(id={ident}, name='{label}', active={is_active})>"
# --- StatsPrice Model ---
class StatsPrice(Base):
    """A price value recorded per state in the ``stats_prices`` table.

    Both ``state`` and ``price`` are required (non-nullable).
    """

    __tablename__ = "stats_prices"

    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    state = Column(String(2), nullable=False)
    price = Column(Float, nullable=False)
    # Naive UTC creation time, filled in when the field is not set
    # explicitly. `datetime.utcnow` is deprecated since Python 3.12, so the
    # same naive-UTC value is derived from an aware `now()` instead.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    def __repr__(self):
        """Return a short debug representation: state and price."""
        return f"<StatsPrice(state='{self.state}', price={self.price})>"