feat: initial commit for oil price scraper service

FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures)
and competitor oil pricing from NewEnglandOil. Includes cron-driven
scraping, PostgreSQL storage, and REST endpoints for price retrieval.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:57:44 -05:00
commit af9c2f99e7
25 changed files with 1566 additions and 0 deletions

1
app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# eamco_scraper app package

111
app/config.py Normal file
View File

@@ -0,0 +1,111 @@
"""
Configuration settings for eamco_scraper.
This module provides configuration with environment-based switching:
- LOCAL: Uses 'eamco' database, localhost CORS origins
- PRODUCTION: Uses 'auburnoil' database, production domain CORS origins
Environment variables are loaded from .env.local or .env.prod depending
on the Docker compose file used.
"""
import logging
import os
from typing import List
from dotenv import load_dotenv
# Load environment variables from .env file if present
load_dotenv()
# =============================================================================
# ENVIRONMENT MODE
# =============================================================================
MODE = os.getenv("MODE", "LOCAL")
CURRENT_SETTINGS = os.getenv("CURRENT_SETTINGS", "DEVELOPMENT")
# Log configuration mode (logger setup happens after config is loaded)
_config_mode_msg = f"Using {'PRODUCTION' if CURRENT_SETTINGS == 'PRODUCTION' else 'DEVELOPMENT'} configuration"
# =============================================================================
# DATABASE CONFIGURATION
# =============================================================================
# Database connection components (can be overridden individually)
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME", "postgres")
POSTGRES_PW = os.getenv("POSTGRES_PW", "password")
POSTGRES_SERVER = os.getenv("POSTGRES_SERVER", "192.168.1.204")
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
# Database name differs by environment
if CURRENT_SETTINGS == "PRODUCTION":
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "auburnoil")
else:
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "eamco")
# Build connection URI from components (fallback)
_DEFAULT_DATABASE_URI = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
POSTGRES_USERNAME,
POSTGRES_PW,
POSTGRES_SERVER,
POSTGRES_PORT,
POSTGRES_DBNAME
)
# Allow full DATABASE_URL override
DATABASE_URL: str = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URI)
# SQLAlchemy binds (for compatibility)
SQLALCHEMY_DATABASE_URI = DATABASE_URL
SQLALCHEMY_BINDS = {POSTGRES_DBNAME: SQLALCHEMY_DATABASE_URI}
# =============================================================================
# CORS CONFIGURATION
# =============================================================================
# Parse CORS origins from environment (comma-separated) or use defaults
_cors_env = os.getenv("CORS_ORIGINS", "")
if _cors_env:
CORS_ORIGINS: List[str] = [origin.strip() for origin in _cors_env.split(",")]
elif CURRENT_SETTINGS == "PRODUCTION":
# Production CORS origins
CORS_ORIGINS = [
"https://oil.edwineames.com",
"https://edwineames.com",
]
else:
# Development CORS origins
CORS_ORIGINS = [
"http://localhost:9000",
"https://localhost:9513",
"http://localhost:9514",
"http://localhost:9512",
"http://localhost:9511",
"http://localhost:5173", # Frontend port
"http://localhost:9616", # Authorize service port
]
# =============================================================================
# SCRAPER CONFIGURATION
# =============================================================================
# User agent for web scraping (identifies your application)
SCRAPER_USER_AGENT: str = "Unraid-EamcoScraper/1.0 (eeames214@gmail.com)"
# Rate limiting: Sleep between requests (be respectful to target servers)
SCRAPER_DELAY_SECONDS: float = float(os.getenv("SCRAPER_DELAY", "2.0"))
# Request timeout in seconds
SCRAPER_TIMEOUT: int = int(os.getenv("SCRAPER_TIMEOUT", "10"))
# Target URL for New England Oil Zone 10
NEWENGLAND_OIL_ZONE10_URL: str = "https://www.newenglandoil.com/massachusetts/zone10.asp?x=0"
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

88
app/database.py Normal file
View File

@@ -0,0 +1,88 @@
import logging
import sys
from contextlib import contextmanager
from typing import Generator
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.exc import SQLAlchemyError
from app.config import (
DATABASE_URL,
LOG_LEVEL,
LOG_FORMAT,
)
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
# Configure root logging once at import time so every module importing
# app.database logs to stdout (captured by Docker). getattr falls back to
# INFO if LOG_LEVEL is not a recognized logging level name.
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format=LOG_FORMAT,
    handlers=[
        logging.StreamHandler(sys.stdout),
    ]
)
logger = logging.getLogger(__name__)

# =============================================================================
# DATABASE SETUP
# =============================================================================
# Create SQLAlchemy engine with connection pooling
engine = create_engine(
    DATABASE_URL,
    pool_pre_ping=True,  # Verify connections before use
    pool_size=5,
    max_overflow=10,
    echo=False,  # Set to True for SQL debugging
)

# Session factory; autocommit/autoflush disabled so callers control
# transaction boundaries explicitly.
SessionLocal = sessionmaker(
    autocommit=False,
    autoflush=False,
    bind=engine,
)
def get_db() -> Generator[Session, None, None]:
    """
    FastAPI dependency yielding a database session.

    The session is always closed once the request finishes, even when
    the endpoint raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
@contextmanager
def get_db_session() -> Generator[Session, None, None]:
    """
    Context-manager variant of the session provider, for code that runs
    outside FastAPI's dependency injection (scripts, health probes).
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
def check_db_connection() -> bool:
    """
    Probe database connectivity with a trivial query.

    Returns:
        True if database is reachable, False otherwise
    """
    try:
        with get_db_session() as session:
            session.execute(text("SELECT 1"))
    except SQLAlchemyError as exc:
        logger.error(f"Database connection failed: {exc}")
        return False
    return True

195
app/main.py Normal file
View File

@@ -0,0 +1,195 @@
"""
eamco_scraper - FastAPI Oil Price Scraping Microservice.
This microservice provides endpoints for scraping oil prices from New England Oil
and storing them in the database for historical tracking.
Endpoints:
GET /health - Health check with database connectivity status
GET /scraper/newenglandoil/latestprice - Trigger scrape and return latest prices
Usage:
# Development
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
# Production (Docker)
docker run -p 8000:8000 eamco_scraper
# Trigger from cron
curl http://localhost:8000/scraper/newenglandoil/latestprice
"""
import logging
import os
import sys
from typing import List
from datetime import datetime
from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
from app.config import (
DATABASE_URL,
CORS_ORIGINS,
LOG_LEVEL,
LOG_FORMAT,
)
from app.models import Base, CompanyPrice
from app.database import engine, get_db, check_db_connection
from app.schemas import HealthResponse, PriceRecord
from app.newenglandoil.router import router as newenglandoil_router
from app.priceticker.router import router as priceticker_router
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
# Root logging to stdout so container runtimes capture it; falls back to
# INFO when LOG_LEVEL is not a valid logging level name.
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format=LOG_FORMAT,
    handlers=[
        logging.StreamHandler(sys.stdout),
    ]
)
logger = logging.getLogger(__name__)

# =============================================================================
# FASTAPI APPLICATION
# =============================================================================
app = FastAPI(
    title="eamco_scraper",
    description="Oil price scraping microservice for New England Oil",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# =============================================================================
# CORS MIDDLEWARE
# =============================================================================
# Allowed origins come from app.config and differ by environment
# (localhost ports in development, production domains otherwise).
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# =============================================================================
# ROUTERS
# =============================================================================
# Mounts /scraper/newenglandoil/* and /scraper/priceticker/* endpoints.
app.include_router(newenglandoil_router)
app.include_router(priceticker_router)
# =============================================================================
# ENDPOINTS
# =============================================================================
@app.get("/", include_in_schema=False)
async def root():
"""Root endpoint - redirect to docs."""
return {
"service": "eamco_scraper",
"version": "1.0.0",
"docs": "/docs",
}
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
"""
Health check endpoint.
Returns service status and database connectivity.
Use this endpoint for container health checks and monitoring.
Returns:
HealthResponse with status and db_connected flag
"""
db_connected = check_db_connection()
return HealthResponse(
status="healthy" if db_connected else "degraded",
db_connected=db_connected,
)
@app.get(
    "/scraper/prices",
    response_model=List[PriceRecord],
    tags=["Prices"],
)
async def get_stored_prices(
    date: str | None = None,
    db: Session = Depends(get_db)
):
    """
    Read previously stored oil prices for one calendar day.

    Defaults to the current date (UTC) when no date is supplied. This is
    a read-only endpoint: it never triggers a scrape.
    """
    target_date = date if date else datetime.utcnow().date().isoformat()
    try:
        # All rows recorded for that day, across zones and companies.
        rows = db.query(CompanyPrice).filter(
            CompanyPrice.scrape_date == target_date
        ).all()
        records = []
        for row in rows:
            records.append(PriceRecord(
                company_name=row.company_name,
                town=row.town,
                price_decimal=float(row.price_decimal),
                scrape_date=str(row.scrape_date),
                zone=row.zone
            ))
        return records
    except Exception as e:
        logger.error(f"Error fetching prices: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# =============================================================================
# STARTUP/SHUTDOWN EVENTS
# =============================================================================
@app.on_event("startup")
async def startup_event():
"""Application startup - log configuration and test DB connection."""
logger.info("🚀 eamco_scraper STARTING")
mode = os.environ.get('MODE', 'DEVELOPMENT').upper()
if mode in ['DEVELOPMENT', 'DEV', 'LOCAL']:
logger.info("🤖🤖🤖🤖🤖 Mode: Development 🤖🤖🤖🤖🤖")
elif mode in ['PRODUCTION', 'PROD']:
logger.info("💀💀💀💀💀💀💀💀💀💀 ⚠️ WARNING PRODUCTION 💀💀💀💀💀💀💀💀💀💀")
logger.info(f"DB: {DATABASE_URL[:30]}...")
logger.info(f"CORS: {len(CORS_ORIGINS)} origins configured")
# Test database connection
if check_db_connection():
logger.info("DB Connection: ✅ OK")
else:
logger.warning("DB Connection: ❌ FAILED")
# Create tables if they don't exist
try:
Base.metadata.create_all(bind=engine)
logger.info("Database tables verified/created")
except Exception as e:
logger.error(f"Failed to create tables: {e}")
@app.on_event("shutdown")
async def shutdown_event():
"""Application shutdown - cleanup."""
logger.info("🛑 eamco_scraper SHUTTING DOWN")

99
app/models.py Normal file
View File

@@ -0,0 +1,99 @@
"""
SQLAlchemy models for eamco_scraper.
This module defines the database models for storing scraped oil price data.
"""
from datetime import datetime
from sqlalchemy import Column, Integer, String, Numeric, Date, DateTime, Index
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class CompanyPrice(Base):
    """
    Model for storing oil company pricing data.

    This table stores historical pricing data from oil companies.
    Each scrape creates new records (no updates) to enable price trend analysis.

    Attributes:
        id: Primary key
        company_name: Name of the oil company
        town: Town where the company operates
        price_decimal: Price per gallon (e.g., 2.599)
        scrape_date: Date when the price was listed on the website
        zone: Geographic zone (default: 'zone10' for Central Massachusetts)
        created_at: Timestamp when the record was inserted into database
    """
    __tablename__ = "company_prices"

    id = Column(Integer, primary_key=True, autoincrement=True)
    company_name = Column(String(255), nullable=False, index=True)
    town = Column(String(100), nullable=True)
    price_decimal = Column(Numeric(6, 3), nullable=False)
    scrape_date = Column(Date, nullable=False, index=True)
    zone = Column(String(50), nullable=False, default="zone10", index=True)
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    def __repr__(self):
        return f"<CompanyPrice(company='{self.company_name}', price={self.price_decimal}, date={self.scrape_date})>"

    def to_dict(self):
        """Convert model instance to dictionary for JSON serialization."""
        # FIX: use explicit `is not None` checks instead of truthiness —
        # the old `if self.price_decimal` mapped a legitimate 0.000 price
        # to None. This also matches TickerPrice.to_dict's style.
        return {
            "id": self.id,
            "company_name": self.company_name,
            "town": self.town,
            "price_decimal": float(self.price_decimal) if self.price_decimal is not None else None,
            "scrape_date": self.scrape_date.isoformat() if self.scrape_date is not None else None,
            "zone": self.zone,
            "created_at": self.created_at.isoformat() if self.created_at is not None else None,
        }
# Create composite indexes for common queries
# (company, date): price history for a single company over time.
Index('idx_company_prices_company_date', CompanyPrice.company_name, CompanyPrice.scrape_date)
# (zone, date): all companies in one zone on a given day.
Index('idx_company_prices_zone_date', CompanyPrice.zone, CompanyPrice.scrape_date)
class TickerPrice(Base):
    """
    One stored snapshot of a stock/commodity ticker quote.

    Attributes:
        id: Primary key
        symbol: Ticker symbol (e.g., HO=F, CL=F, RB=F)
        price_decimal: Current price
        currency: Currency code (e.g., USD)
        change_decimal: Price change amount
        percent_change_decimal: Price change percentage
        timestamp: Time of scrape
    """
    __tablename__ = "ticker_prices"

    id = Column(Integer, primary_key=True, autoincrement=True)
    symbol = Column(String(20), nullable=False, index=True)
    price_decimal = Column(Numeric(10, 4), nullable=False)
    currency = Column(String(10), nullable=True)
    change_decimal = Column(Numeric(10, 4), nullable=True)
    percent_change_decimal = Column(Numeric(10, 4), nullable=True)
    timestamp = Column(DateTime, nullable=False, default=datetime.utcnow, index=True)

    def __repr__(self):
        return f"<TickerPrice(symbol='{self.symbol}', price={self.price_decimal}, time={self.timestamp})>"

    def to_dict(self):
        """Serialize to a JSON-friendly dict (Numeric -> float, datetime -> ISO string)."""
        def as_float(value):
            # Numeric columns come back as Decimal; JSON wants plain floats.
            return float(value) if value is not None else None

        return {
            "id": self.id,
            "symbol": self.symbol,
            "price": as_float(self.price_decimal),
            "currency": self.currency,
            "change": as_float(self.change_decimal),
            "percent_change": as_float(self.percent_change_decimal),
            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
        }

View File

@@ -0,0 +1,9 @@
"""
New England Oil Scraper Module.
This package contains code specific to scraping prices from the New England Oil website.
"""
from app.newenglandoil.scraper import scrape_newengland_oil, ScraperError
__all__ = ['scrape_newengland_oil', 'ScraperError']

140
app/newenglandoil/router.py Normal file
View File

@@ -0,0 +1,140 @@
import logging
from datetime import datetime
from typing import List
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from app.models import CompanyPrice
from app.schemas import PriceRecord
from app.newenglandoil.schemas import LatestPriceResponse
from app.newenglandoil.scraper import scrape_newengland_oil, ScraperError
from app.database import get_db
logger = logging.getLogger(__name__)

# All routes in this module are mounted under /scraper/newenglandoil.
router = APIRouter(
    prefix="/scraper/newenglandoil",
    tags=["New England Oil Scraper"],
)
@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all prices in the database (historical tracking)
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Returns:
        LatestPriceResponse with scraped prices and storage statistics

    Raises:
        HTTPException: 500 on scraper failure, database failure, or any
            other unexpected error.
    """
    logger.info("=" * 60)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info("=" * 60)
    try:
        # Scrape the website
        scraped_prices = scrape_newengland_oil()
        if not scraped_prices:
            # An empty scrape is reported as status="warning", not an HTTP error.
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                scrape_timestamp=datetime.utcnow().isoformat(),
                prices=[]
            )
        # Store prices in database
        stored_count = 0
        price_records = []
        for price_data in scraped_prices:
            # Add to response list (regardless of whether it's new or existing)
            price_records.append(PriceRecord(
                company_name=price_data["company_name"],
                town=price_data.get("town"),
                price_decimal=float(price_data["price_decimal"]),
                scrape_date=price_data["scrape_date"].isoformat(),
                zone=price_data.get("zone", "zone10"),
            ))
            try:
                # Check for existing record to prevent duplicates.
                # NOTE: one SELECT per scraped row; fine at this data volume.
                existing_record = db.query(CompanyPrice).filter(
                    CompanyPrice.company_name == price_data["company_name"],
                    CompanyPrice.scrape_date == price_data["scrape_date"],
                    CompanyPrice.zone == price_data.get("zone", "zone10")
                ).first()
                if existing_record:
                    logger.debug(f"Skipping duplicate record for {price_data['company_name']} on {price_data['scrape_date']}")
                    continue
                # Create database record
                price_record = CompanyPrice(
                    company_name=price_data["company_name"],
                    town=price_data.get("town"),
                    price_decimal=price_data["price_decimal"],
                    scrape_date=price_data["scrape_date"],
                    zone=price_data.get("zone", "zone10"),
                )
                db.add(price_record)
                stored_count += 1
            except Exception as e:
                # Best-effort per row: one bad record does not abort the batch.
                logger.error(f"Failed to store price for {price_data.get('company_name')}: {e}")
        # Commit all new records in one transaction
        if stored_count > 0:
            db.commit()
            logger.info(f"Successfully stored {stored_count} new price records")
        else:
            logger.info("No new price records to store (all duplicates)")
        return LatestPriceResponse(
            status="success",
            message=f"Successfully scraped {len(scraped_prices)} prices, stored {stored_count} new records",
            prices_scraped=len(scraped_prices),
            prices_stored=stored_count,
            scrape_timestamp=datetime.utcnow().isoformat(),
            prices=price_records,
        )
    except ScraperError as e:
        logger.error(f"Scraper error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        )
    except SQLAlchemyError as e:
        # Roll back any partially-added rows before reporting the failure.
        db.rollback()
        logger.error(f"Database error during price storage: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        )
    except Exception as e:
        db.rollback()
        logger.error(f"Unexpected error during scraping: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        )

View File

@@ -0,0 +1,12 @@
from typing import List
from pydantic import BaseModel
from app.schemas import PriceRecord
class LatestPriceResponse(BaseModel):
"""Latest price scrape response schema."""
status: str
message: str
prices_scraped: int
prices_stored: int
scrape_timestamp: str
prices: List[PriceRecord]

View File

@@ -0,0 +1,170 @@
"""
Web scraping module for New England Oil prices.
This module handles scraping oil price data from the New England Oil website
for Zone 10 (Central Massachusetts).
"""
import logging
import time
from datetime import date
from typing import List, Dict, Optional
from decimal import Decimal
import requests
from bs4 import BeautifulSoup
from app.config import (
NEWENGLAND_OIL_ZONE10_URL,
SCRAPER_USER_AGENT,
SCRAPER_TIMEOUT,
SCRAPER_DELAY_SECONDS,
)
logger = logging.getLogger(__name__)
class ScraperError(Exception):
    """Raised when fetching or parsing the New England Oil page fails."""
def scrape_newengland_oil() -> List[Dict[str, any]]:
    """
    Scrape oil prices from New England Oil Zone 10 page.

    Fetches the page, parses the HTML table, and extracts company names,
    towns, and prices.

    Returns:
        List of dictionaries with keys: company_name, town, price_decimal, scrape_date, zone

    Raises:
        ScraperError: If the request fails or parsing fails
    """
    # NOTE(review): "any" in the return annotation is the builtin, not
    # typing.Any — it evaluates without error but is almost certainly a typo.
    logger.info(f"Starting scrape of {NEWENGLAND_OIL_ZONE10_URL}")
    # Browser-like headers; User-Agent identifies this scraper to the site.
    headers = {
        "User-Agent": SCRAPER_USER_AGENT,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }
    try:
        # Make the request
        response = requests.get(
            NEWENGLAND_OIL_ZONE10_URL,
            headers=headers,
            timeout=SCRAPER_TIMEOUT
        )
        response.raise_for_status()
        logger.info(f"Successfully fetched page (status: {response.status_code})")
        # Parse HTML
        soup = BeautifulSoup(response.content, 'lxml')
        # Find the price table
        # The table typically has company names, towns, and prices
        # We need to inspect the actual HTML structure
        prices = []
        today = date.today()
        # Look for table rows with price data
        # The structure appears to be: company links followed by town and price info
        # We'll look for patterns in the HTML
        # Find all table rows
        tables = soup.find_all('table')
        if not tables:
            logger.warning("No tables found on page")
            # Debug: Save HTML to file for offline inspection of layout changes
            with open("debug_page.html", "wb") as f:
                f.write(response.content)
            # NOTE(review): this ScraperError is caught by the generic
            # `except Exception` below and re-wrapped, so callers see
            # "Failed to parse page: No price table found on page".
            raise ScraperError("No price table found on page")
        # The main price table is usually the largest one or contains specific markers
        # Let's find rows that contain price information
        for table in tables:
            rows = table.find_all('tr')
            for row in rows:
                cells = row.find_all(['td', 'th'])
                if len(cells) >= 3:  # Expect at least company, town, price
                    # Try to extract company name (usually in a link)
                    company_link = row.find('a')
                    if company_link:
                        company_name = company_link.get_text(strip=True)
                        # Extract text from all cells
                        cell_texts = [cell.get_text(strip=True) for cell in cells]
                        # Look for price pattern (e.g., "$2.599" or "2.599")
                        price_value = None
                        town_value = None
                        for text in cell_texts:
                            # Check if this looks like a price
                            text_clean = text.replace('$', '').replace(',', '').strip()
                            try:
                                # Try to parse as decimal
                                if text_clean and '.' in text_clean:
                                    potential_price = Decimal(text_clean)
                                    # Reasonable price range for heating oil (0.50 to 10.00)
                                    if Decimal('0.50') <= potential_price <= Decimal('10.00'):
                                        price_value = potential_price
                                        break
                            except (ValueError, ArithmeticError):
                                # Not a valid price, might be town name.
                                # NOTE(review): town capture only runs when
                                # Decimal() raised, which requires the text to
                                # contain '.' — a plain town name like
                                # "Worcester" never reaches this branch, so
                                # town_value stays None. Likely a bug; confirm
                                # against the live page layout.
                                if text and not text.startswith('$') and len(text) > 2:
                                    if not town_value:  # Take first non-price text as town
                                        town_value = text
                        if price_value:
                            prices.append({
                                "company_name": company_name,
                                "town": town_value,
                                "price_decimal": price_value,
                                "scrape_date": today,
                                "zone": "zone10"
                            })
                            logger.debug(f"Found: {company_name} - {town_value} - ${price_value}")
        if not prices:
            logger.warning("No prices extracted from page")
            raise ScraperError("Failed to extract any price data from page")
        logger.info(f"Successfully scraped {len(prices)} price records")
        return prices
    except requests.RequestException as e:
        logger.error(f"Request failed: {e}")
        raise ScraperError(f"Failed to fetch page: {str(e)}")
    except Exception as e:
        logger.error(f"Scraping failed: {e}", exc_info=True)
        raise ScraperError(f"Failed to parse page: {str(e)}")
def scrape_and_delay() -> List[Dict[str, any]]:
    """
    Scrape prices, then sleep to respect the configured rate limit.

    Convenience wrapper around scrape_newengland_oil() for callers that
    issue multiple requests in a row.

    Returns:
        List of price dictionaries
    """
    results = scrape_newengland_oil()
    if SCRAPER_DELAY_SECONDS > 0:
        logger.debug(f"Sleeping {SCRAPER_DELAY_SECONDS}s for rate limiting")
        time.sleep(SCRAPER_DELAY_SECONDS)
    return results

View File

72
app/priceticker/router.py Normal file
View File

@@ -0,0 +1,72 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from typing import List
import logging
from app.database import get_db
from app.models import TickerPrice
from app.priceticker.scraper import fetch_ticker_data
logger = logging.getLogger(__name__)

# All routes in this module are mounted under /scraper/priceticker.
router = APIRouter(
    prefix="/scraper/priceticker",
    tags=["Price Ticker"]
)
@router.post("/update")
async def update_prices(db: Session = Depends(get_db)):
"""
Trigger an immediate update of stock/commodity prices.
"""
logger.info("Triggering ticker update...")
data = fetch_ticker_data()
if not data:
raise HTTPException(status_code=500, detail="Failed to fetch ticker data")
try:
saved_records = []
for item in data:
record = TickerPrice(
symbol=item["symbol"],
price_decimal=item["price"],
currency=item["currency"],
change_decimal=item["change"],
percent_change_decimal=item["percent_change"],
timestamp=item["timestamp"]
)
db.add(record)
saved_records.append(record)
db.commit()
return {"status": "success", "updated": len(saved_records)}
except Exception as e:
db.rollback()
logger.error(f"Database error saving tickers: {e}")
raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
@router.get("/latest")
async def get_latest_prices(db: Session = Depends(get_db)):
"""
Get the most recent price for each ticker symbol.
"""
# Subquery to find the latest timestamp for each symbol
# This is a bit complex in pure ORM, so we might do it simply for now:
# 1. Get list of distinct symbols we care about
# 2. Query latest for each
results = []
# We know the symbols we care about: HO=F, CL=F, RB=F
TARGET_SYMBOLS = ["HO=F", "CL=F", "RB=F"]
for symbol in TARGET_SYMBOLS:
latest = db.query(TickerPrice).filter(
TickerPrice.symbol == symbol
).order_by(TickerPrice.timestamp.desc()).first()
if latest:
results.append(latest.to_dict())
return results

View File

@@ -0,0 +1,65 @@
import logging
import yfinance as yf
from decimal import Decimal
from datetime import datetime
from typing import List, Dict, Any
logger = logging.getLogger(__name__)

# Ticker mapping (Yahoo Finance futures symbols fetched by fetch_ticker_data):
# HO=F : Heating Oil
# CL=F : Crude Oil
# RB=F : RBOB Gasoline
TICKERS = ["HO=F", "CL=F", "RB=F"]
def fetch_ticker_data() -> List[Dict[str, Any]]:
    """
    Fetch current data for oil tickers from Yahoo Finance.

    Per-symbol failures are logged and skipped, so the result may contain
    fewer entries than TICKERS; a total failure returns an empty list
    (this function never raises).

    Returns:
        List of dictionaries with keys: symbol, price, currency, change,
        percent_change, timestamp. Monetary values are Decimal.
    """
    results = []
    try:
        # Fetch data for all tickers at once (single yfinance session).
        tickers_str = " ".join(TICKERS)
        data = yf.Tickers(tickers_str)
        for symbol in TICKERS:
            try:
                ticker = data.tickers[symbol]
                # Fast info usually contains the latest price
                info = ticker.fast_info
                # Fallback to history if fast_info is missing crucial data (simplified here)
                # But fast_info is generally faster and sufficient for last_price
                last_price = info.last_price
                previous_close = info.previous_close
                if last_price is None:
                    logger.warning(f"No price found for {symbol}")
                    continue
                # NOTE(review): if previous_close is None the subtraction below
                # raises TypeError — it is caught by the per-symbol handler and
                # the symbol is skipped; confirm that's the intended behavior.
                change = last_price - previous_close
                percent_change = (change / previous_close) * 100 if previous_close else 0
                # Route floats through str() so Decimal captures the printed
                # value rather than binary-float artifacts.
                results.append({
                    "symbol": symbol,
                    "price": Decimal(str(last_price)),
                    "currency": info.currency,
                    "change": Decimal(str(change)),
                    "percent_change": Decimal(str(percent_change)),
                    "timestamp": datetime.utcnow()
                })
                logger.info(f"Fetched {symbol}: {last_price} ({percent_change:.2f}%)")
            except Exception as e:
                logger.error(f"Error processing {symbol}: {e}")
    except Exception as e:
        logger.error(f"Error fetching ticker data: {e}")
    return results

16
app/schemas.py Normal file
View File

@@ -0,0 +1,16 @@
from typing import Optional
from pydantic import BaseModel
class HealthResponse(BaseModel):
    """Health check response schema."""
    status: str         # "healthy" when the DB probe succeeds, else "degraded"
    db_connected: bool  # result of the database connectivity check
class PriceRecord(BaseModel):
    """Single price record schema."""
    company_name: str
    town: Optional[str] = None  # may be absent when the scraper cannot identify a town
    price_decimal: float        # price per gallon, e.g. 2.599
    scrape_date: str            # ISO date string (YYYY-MM-DD)
    zone: str                   # geographic zone, e.g. "zone10"