feat: initial commit for oil price scraper service

FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures)
and competitor oil pricing from NewEnglandOil. Includes cron-driven
scraping, PostgreSQL storage, and REST endpoints for price retrieval.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:57:44 -05:00
commit af9c2f99e7
25 changed files with 1566 additions and 0 deletions

111
app/config.py Normal file
View File

@@ -0,0 +1,111 @@
"""
Configuration settings for eamco_scraper.
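This module provides configuration with environment-based switching,
driven by the CURRENT_SETTINGS environment variable:
- LOCAL (CURRENT_SETTINGS unset or anything other than "PRODUCTION"):
  Uses 'eamco' database, localhost CORS origins
- PRODUCTION (CURRENT_SETTINGS="PRODUCTION"): Uses 'auburnoil' database,
  production domain CORS origins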
Environment variables are loaded from .env.local or .env.prod depending
on the Docker compose file used.
"""
import logging
import os
from typing import List
from urllib.parse import quote

from dotenv import load_dotenv
# Load environment variables from a .env file if present. This call must stay
# ahead of the os.getenv() lookups below so that values supplied by that file
# are already in the environment when the settings are read.
load_dotenv()
# =============================================================================
# ENVIRONMENT MODE
# =============================================================================
# Deployment mode label; defaults to "LOCAL" when MODE is not set.
MODE = os.getenv("MODE", "LOCAL")
# Selects which set of defaults the rest of this module uses. Only the exact
# value "PRODUCTION" enables the production branches; anything else is
# treated as development.
CURRENT_SETTINGS = os.getenv("CURRENT_SETTINGS", "DEVELOPMENT")
# Log configuration mode (logger setup happens after config is loaded)
_config_mode_msg = f"Using {'PRODUCTION' if CURRENT_SETTINGS == 'PRODUCTION' else 'DEVELOPMENT'} configuration"
# =============================================================================
# DATABASE CONFIGURATION
# =============================================================================
# Database connection components (can be overridden individually).
# These hold the raw, un-encoded values exactly as supplied.
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME", "postgres")
POSTGRES_PW = os.getenv("POSTGRES_PW", "password")
POSTGRES_SERVER = os.getenv("POSTGRES_SERVER", "192.168.1.204")
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
# Database name differs by environment: 'auburnoil' in production, 'eamco'
# otherwise, unless POSTGRES_DBNAME is set explicitly.
POSTGRES_DBNAME = os.getenv(
    "POSTGRES_DBNAME",
    "auburnoil" if CURRENT_SETTINGS == "PRODUCTION" else "eamco",
)
# Build connection URI from components (fallback).
# The username and password are percent-encoded with no "safe" characters so
# that reserved URL characters in credentials ('@', '/', ':', '#', ...) cannot
# be mistaken for URI delimiters. Plain alphanumeric credentials, including
# the defaults above, are left unchanged by the encoding.
_DEFAULT_DATABASE_URI = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
    quote(POSTGRES_USERNAME, safe=""),
    quote(POSTGRES_PW, safe=""),
    POSTGRES_SERVER,
    POSTGRES_PORT,
    POSTGRES_DBNAME,
)
# Allow full DATABASE_URL override. A value supplied this way is used verbatim
# and is expected to be a complete, already-escaped URL.
DATABASE_URL: str = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URI)
# SQLAlchemy binds (for compatibility)
SQLALCHEMY_DATABASE_URI = DATABASE_URL
SQLALCHEMY_BINDS = {POSTGRES_DBNAME: SQLALCHEMY_DATABASE_URI}
# =============================================================================
# CORS CONFIGURATION
# =============================================================================
# Allowed CORS origins, chosen in this order:
#   1. CORS_ORIGINS environment variable (comma-separated), when non-empty
#   2. the production list, when CURRENT_SETTINGS == "PRODUCTION"
#   3. the localhost development list otherwise
_cors_env = os.getenv("CORS_ORIGINS", "")
if _cors_env:
    # Trim whitespace around each entry and drop blank ones, so input such as
    # "https://a.example, ,https://b.example," never puts an empty-string
    # origin in the list.
    CORS_ORIGINS: List[str] = [
        origin
        for origin in (part.strip() for part in _cors_env.split(","))
        if origin
    ]
elif CURRENT_SETTINGS == "PRODUCTION":
    # Production CORS origins
    CORS_ORIGINS = [
        "https://oil.edwineames.com",
        "https://edwineames.com",
    ]
else:
    # Development CORS origins
    CORS_ORIGINS = [
        "http://localhost:9000",
        # NOTE(review): https here while every sibling uses http - confirm
        "https://localhost:9513",
        "http://localhost:9514",
        "http://localhost:9512",
        "http://localhost:9511",
        "http://localhost:5173",  # Frontend port
        "http://localhost:9616",  # Authorize service port
    ]
# =============================================================================
# SCRAPER CONFIGURATION
# =============================================================================
# Fixed values first, environment-tunable values after.

# User agent string for web scraping (identifies this application)
SCRAPER_USER_AGENT: str = "Unraid-EamcoScraper/1.0 (eeames214@gmail.com)"

# Target URL for New England Oil Zone 10
NEWENGLAND_OIL_ZONE10_URL: str = "https://www.newenglandoil.com/massachusetts/zone10.asp?x=0"

# Rate limiting: seconds to sleep between requests, read from SCRAPER_DELAY
# (default "2.0") and converted to float. Be respectful to target servers.
SCRAPER_DELAY_SECONDS: float = float(
    os.environ.get("SCRAPER_DELAY", "2.0")
)

# Request timeout in seconds, read from SCRAPER_TIMEOUT (default "10") and
# converted to int.
SCRAPER_TIMEOUT: int = int(
    os.environ.get("SCRAPER_TIMEOUT", "10")
)
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
# Log level name read from LOG_LEVEL. The value is trimmed and upper-cased
# because the standard logging level names are upper-case ("DEBUG", "INFO",
# ...), so settings such as "debug" or " Info " still resolve. A blank value
# (e.g. an empty `LOG_LEVEL=` line in an env file) falls back to "INFO".
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").strip().upper() or "INFO"
# Record layout: timestamp - logger name - level - message
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"