feat: initial commit for oil price scraper service
FastAPI-based scraper for commodity ticker prices (HO, CL, RB futures) and competitor oil pricing from NewEnglandOil. Includes cron-driven scraping, PostgreSQL storage, and REST endpoints for price retrieval. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
23
.env.example
Normal file
23
.env.example
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Environment variables for eamco_scraper - PRODUCTION
|
||||||
|
# Copy this to .env.prod and fill in actual values
|
||||||
|
|
||||||
|
# Application mode
|
||||||
|
MODE=PRODUCTION
|
||||||
|
CURRENT_SETTINGS=PRODUCTION
|
||||||
|
|
||||||
|
# Database configuration
|
||||||
|
POSTGRES_USERNAME=postgres
|
||||||
|
POSTGRES_PW=your_password_here
|
||||||
|
POSTGRES_SERVER=192.168.1.204
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_DBNAME=auburnoil
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# Scraper configuration
|
||||||
|
SCRAPER_DELAY=2.0
|
||||||
|
SCRAPER_TIMEOUT=10
|
||||||
|
|
||||||
|
# CORS (optional - comma-separated)
|
||||||
|
# CORS_ORIGINS=https://oil.edwineames.com,https://edwineames.com
|
||||||
132
.gitignore
vendored
Normal file
132
.gitignore
vendored
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having sub-dependencies with platform-specific binaries, it is better to ignore the Pipfile.lock.
|
||||||
|
# Pipfile.lock
|
||||||
|
|
||||||
|
# PEP 582; __pypackages__
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyderworkspace
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env.local
|
||||||
|
.env.prod
|
||||||
18
Dockerfile.dev
Normal file
18
Dockerfile.dev
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# eamco_scraper - DEVELOPMENT Dockerfile
|
||||||
|
# Minimal development setup
|
||||||
|
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
libpq-dev gcc && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["uvicorn", "app.main:app", "--reload", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
37
Dockerfile.local
Normal file
37
Dockerfile.local
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# eamco_scraper - LOCAL Dockerfile
|
||||||
|
# Used by docker-compose.local.yml
|
||||||
|
# Features: Volume mounts for development, hot reload
|
||||||
|
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
# Set environment variables
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
PYTHONPATH=/app
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
libpq-dev \
|
||||||
|
gcc \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Copy requirements
|
||||||
|
COPY requirements.txt .
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy environment file for local
|
||||||
|
COPY .env.local .env
|
||||||
|
|
||||||
|
# Copy application code (will be overridden by volume mount)
|
||||||
|
COPY app/ ./app/
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
# Development: Run with reload
|
||||||
|
CMD ["uvicorn", "app.main:app", "--reload", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
49
Dockerfile.prod
Normal file
49
Dockerfile.prod
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# eamco_scraper - PRODUCTION Dockerfile
|
||||||
|
# Used by docker-compose.prod.yml
|
||||||
|
# Features: Optimized for production, non-root user, health checks
|
||||||
|
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
# Set environment variables
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
PYTHONPATH=/app \
|
||||||
|
PIP_NO_CACHE_DIR=1 \
|
||||||
|
PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies (psycopg2 requirements)
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
libpq-dev \
|
||||||
|
gcc \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
|
&& apt-get clean
|
||||||
|
|
||||||
|
# Copy requirements first (for better layer caching)
|
||||||
|
COPY requirements.txt .
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy environment file for production
|
||||||
|
COPY .env.prod .env
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY app/ ./app/
|
||||||
|
|
||||||
|
# Create non-root user for security
|
||||||
|
RUN useradd --create-home --shell /bin/bash appuser && \
|
||||||
|
chown -R appuser:appuser /app
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
||||||
|
|
||||||
|
# Production: Run without reload, with workers
|
||||||
|
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
|
||||||
155
README.md
Normal file
155
README.md
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
# eamco_scraper
|
||||||
|
|
||||||
|
FastAPI microservice for scraping heating oil prices from New England Oil and storing historical pricing data.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This service scrapes oil company pricing data from the New England Oil website (Zone 10 - Central Massachusetts) and stores it in a PostgreSQL database for historical tracking and trend analysis.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Web Scraping**: Automated scraping of oil prices using BeautifulSoup4
|
||||||
|
- **Historical Tracking**: Stores all price records (no updates, only inserts) for trend analysis
|
||||||
|
- **Cron-Friendly**: Single GET request triggers scrape and storage
|
||||||
|
- **Health Checks**: Built-in health endpoint for monitoring
|
||||||
|
- **Docker Ready**: Production and development Docker configurations
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### `GET /health`
|
||||||
|
Health check endpoint with database connectivity status.
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "healthy",
|
||||||
|
"db_connected": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### `GET /scraper/newenglandoil/latestprice`
|
||||||
|
Trigger scrape of New England Oil Zone 10 prices, store in database, and return results.
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "success",
|
||||||
|
"message": "Successfully scraped and stored 30 prices",
|
||||||
|
"prices_scraped": 30,
|
||||||
|
"prices_stored": 30,
|
||||||
|
"scrape_timestamp": "2026-02-07T22:00:00",
|
||||||
|
"prices": [
|
||||||
|
{
|
||||||
|
"company_name": "AUBURN OIL",
|
||||||
|
"town": "Auburn",
|
||||||
|
"price_decimal": 2.599,
|
||||||
|
"scrape_date": "2026-02-07",
|
||||||
|
"zone": "zone10"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
### `company_prices` Table
|
||||||
|
|
||||||
|
| Column | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| id | SERIAL | Primary key |
|
||||||
|
| company_name | VARCHAR(255) | Oil company name |
|
||||||
|
| town | VARCHAR(100) | Town/city |
|
||||||
|
| price_decimal | DECIMAL(6,3) | Price per gallon |
|
||||||
|
| scrape_date | DATE | Date price was listed |
|
||||||
|
| zone | VARCHAR(50) | Geographic zone (default: zone10) |
|
||||||
|
| created_at | TIMESTAMP | Record creation timestamp |
|
||||||
|
|
||||||
|
**Indexes:**
|
||||||
|
- `idx_company_prices_company` on `company_name`
|
||||||
|
- `idx_company_prices_scrape_date` on `scrape_date`
|
||||||
|
- `idx_company_prices_zone` on `zone`
|
||||||
|
- `idx_company_prices_company_date` on `(company_name, scrape_date)`
|
||||||
|
- `idx_company_prices_zone_date` on `(zone, scrape_date)`
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### Local Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create virtual environment
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Copy environment file
|
||||||
|
cp .env.example .env.local
|
||||||
|
|
||||||
|
# Edit .env.local with your database credentials
|
||||||
|
|
||||||
|
# Run the application
|
||||||
|
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Local
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /mnt/code/oil/eamco/eamco_deploy
|
||||||
|
docker-compose -f docker-compose.local.yml up scraper_local
|
||||||
|
```
|
||||||
|
|
||||||
|
Access at: http://localhost:9619
|
||||||
|
|
||||||
|
## Production
|
||||||
|
|
||||||
|
### Docker Production
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /mnt/code/oil/eamco/eamco_deploy
|
||||||
|
docker-compose -f docker-compose.prod.yml up -d scraper_prod
|
||||||
|
```
|
||||||
|
|
||||||
|
Access at: http://192.168.1.204:9519
|
||||||
|
|
||||||
|
## Cron Integration
|
||||||
|
|
||||||
|
Add to Unraid cron or system crontab:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Scrape prices daily at 6 AM
|
||||||
|
0 6 * * * curl -s http://192.168.1.204:9619/scraper/newenglandoil/latestprice > /dev/null 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| MODE | Application mode (LOCAL/PRODUCTION) | LOCAL |
|
||||||
|
| POSTGRES_USERNAME | Database username | postgres |
|
||||||
|
| POSTGRES_PW | Database password | password |
|
||||||
|
| POSTGRES_SERVER | Database server | 192.168.1.204 |
|
||||||
|
| POSTGRES_PORT | Database port | 5432 |
|
||||||
|
| POSTGRES_DBNAME | Database name | eamco |
|
||||||
|
| LOG_LEVEL | Logging level | INFO |
|
||||||
|
| SCRAPER_DELAY | Delay between requests (seconds) | 2.0 |
|
||||||
|
| SCRAPER_TIMEOUT | Request timeout (seconds) | 10 |
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
- **Framework**: FastAPI 0.109+
|
||||||
|
- **Database**: PostgreSQL 15+ with SQLAlchemy 2.0
|
||||||
|
- **Scraping**: BeautifulSoup4 + lxml + requests
|
||||||
|
- **Server**: Uvicorn with 2 workers (production)
|
||||||
|
|
||||||
|
## Ports
|
||||||
|
|
||||||
|
- **Local Development**: 9619
|
||||||
|
- **Production**: 9519
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
- Frontend display on Home page (table or cards)
|
||||||
|
- Price change alerts/notifications
|
||||||
|
- Support for additional zones
|
||||||
|
- Price trend graphs and analytics
|
||||||
1
app/__init__.py
Normal file
1
app/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# eamco_scraper app package
|
||||||
111
app/config.py
Normal file
111
app/config.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
"""
|
||||||
|
Configuration settings for eamco_scraper.
|
||||||
|
|
||||||
|
This module provides configuration with environment-based switching:
|
||||||
|
- LOCAL: Uses 'eamco' database, localhost CORS origins
|
||||||
|
- PRODUCTION: Uses 'auburnoil' database, production domain CORS origins
|
||||||
|
|
||||||
|
Environment variables are loaded from .env.local or .env.prod depending
|
||||||
|
on the Docker compose file used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Load environment variables from .env file if present
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ENVIRONMENT MODE
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
MODE = os.getenv("MODE", "LOCAL")
|
||||||
|
CURRENT_SETTINGS = os.getenv("CURRENT_SETTINGS", "DEVELOPMENT")
|
||||||
|
|
||||||
|
# Log configuration mode (logger setup happens after config is loaded)
|
||||||
|
_config_mode_msg = f"Using {'PRODUCTION' if CURRENT_SETTINGS == 'PRODUCTION' else 'DEVELOPMENT'} configuration"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DATABASE CONFIGURATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Database connection components (can be overridden individually)
|
||||||
|
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME", "postgres")
|
||||||
|
POSTGRES_PW = os.getenv("POSTGRES_PW", "password")
|
||||||
|
POSTGRES_SERVER = os.getenv("POSTGRES_SERVER", "192.168.1.204")
|
||||||
|
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
|
||||||
|
|
||||||
|
# Database name differs by environment
|
||||||
|
if CURRENT_SETTINGS == "PRODUCTION":
|
||||||
|
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "auburnoil")
|
||||||
|
else:
|
||||||
|
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "eamco")
|
||||||
|
|
||||||
|
# Build connection URI from components (fallback)
|
||||||
|
_DEFAULT_DATABASE_URI = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
|
||||||
|
POSTGRES_USERNAME,
|
||||||
|
POSTGRES_PW,
|
||||||
|
POSTGRES_SERVER,
|
||||||
|
POSTGRES_PORT,
|
||||||
|
POSTGRES_DBNAME
|
||||||
|
)
|
||||||
|
|
||||||
|
# Allow full DATABASE_URL override
|
||||||
|
DATABASE_URL: str = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URI)
|
||||||
|
|
||||||
|
# SQLAlchemy binds (for compatibility)
|
||||||
|
SQLALCHEMY_DATABASE_URI = DATABASE_URL
|
||||||
|
SQLALCHEMY_BINDS = {POSTGRES_DBNAME: SQLALCHEMY_DATABASE_URI}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CORS CONFIGURATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Parse CORS origins from environment (comma-separated) or use defaults
|
||||||
|
_cors_env = os.getenv("CORS_ORIGINS", "")
|
||||||
|
|
||||||
|
if _cors_env:
|
||||||
|
CORS_ORIGINS: List[str] = [origin.strip() for origin in _cors_env.split(",")]
|
||||||
|
elif CURRENT_SETTINGS == "PRODUCTION":
|
||||||
|
# Production CORS origins
|
||||||
|
CORS_ORIGINS = [
|
||||||
|
"https://oil.edwineames.com",
|
||||||
|
"https://edwineames.com",
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
# Development CORS origins
|
||||||
|
CORS_ORIGINS = [
|
||||||
|
"http://localhost:9000",
|
||||||
|
"https://localhost:9513",
|
||||||
|
"http://localhost:9514",
|
||||||
|
"http://localhost:9512",
|
||||||
|
"http://localhost:9511",
|
||||||
|
"http://localhost:5173", # Frontend port
|
||||||
|
"http://localhost:9616", # Authorize service port
|
||||||
|
]
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# SCRAPER CONFIGURATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# User agent for web scraping (identifies your application)
|
||||||
|
SCRAPER_USER_AGENT: str = "Unraid-EamcoScraper/1.0 (eeames214@gmail.com)"
|
||||||
|
|
||||||
|
# Rate limiting: Sleep between requests (be respectful to target servers)
|
||||||
|
SCRAPER_DELAY_SECONDS: float = float(os.getenv("SCRAPER_DELAY", "2.0"))
|
||||||
|
|
||||||
|
# Request timeout in seconds
|
||||||
|
SCRAPER_TIMEOUT: int = int(os.getenv("SCRAPER_TIMEOUT", "10"))
|
||||||
|
|
||||||
|
# Target URL for New England Oil Zone 10
|
||||||
|
NEWENGLAND_OIL_ZONE10_URL: str = "https://www.newenglandoil.com/massachusetts/zone10.asp?x=0"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# LOGGING CONFIGURATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
|
||||||
|
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||||
88
app/database.py
Normal file
88
app/database.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from typing import Generator
|
||||||
|
|
||||||
|
from sqlalchemy import create_engine, text
|
||||||
|
from sqlalchemy.orm import sessionmaker, Session
|
||||||
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
||||||
|
from app.config import (
|
||||||
|
DATABASE_URL,
|
||||||
|
LOG_LEVEL,
|
||||||
|
LOG_FORMAT,
|
||||||
|
)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# LOGGING CONFIGURATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
|
||||||
|
format=LOG_FORMAT,
|
||||||
|
handlers=[
|
||||||
|
logging.StreamHandler(sys.stdout),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DATABASE SETUP
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Create SQLAlchemy engine with connection pooling
|
||||||
|
engine = create_engine(
|
||||||
|
DATABASE_URL,
|
||||||
|
pool_pre_ping=True, # Verify connections before use
|
||||||
|
pool_size=5,
|
||||||
|
max_overflow=10,
|
||||||
|
echo=False, # Set to True for SQL debugging
|
||||||
|
)
|
||||||
|
|
||||||
|
# Session factory
|
||||||
|
SessionLocal = sessionmaker(
|
||||||
|
autocommit=False,
|
||||||
|
autoflush=False,
|
||||||
|
bind=engine,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_db() -> Generator[Session, None, None]:
|
||||||
|
"""
|
||||||
|
Dependency that provides a database session.
|
||||||
|
|
||||||
|
Yields a SQLAlchemy session and ensures proper cleanup.
|
||||||
|
"""
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
yield db
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def get_db_session() -> Generator[Session, None, None]:
|
||||||
|
"""
|
||||||
|
Context manager for database sessions (non-dependency use).
|
||||||
|
"""
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
yield db
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def check_db_connection() -> bool:
|
||||||
|
"""
|
||||||
|
Test database connectivity.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if database is reachable, False otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with get_db_session() as db:
|
||||||
|
db.execute(text("SELECT 1"))
|
||||||
|
return True
|
||||||
|
except SQLAlchemyError as e:
|
||||||
|
logger.error(f"Database connection failed: {e}")
|
||||||
|
return False
|
||||||
195
app/main.py
Normal file
195
app/main.py
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
"""
|
||||||
|
eamco_scraper - FastAPI Oil Price Scraping Microservice.
|
||||||
|
|
||||||
|
This microservice provides endpoints for scraping oil prices from New England Oil
|
||||||
|
and storing them in the database for historical tracking.
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
GET /health - Health check with database connectivity status
|
||||||
|
GET /scraper/newenglandoil/latestprice - Trigger scrape and return latest prices
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Development
|
||||||
|
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||||
|
|
||||||
|
# Production (Docker)
|
||||||
|
docker run -p 8000:8000 eamco_scraper
|
||||||
|
|
||||||
|
# Trigger from cron
|
||||||
|
curl http://localhost:8000/scraper/newenglandoil/latestprice
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import List
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from fastapi import FastAPI, Depends, HTTPException
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.config import (
|
||||||
|
DATABASE_URL,
|
||||||
|
CORS_ORIGINS,
|
||||||
|
LOG_LEVEL,
|
||||||
|
LOG_FORMAT,
|
||||||
|
)
|
||||||
|
from app.models import Base, CompanyPrice
|
||||||
|
from app.database import engine, get_db, check_db_connection
|
||||||
|
from app.schemas import HealthResponse, PriceRecord
|
||||||
|
from app.newenglandoil.router import router as newenglandoil_router
|
||||||
|
from app.priceticker.router import router as priceticker_router
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# LOGGING CONFIGURATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
|
||||||
|
format=LOG_FORMAT,
|
||||||
|
handlers=[
|
||||||
|
logging.StreamHandler(sys.stdout),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FASTAPI APPLICATION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
app = FastAPI(
|
||||||
|
title="eamco_scraper",
|
||||||
|
description="Oil price scraping microservice for New England Oil",
|
||||||
|
version="1.0.0",
|
||||||
|
docs_url="/docs",
|
||||||
|
redoc_url="/redoc",
|
||||||
|
)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CORS MIDDLEWARE
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=CORS_ORIGINS,
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ROUTERS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
app.include_router(newenglandoil_router)
|
||||||
|
app.include_router(priceticker_router)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ENDPOINTS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/", include_in_schema=False)
|
||||||
|
async def root():
|
||||||
|
"""Root endpoint - redirect to docs."""
|
||||||
|
return {
|
||||||
|
"service": "eamco_scraper",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"docs": "/docs",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health", response_model=HealthResponse, tags=["Health"])
|
||||||
|
async def health_check():
|
||||||
|
"""
|
||||||
|
Health check endpoint.
|
||||||
|
|
||||||
|
Returns service status and database connectivity.
|
||||||
|
Use this endpoint for container health checks and monitoring.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
HealthResponse with status and db_connected flag
|
||||||
|
"""
|
||||||
|
db_connected = check_db_connection()
|
||||||
|
|
||||||
|
return HealthResponse(
|
||||||
|
status="healthy" if db_connected else "degraded",
|
||||||
|
db_connected=db_connected,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get(
|
||||||
|
"/scraper/prices",
|
||||||
|
response_model=List[PriceRecord],
|
||||||
|
tags=["Prices"],
|
||||||
|
)
|
||||||
|
async def get_stored_prices(
|
||||||
|
date: str | None = None,
|
||||||
|
db: Session = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Get stored oil prices from the database.
|
||||||
|
|
||||||
|
If no date is provided, returns prices for the current date (UTC).
|
||||||
|
Does NOT trigger a scrape.
|
||||||
|
"""
|
||||||
|
if not date:
|
||||||
|
date = datetime.utcnow().date().isoformat()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Query prices for the specific date
|
||||||
|
prices = db.query(CompanyPrice).filter(
|
||||||
|
CompanyPrice.scrape_date == date
|
||||||
|
).all()
|
||||||
|
|
||||||
|
return [
|
||||||
|
PriceRecord(
|
||||||
|
company_name=p.company_name,
|
||||||
|
town=p.town,
|
||||||
|
price_decimal=float(p.price_decimal),
|
||||||
|
scrape_date=str(p.scrape_date),
|
||||||
|
zone=p.zone
|
||||||
|
) for p in prices
|
||||||
|
]
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching prices: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# STARTUP/SHUTDOWN EVENTS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Application startup - log configuration and test DB connection."""
|
||||||
|
logger.info("🚀 eamco_scraper STARTING")
|
||||||
|
mode = os.environ.get('MODE', 'DEVELOPMENT').upper()
|
||||||
|
if mode in ['DEVELOPMENT', 'DEV', 'LOCAL']:
|
||||||
|
logger.info("🤖🤖🤖🤖🤖 Mode: Development 🤖🤖🤖🤖🤖")
|
||||||
|
elif mode in ['PRODUCTION', 'PROD']:
|
||||||
|
logger.info("💀💀💀💀💀💀💀💀💀💀 ⚠️ WARNING PRODUCTION 💀💀💀💀💀💀💀💀💀💀")
|
||||||
|
logger.info(f"DB: {DATABASE_URL[:30]}...")
|
||||||
|
logger.info(f"CORS: {len(CORS_ORIGINS)} origins configured")
|
||||||
|
|
||||||
|
# Test database connection
|
||||||
|
if check_db_connection():
|
||||||
|
logger.info("DB Connection: ✅ OK")
|
||||||
|
else:
|
||||||
|
logger.warning("DB Connection: ❌ FAILED")
|
||||||
|
|
||||||
|
# Create tables if they don't exist
|
||||||
|
try:
|
||||||
|
Base.metadata.create_all(bind=engine)
|
||||||
|
logger.info("Database tables verified/created")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to create tables: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("shutdown")
|
||||||
|
async def shutdown_event():
|
||||||
|
"""Application shutdown - cleanup."""
|
||||||
|
logger.info("🛑 eamco_scraper SHUTTING DOWN")
|
||||||
99
app/models.py
Normal file
99
app/models.py
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
"""
|
||||||
|
SQLAlchemy models for eamco_scraper.
|
||||||
|
|
||||||
|
This module defines the database models for storing scraped oil price data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from sqlalchemy import Column, Integer, String, Numeric, Date, DateTime, Index
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
|
||||||
|
|
||||||
|
class CompanyPrice(Base):
    """
    Historical oil-company pricing data.

    Every scrape inserts new rows (records are never updated) so that
    price trends can be analyzed over time.

    Attributes:
        id: Primary key.
        company_name: Name of the oil company.
        town: Town where the company operates.
        price_decimal: Price per gallon (e.g., 2.599).
        scrape_date: Date the price was listed on the website.
        zone: Geographic zone (default 'zone10', Central Massachusetts).
        created_at: Timestamp when the row was inserted into the database.
    """

    __tablename__ = "company_prices"

    id = Column(Integer, primary_key=True, autoincrement=True)
    company_name = Column(String(255), nullable=False, index=True)
    town = Column(String(100), nullable=True)
    price_decimal = Column(Numeric(6, 3), nullable=False)
    scrape_date = Column(Date, nullable=False, index=True)
    zone = Column(String(50), nullable=False, default="zone10", index=True)
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    def __repr__(self):
        return (
            f"<CompanyPrice(company='{self.company_name}', "
            f"price={self.price_decimal}, date={self.scrape_date})>"
        )

    def to_dict(self):
        """Convert model instance to dictionary for JSON serialization."""
        # NB: truthiness check (not `is not None`) mirrors the original
        # behavior — a 0 price would serialize as None.
        price = float(self.price_decimal) if self.price_decimal else None
        return {
            "id": self.id,
            "company_name": self.company_name,
            "town": self.town,
            "price_decimal": price,
            "scrape_date": self.scrape_date.isoformat() if self.scrape_date else None,
            "zone": self.zone,
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Composite indexes covering the most common lookup patterns:
# per-company price history and per-zone price history, both keyed by date.
Index('idx_company_prices_company_date', CompanyPrice.company_name, CompanyPrice.scrape_date)
Index('idx_company_prices_zone_date', CompanyPrice.zone, CompanyPrice.scrape_date)
|
||||||
|
|
||||||
|
|
||||||
|
class TickerPrice(Base):
    """
    Snapshot of a ticker price (stocks/commodities).

    Attributes:
        id: Primary key.
        symbol: Ticker symbol (e.g., HO=F, CL=F, RB=F).
        price_decimal: Current price.
        currency: Currency code (e.g., USD).
        change_decimal: Price change amount.
        percent_change_decimal: Price change percentage.
        timestamp: Time of scrape.
    """

    __tablename__ = "ticker_prices"

    id = Column(Integer, primary_key=True, autoincrement=True)
    symbol = Column(String(20), nullable=False, index=True)
    price_decimal = Column(Numeric(10, 4), nullable=False)
    currency = Column(String(10), nullable=True)
    change_decimal = Column(Numeric(10, 4), nullable=True)
    percent_change_decimal = Column(Numeric(10, 4), nullable=True)
    timestamp = Column(DateTime, nullable=False, default=datetime.utcnow, index=True)

    def __repr__(self):
        return (
            f"<TickerPrice(symbol='{self.symbol}', "
            f"price={self.price_decimal}, time={self.timestamp})>"
        )

    def to_dict(self):
        """Convert model instance to dictionary for JSON serialization."""
        def _num(value):
            # Numeric columns come back as Decimal; JSON wants float.
            return float(value) if value is not None else None

        return {
            "id": self.id,
            "symbol": self.symbol,
            "price": _num(self.price_decimal),
            "currency": self.currency,
            "change": _num(self.change_decimal),
            "percent_change": _num(self.percent_change_decimal),
            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
        }
|
||||||
|
|
||||||
9
app/newenglandoil/__init__.py
Normal file
9
app/newenglandoil/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
"""
|
||||||
|
New England Oil Scraper Module.
|
||||||
|
|
||||||
|
This package contains code specific to scraping prices from the New England Oil website.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from app.newenglandoil.scraper import scrape_newengland_oil, ScraperError
|
||||||
|
|
||||||
|
__all__ = ['scrape_newengland_oil', 'ScraperError']
|
||||||
140
app/newenglandoil/router.py
Normal file
140
app/newenglandoil/router.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
||||||
|
from app.models import CompanyPrice
|
||||||
|
from app.schemas import PriceRecord
|
||||||
|
from app.newenglandoil.schemas import LatestPriceResponse
|
||||||
|
from app.newenglandoil.scraper import scrape_newengland_oil, ScraperError
|
||||||
|
from app.database import get_db
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/scraper/newenglandoil",
|
||||||
|
tags=["New England Oil Scraper"],
|
||||||
|
)
|
||||||
|
|
||||||
|
@router.get(
    "/latestprice",
    response_model=LatestPriceResponse,
)
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Scrape latest oil prices from New England Oil Zone 10.

    This endpoint:
    1. Scrapes the New England Oil website for current prices
    2. Stores all prices in the database (historical tracking),
       skipping rows already present for the same company/date/zone
    3. Returns the scraped prices

    Designed to be called from cron for automated price tracking.

    Example:
        curl http://localhost:8000/scraper/newenglandoil/latestprice

    Returns:
        LatestPriceResponse with scraped prices and storage statistics
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("SCRAPER ENDPOINT CALLED - New England Oil Zone 10")
    logger.info(banner)

    try:
        scraped = scrape_newengland_oil()

        if not scraped:
            logger.warning("No prices were scraped")
            return LatestPriceResponse(
                status="warning",
                message="No prices found on website",
                prices_scraped=0,
                prices_stored=0,
                scrape_timestamp=datetime.utcnow().isoformat(),
                prices=[],
            )

        new_rows = 0
        response_prices = []

        for item in scraped:
            zone = item.get("zone", "zone10")

            # Echo every scraped row back to the caller, whether or not it
            # turns out to be a duplicate in the database.
            response_prices.append(PriceRecord(
                company_name=item["company_name"],
                town=item.get("town"),
                price_decimal=float(item["price_decimal"]),
                scrape_date=item["scrape_date"].isoformat(),
                zone=zone,
            ))

            try:
                # Skip rows already stored for this company/date/zone.
                duplicate = db.query(CompanyPrice).filter(
                    CompanyPrice.company_name == item["company_name"],
                    CompanyPrice.scrape_date == item["scrape_date"],
                    CompanyPrice.zone == zone,
                ).first()

                if duplicate:
                    logger.debug(f"Skipping duplicate record for {item['company_name']} on {item['scrape_date']}")
                    continue

                db.add(CompanyPrice(
                    company_name=item["company_name"],
                    town=item.get("town"),
                    price_decimal=item["price_decimal"],
                    scrape_date=item["scrape_date"],
                    zone=zone,
                ))
                new_rows += 1

            except Exception as e:
                # Best-effort per row: log and keep processing the rest.
                logger.error(f"Failed to store price for {item.get('company_name')}: {e}")

        if new_rows > 0:
            db.commit()
            logger.info(f"Successfully stored {new_rows} new price records")
        else:
            logger.info("No new price records to store (all duplicates)")

        return LatestPriceResponse(
            status="success",
            message=f"Successfully scraped {len(scraped)} prices, stored {new_rows} new records",
            prices_scraped=len(scraped),
            prices_stored=new_rows,
            scrape_timestamp=datetime.utcnow().isoformat(),
            prices=response_prices,
        )

    except ScraperError as e:
        logger.error(f"Scraper error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        )

    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error during price storage: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        )

    except Exception as e:
        db.rollback()
        logger.error(f"Unexpected error during scraping: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Scraping failed: {str(e)}"
        )
|
||||||
12
app/newenglandoil/schemas.py
Normal file
12
app/newenglandoil/schemas.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from typing import List
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from app.schemas import PriceRecord
|
||||||
|
|
||||||
|
class LatestPriceResponse(BaseModel):
    """Response body for the /latestprice scrape endpoint."""
    status: str                 # "success" or "warning"
    message: str                # human-readable summary of the run
    prices_scraped: int         # rows extracted from the website
    prices_stored: int          # rows newly inserted (duplicates skipped)
    scrape_timestamp: str       # ISO-8601 timestamp of the scrape
    prices: List[PriceRecord]   # every scraped row, including duplicates
|
||||||
170
app/newenglandoil/scraper.py
Normal file
170
app/newenglandoil/scraper.py
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
"""
|
||||||
|
Web scraping module for New England Oil prices.
|
||||||
|
|
||||||
|
This module handles scraping oil price data from the New England Oil website
|
||||||
|
for Zone 10 (Central Massachusetts).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from datetime import date
|
||||||
|
from typing import List, Dict, Optional
|
||||||
|
from decimal import Decimal
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from app.config import (
|
||||||
|
NEWENGLAND_OIL_ZONE10_URL,
|
||||||
|
SCRAPER_USER_AGENT,
|
||||||
|
SCRAPER_TIMEOUT,
|
||||||
|
SCRAPER_DELAY_SECONDS,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperError(Exception):
    """Raised when fetching or parsing the New England Oil page fails."""
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_newengland_oil() -> List[Dict[str, object]]:
    """
    Scrape oil prices from the New England Oil Zone 10 page.

    Fetches the page, walks every HTML table row, and extracts the company
    name (from the row's link), the town, and the per-gallon price.

    Returns:
        List of dicts with keys: company_name, town, price_decimal,
        scrape_date, zone

    Raises:
        ScraperError: If the HTTP request fails, no tables are found, or
            no price rows could be extracted.
    """
    logger.info(f"Starting scrape of {NEWENGLAND_OIL_ZONE10_URL}")

    headers = {
        "User-Agent": SCRAPER_USER_AGENT,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }

    try:
        response = requests.get(
            NEWENGLAND_OIL_ZONE10_URL,
            headers=headers,
            timeout=SCRAPER_TIMEOUT
        )
        response.raise_for_status()

        logger.info(f"Successfully fetched page (status: {response.status_code})")

        soup = BeautifulSoup(response.content, 'lxml')

        prices = []
        today = date.today()

        tables = soup.find_all('table')

        if not tables:
            logger.warning("No tables found on page")
            # Debug aid: dump the raw HTML to the working directory so the
            # page structure can be inspected offline.
            with open("debug_page.html", "wb") as f:
                f.write(response.content)
            raise ScraperError("No price table found on page")

        for table in tables:
            for row in table.find_all('tr'):
                cells = row.find_all(['td', 'th'])
                if len(cells) < 3:  # Expect at least company, town, price
                    continue

                # Company name lives in the row's first link.
                company_link = row.find('a')
                if not company_link:
                    continue
                company_name = company_link.get_text(strip=True)

                cell_texts = [cell.get_text(strip=True) for cell in cells]

                price_value = None
                town_value = None

                for text in cell_texts:
                    # Try to interpret the cell as a price ("$2.599" / "2.599").
                    text_clean = text.replace('$', '').replace(',', '').strip()
                    candidate = None
                    if text_clean and '.' in text_clean:
                        try:
                            parsed = Decimal(text_clean)
                            # Sanity range for heating oil: $0.50 - $10.00/gal.
                            if Decimal('0.50') <= parsed <= Decimal('10.00'):
                                candidate = parsed
                        except (ValueError, ArithmeticError):
                            candidate = None

                    if candidate is not None:
                        price_value = candidate
                        break

                    # BUGFIX: the town was previously only captured when a
                    # '.'-containing cell failed to parse as a Decimal, so
                    # ordinary town names (no dot) were never recorded. Any
                    # non-price cell other than the company name is now a
                    # town candidate; the first one wins.
                    if (town_value is None and text and text != company_name
                            and not text.startswith('$') and len(text) > 2):
                        town_value = text

                if price_value is not None:
                    prices.append({
                        "company_name": company_name,
                        "town": town_value,
                        "price_decimal": price_value,
                        "scrape_date": today,
                        "zone": "zone10"
                    })
                    logger.debug(f"Found: {company_name} - {town_value} - ${price_value}")

        if not prices:
            logger.warning("No prices extracted from page")
            raise ScraperError("Failed to extract any price data from page")

        logger.info(f"Successfully scraped {len(prices)} price records")
        return prices

    except ScraperError:
        # BUGFIX: re-raise our own errors untouched instead of letting the
        # generic handler below double-wrap them as "Failed to parse page".
        raise

    except requests.RequestException as e:
        logger.error(f"Request failed: {e}")
        raise ScraperError(f"Failed to fetch page: {str(e)}")

    except Exception as e:
        logger.error(f"Scraping failed: {e}", exc_info=True)
        raise ScraperError(f"Failed to parse page: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_and_delay() -> List[Dict[str, any]]:
    """
    Scrape prices, then sleep to respect the configured rate limit.

    Convenience wrapper around scrape_newengland_oil() for callers that
    issue repeated scrapes.

    Returns:
        List of price dictionaries
    """
    results = scrape_newengland_oil()

    if SCRAPER_DELAY_SECONDS > 0:
        logger.debug(f"Sleeping {SCRAPER_DELAY_SECONDS}s for rate limiting")
        time.sleep(SCRAPER_DELAY_SECONDS)

    return results
|
||||||
0
app/priceticker/__init__.py
Normal file
0
app/priceticker/__init__.py
Normal file
72
app/priceticker/router.py
Normal file
72
app/priceticker/router.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from typing import List
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import TickerPrice
|
||||||
|
from app.priceticker.scraper import fetch_ticker_data
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/scraper/priceticker",
|
||||||
|
tags=["Price Ticker"]
|
||||||
|
)
|
||||||
|
|
||||||
|
@router.post("/update")
async def update_prices(db: Session = Depends(get_db)):
    """
    Trigger an immediate update of stock/commodity prices.

    Fetches the current ticker data and appends one row per symbol;
    rolls back the whole batch on any database failure.
    """
    logger.info("Triggering ticker update...")
    data = fetch_ticker_data()

    if not data:
        raise HTTPException(status_code=500, detail="Failed to fetch ticker data")

    try:
        records = [
            TickerPrice(
                symbol=entry["symbol"],
                price_decimal=entry["price"],
                currency=entry["currency"],
                change_decimal=entry["change"],
                percent_change_decimal=entry["percent_change"],
                timestamp=entry["timestamp"]
            )
            for entry in data
        ]
        for record in records:
            db.add(record)

        db.commit()
        return {"status": "success", "updated": len(records)}

    except Exception as e:
        db.rollback()
        logger.error(f"Database error saving tickers: {e}")
        raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
|
||||||
|
|
||||||
|
@router.get("/latest")
async def get_latest_prices(db: Session = Depends(get_db)):
    """
    Get the most recent price for each ticker symbol.

    Runs one "latest row" query per tracked symbol instead of a grouped
    subquery; with only three symbols this keeps the ORM code simple.
    """
    # The symbols this service tracks: HO=F, CL=F, RB=F.
    TARGET_SYMBOLS = ["HO=F", "CL=F", "RB=F"]

    latest_prices = []
    for sym in TARGET_SYMBOLS:
        newest = (
            db.query(TickerPrice)
            .filter(TickerPrice.symbol == sym)
            .order_by(TickerPrice.timestamp.desc())
            .first()
        )
        if newest:
            latest_prices.append(newest.to_dict())

    return latest_prices
|
||||||
65
app/priceticker/scraper.py
Normal file
65
app/priceticker/scraper.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import logging
|
||||||
|
import yfinance as yf
|
||||||
|
from decimal import Decimal
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Ticker mapping
|
||||||
|
# HO=F : Heating Oil
|
||||||
|
# CL=F : Crude Oil
|
||||||
|
# RB=F : RBOB Gasoline
|
||||||
|
TICKERS = ["HO=F", "CL=F", "RB=F"]
|
||||||
|
|
||||||
|
def fetch_ticker_data() -> List[Dict[str, Any]]:
    """
    Fetch current data for oil tickers from Yahoo Finance.

    Returns:
        List of dictionaries containing ticker data. Symbols that fail to
        resolve are logged and skipped rather than aborting the batch.
    """
    snapshot: List[Dict[str, Any]] = []

    try:
        # One batched request for all symbols.
        batch = yf.Tickers(" ".join(TICKERS))

        for symbol in TICKERS:
            try:
                # fast_info is generally faster than full .info and is
                # sufficient for last_price; no history() fallback here.
                quote = batch.tickers[symbol].fast_info

                last_price = quote.last_price
                previous_close = quote.previous_close

                if last_price is None:
                    logger.warning(f"No price found for {symbol}")
                    continue

                delta = last_price - previous_close
                pct = (delta / previous_close) * 100 if previous_close else 0

                snapshot.append({
                    "symbol": symbol,
                    "price": Decimal(str(last_price)),
                    "currency": quote.currency,
                    "change": Decimal(str(delta)),
                    "percent_change": Decimal(str(pct)),
                    "timestamp": datetime.utcnow()
                })

                logger.info(f"Fetched {symbol}: {last_price} ({pct:.2f}%)")

            except Exception as e:
                # Per-symbol best effort: one bad ticker shouldn't kill the rest.
                logger.error(f"Error processing {symbol}: {e}")

    except Exception as e:
        logger.error(f"Error fetching ticker data: {e}")

    return snapshot
|
||||||
16
app/schemas.py
Normal file
16
app/schemas.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
from typing import Optional
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
class HealthResponse(BaseModel):
    """Response body for the service health-check endpoint."""
    status: str
    db_connected: bool
|
||||||
|
|
||||||
|
|
||||||
|
class PriceRecord(BaseModel):
    """A single scraped company price, as returned by the API."""
    company_name: str
    town: Optional[str] = None
    price_decimal: float
    scrape_date: str  # ISO-8601 date string (callers pass date.isoformat())
    zone: str
|
||||||
1
cronjob.txt
Normal file
1
cronjob.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Scrape New England Oil prices every 5 minutes.
# BUGFIX: cron runs commands with /bin/sh, where "&>/dev/null" is parsed as
# "run in background, then redirect nothing" instead of redirecting both
# streams; use POSIX-portable redirection explicitly.
*/5 * * * * /usr/bin/curl -m 120 -s http://localhost:9519/scraper/newenglandoil/latestprice >/dev/null 2>&1
|
||||||
26
debug_scraper.py
Normal file
26
debug_scraper.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
# Ad-hoc debug script: fetch the Zone 10 page with a browser User-Agent,
# save the raw HTML, and report how many tables BeautifulSoup can find.
import requests
from bs4 import BeautifulSoup

url = "https://www.newenglandoil.com/massachusetts/zone10.asp?x=0"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

try:
    response = requests.get(url, headers=headers, timeout=15)
    print(f"Status Code: {response.status_code}")

    # Keep a copy of the page for offline inspection.
    with open("debug_page.html", "w", encoding="utf-8") as f:
        f.write(response.text)

    soup = BeautifulSoup(response.content, 'lxml')
    tables = soup.find_all('table')
    print(f"Tables found: {len(tables)}")

    if not tables:
        print("Preview of content:")
        print(response.text[:500])

except Exception as e:
    print(f"Error: {e}")
|
||||||
25
debug_scraper_ua.py
Normal file
25
debug_scraper_ua.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
# Ad-hoc debug script: probe the Zone 10 page with the custom User-Agent
# from config.py to check whether it is served differently from a browser UA.
import requests
from bs4 import BeautifulSoup

url = "https://www.newenglandoil.com/massachusetts/zone10.asp?x=0"
headers = {
    "User-Agent": "Unraid-EamcoScraper/1.0 (eeames214@gmail.com)"
}

try:
    print(f"Testing with UA: {headers['User-Agent']}")
    response = requests.get(url, headers=headers, timeout=15)
    print(f"Status Code: {response.status_code}")

    soup = BeautifulSoup(response.content, 'lxml')
    tables = soup.find_all('table')
    print(f"Tables found: {len(tables)}")

    if not tables:
        print("No tables found. Dumping start of response:")
        print(response.text[:500])

except Exception as e:
    print(f"Error: {e}")
|
||||||
29
migration.sql
Normal file
29
migration.sql
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
-- Database migration for eamco_scraper
-- Creates the tables backing app/models.py. Idempotent: every statement
-- uses IF NOT EXISTS (and COMMENT ON is re-runnable), so the script can
-- be applied repeatedly.

-- Create company_prices table (matches the CompanyPrice model)
CREATE TABLE IF NOT EXISTS company_prices (
    id SERIAL PRIMARY KEY,
    company_name VARCHAR(255) NOT NULL,
    town VARCHAR(100),
    price_decimal DECIMAL(6,3) NOT NULL,
    scrape_date DATE NOT NULL,
    zone VARCHAR(50) NOT NULL DEFAULT 'zone10',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Create indexes for efficient querying (single-column + composite)
CREATE INDEX IF NOT EXISTS idx_company_prices_company ON company_prices(company_name);
CREATE INDEX IF NOT EXISTS idx_company_prices_scrape_date ON company_prices(scrape_date);
CREATE INDEX IF NOT EXISTS idx_company_prices_zone ON company_prices(zone);
CREATE INDEX IF NOT EXISTS idx_company_prices_company_date ON company_prices(company_name, scrape_date);
CREATE INDEX IF NOT EXISTS idx_company_prices_zone_date ON company_prices(zone, scrape_date);

-- BUGFIX: the migration previously omitted ticker_prices, which the
-- TickerPrice model (app/models.py) expects to exist.
CREATE TABLE IF NOT EXISTS ticker_prices (
    id SERIAL PRIMARY KEY,
    symbol VARCHAR(20) NOT NULL,
    price_decimal DECIMAL(10,4) NOT NULL,
    currency VARCHAR(10),
    change_decimal DECIMAL(10,4),
    percent_change_decimal DECIMAL(10,4),
    timestamp TIMESTAMP NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_ticker_prices_symbol ON ticker_prices(symbol);
CREATE INDEX IF NOT EXISTS idx_ticker_prices_timestamp ON ticker_prices(timestamp);

-- Add comments to company_prices
COMMENT ON TABLE company_prices IS 'Historical oil price data scraped from New England Oil website';
COMMENT ON COLUMN company_prices.company_name IS 'Name of the oil company';
COMMENT ON COLUMN company_prices.town IS 'Town where the company operates';
COMMENT ON COLUMN company_prices.price_decimal IS 'Price per gallon in dollars';
COMMENT ON COLUMN company_prices.scrape_date IS 'Date when the price was listed on the website';
COMMENT ON COLUMN company_prices.zone IS 'Geographic zone (e.g., zone10 for Central Massachusetts)';
COMMENT ON COLUMN company_prices.created_at IS 'Timestamp when the record was inserted into database';
|
||||||
18
requirements.txt
Normal file
18
requirements.txt
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# eamco_scraper dependencies
|
||||||
|
# FastAPI web framework and server
|
||||||
|
fastapi>=0.109.0,<1.0.0
|
||||||
|
uvicorn[standard]>=0.27.0,<1.0.0
|
||||||
|
pydantic>=2.5.0,<3.0.0
|
||||||
|
|
||||||
|
# Database
|
||||||
|
sqlalchemy>=2.0.0,<3.0.0
|
||||||
|
psycopg2-binary>=2.9.9,<3.0.0
|
||||||
|
|
||||||
|
# Web scraping
|
||||||
|
beautifulsoup4>=4.12.0,<5.0.0
|
||||||
|
requests>=2.31.0,<3.0.0
|
||||||
|
lxml>=5.0.0,<6.0.0
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
python-dotenv>=1.0.0,<2.0.0
|
||||||
|
yfinance>=0.2.36
|
||||||
75
test.sh
Executable file
75
test.sh
Executable file
@@ -0,0 +1,75 @@
|
|||||||
|
#!/bin/bash
# Test script for eamco_scraper service.
#
# Builds the Docker image, starts the service, then probes the health and
# scraper endpoints. Build/start failures abort the script; endpoint
# failures are reported but do not stop the remaining checks.

set -u

SERVICE_DIR="/mnt/code/oil/eamco/eamco_scraper"
DEPLOY_DIR="/mnt/code/oil/eamco/eamco_deploy"
COMPOSE_FILE="docker-compose.local.yml"
BASE_URL="http://localhost:9619"

echo "=== eamco_scraper Test Script ==="
echo ""

# Check if service directory exists
if [ ! -d "$SERVICE_DIR" ]; then
  echo "❌ Service directory not found"
  exit 1
fi

echo "✅ Service directory exists"
echo ""

# Test 1: Build Docker image
echo "Test 1: Building Docker image..."
# BUGFIX: an unguarded 'cd' would run docker-compose in the wrong directory.
cd "$DEPLOY_DIR" || { echo "❌ Deploy directory not found"; exit 1; }

# Test the command directly instead of inspecting $? afterwards.
if docker-compose -f "$COMPOSE_FILE" build scraper_local; then
  echo "✅ Docker build successful"
else
  echo "❌ Docker build failed"
  exit 1
fi

echo ""

# Test 2: Start the service
echo "Test 2: Starting service..."
if docker-compose -f "$COMPOSE_FILE" up -d scraper_local; then
  echo "✅ Service started"
else
  echo "❌ Service failed to start"
  exit 1
fi

echo ""
echo "Waiting 5 seconds for service to initialize..."
sleep 5

# Test 3: Health check (best-effort: a failure is reported, not fatal)
echo "Test 3: Testing health endpoint..."
HEALTH_RESPONSE=$(curl -s "$BASE_URL/health" || true)

if echo "$HEALTH_RESPONSE" | grep -q "status"; then
  echo "✅ Health endpoint responding"
  echo "Response: $HEALTH_RESPONSE"
else
  echo "❌ Health endpoint not responding"
fi

echo ""

# Test 4: Scraper endpoint (hits the live New England Oil site)
echo "Test 4: Testing scraper endpoint..."
echo "This will scrape live data from New England Oil..."
SCRAPER_RESPONSE=$(curl -s "$BASE_URL/scraper/newenglandoil/latestprice" || true)

if echo "$SCRAPER_RESPONSE" | grep -q "status"; then
  echo "✅ Scraper endpoint responding"
  echo "Response preview:"
  echo "$SCRAPER_RESPONSE" | head -n 20
else
  echo "❌ Scraper endpoint not responding"
fi

echo ""
echo "=== Test Complete ==="
echo ""
echo "To view logs: docker-compose -f docker-compose.local.yml logs scraper_local"
# BUGFIX: 'docker-compose down' does not accept a service name; 'stop' does.
echo "To stop service: docker-compose -f docker-compose.local.yml stop scraper_local"
|
||||||
Reference in New Issue
Block a user