first commit

.env.example (new file, 39 lines)
@@ -0,0 +1,39 @@
# =============================================================================
# eamco_address_checker Environment Configuration
# =============================================================================
# Copy this file to .env and adjust values as needed.
# All values have sensible defaults; only override what you need.

# =============================================================================
# DATABASE
# =============================================================================
# Override the default PostgreSQL connection string
# Default: postgresql+psycopg2://postgres:password@192.168.1.204/eamco
# DATABASE_URL=postgresql+psycopg2://user:pass@host:5432/database

# =============================================================================
# BATCH PROCESSING
# =============================================================================
# Maximum records to process per batch run (default: 150)
# BATCH_SIZE=150

# Number of records to process before committing to database (default: 20)
# COMMIT_BATCH_SIZE=20

# =============================================================================
# RATE LIMITING (Nominatim)
# =============================================================================
# Minimum sleep between geocoding requests in seconds (default: 1.2)
# MIN_SLEEP=1.2

# Maximum sleep between geocoding requests in seconds (default: 1.8)
# MAX_SLEEP=1.8

# Geocoding request timeout in seconds (default: 10)
# GEOCODE_TIMEOUT=10

# =============================================================================
# LOGGING
# =============================================================================
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO)
# LOG_LEVEL=INFO
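
These commented-out settings map one-to-one onto os.getenv reads in app/config.py below; a minimal sketch of how a default resolves when a variable is left unset:

import os

# Same pattern as app/config.py: string default, cast at read time.
batch_size = int(os.getenv("BATCH_SIZE", "150"))   # -> 150 unless overridden
min_sleep = float(os.getenv("MIN_SLEEP", "1.2"))   # -> 1.2 unless overridden
print(batch_size, min_sleep)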
.gitignore (new file, vendored, 132 lines)
@@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having sub-dependencies with platform-specific binaries, it is better to ignore the Pipfile.lock.
# Pipfile.lock

# PEP 582; __pypackages__
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyderworkspace

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Environments
.env.local
.env.prod
Dockerfile (new file, 45 lines)
@@ -0,0 +1,45 @@
# eamco_address_checker Dockerfile
# Lightweight Python 3.11 image for Unraid Docker deployment

FROM python:3.11-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Set working directory
WORKDIR /app

# Install system dependencies (psycopg2 requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq-dev \
    gcc \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Copy requirements first (for better layer caching)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app/ ./app/

# Create non-root user for security
RUN useradd --create-home --shell /bin/bash appuser && \
    chown -R appuser:appuser /app
USER appuser

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
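
The HEALTHCHECK line above is the inline form of the probe below; Docker marks the container unhealthy after three consecutive failures. A sketch of the equivalent standalone script, assuming the service listens on port 8000 as configured:

import sys
import urllib.request

# Probe the /health endpoint; any exception (refused connection, HTTP error)
# fails the check, matching the inline `|| exit 1` behavior.
try:
    urllib.request.urlopen("http://localhost:8000/health", timeout=10)
except Exception:
    sys.exit(1)  # non-zero exit tells Docker the check failed
sys.exit(0)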
Dockerfile.dev (new file, 14 lines)
@@ -0,0 +1,14 @@
FROM python:3.11

ENV PYTHONFAULTHANDLER=1
ENV PYTHONUNBUFFERED=1
ENV MODE="DEVELOPMENT"

WORKDIR /app

COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY . .

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
Dockerfile.local (new file, 40 lines)
@@ -0,0 +1,40 @@
# eamco_address_checker - DEVELOPMENT Dockerfile
# Used by docker-compose.local.yml
# Features: Hot reload via volume mount, debug logging

FROM python:3.11-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Set working directory
WORKDIR /app

# Install system dependencies (psycopg2 requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq-dev \
    gcc \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Copy requirements first (for better layer caching)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy environment file for local development
COPY .env.local .env

# Copy application code (will be overridden by volume mount in compose)
COPY app/ ./app/

# Expose port
EXPOSE 8000

# Development: Run with reload enabled
CMD ["uvicorn", "app.main:app", "--reload", "--host", "0.0.0.0", "--port", "8000"]
Dockerfile.prod (new file, 49 lines)
@@ -0,0 +1,49 @@
# eamco_address_checker - PRODUCTION Dockerfile
# Used by docker-compose.prod.yml
# Features: Optimized for production, non-root user, health checks

FROM python:3.11-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Set working directory
WORKDIR /app

# Install system dependencies (psycopg2 requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq-dev \
    gcc \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Copy requirements first (for better layer caching)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy environment file for production
COPY .env.prod .env

# Copy application code
COPY app/ ./app/

# Create non-root user for security
RUN useradd --create-home --shell /bin/bash appuser && \
    chown -R appuser:appuser /app
USER appuser

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Production: Run without reload, with workers
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
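
The dev, local, and prod images differ mainly in their CMD: the local variant runs uvicorn with --reload for hot code reload, while prod drops reload and adds --workers 2. The same two launch modes via uvicorn's Python API (a sketch; the CMD lines above are the authoritative form):

import uvicorn

# Development (mirrors Dockerfile.local): single process, restart on code changes.
uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)

# Production (mirrors Dockerfile.prod): two worker processes, no reload.
# Note: workers > 1 requires passing the app as an import string, as here.
# uvicorn.run("app.main:app", host="0.0.0.0", port=8000, workers=2)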
README.md (new file, 3 lines)
@@ -0,0 +1,3 @@
# EAMCO Address Checker

This service verifies customer addresses using Nominatim geocoding.
app/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# eamco_address_checker app package
app/agent.py (new file, 516 lines)
@@ -0,0 +1,516 @@
"""
Agentic Address Verification Orchestrator.

This module implements a lightweight ReAct-inspired autonomous agent for batch
address verification. The agent follows a structured workflow:

1. PLANNING PHASE: Query records needing verification
2. EXECUTION PHASE: For each record, follow think-act-observe-reflect cycle
   - If geocoding fails, attempt fuzzy matching to correct misspellings
   - Retry geocoding with corrected address
3. REFLECTION PHASE: Summarize batch results and statistics

The agent is designed for resilience - individual record failures don't stop
the batch, and progress is committed incrementally.
"""

import logging
from dataclasses import dataclass, field
from datetime import datetime, date
from typing import List, Optional

from geopy.geocoders import Nominatim
from sqlalchemy import or_, func
from sqlalchemy.orm import Session

from app.config import (
    BATCH_SIZE,
    COMMIT_BATCH_SIZE,
    NOMINATIM_USER_AGENT,
)
from app.models import CustomerCustomer
from app.tools import (
    build_address,
    validate_address_components,
    format_address_string,
    geocode_address,
    validate_geocode_result,
    update_record,
    rate_limit_sleep,
    GeocodeResult,
    get_state_abbreviation,
)
from app.streets import correct_address, get_town_street_count

logger = logging.getLogger(__name__)


@dataclass
class BatchStats:
    """Statistics for a batch verification run."""
    total_queried: int = 0
    processed: int = 0
    updated: int = 0
    corrected: int = 0  # Addresses fixed via fuzzy matching
    failed: int = 0
    skipped: int = 0
    rate_limited: int = 0
    errors: List[str] = field(default_factory=list)
    corrections: List[str] = field(default_factory=list)  # Log of corrections made
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None

    @property
    def duration_seconds(self) -> float:
        """Calculate batch duration in seconds."""
        if self.start_time and self.end_time:
            return (self.end_time - self.start_time).total_seconds()
        return 0.0

    def to_dict(self) -> dict:
        """Convert stats to dictionary for JSON response."""
        return {
            "total_queried": self.total_queried,
            "processed": self.processed,
            "updated": self.updated,
            "corrected": self.corrected,
            "failed": self.failed,
            "skipped": self.skipped,
            "rate_limited": self.rate_limited,
            "duration_seconds": round(self.duration_seconds, 2),
            "errors_count": len(self.errors),
            "sample_errors": self.errors[:5] if self.errors else [],
            "sample_corrections": self.corrections[:5] if self.corrections else [],
        }


class AddressVerificationAgent:
    """
    Lightweight autonomous agent for address verification.

    Implements a ReAct-inspired workflow where each record goes through:
    - OBSERVE: Examine the address data
    - THINK: Decide if geocoding should be attempted
    - ACT: Call geocoding API
    - OBSERVE: Examine the result
    - REFLECT: Log decision and update database

    Attributes:
        session: SQLAlchemy database session
        batch_size: Maximum records per batch
        commit_size: Records between commits
        stats: Running statistics for the batch
        geocoder: Nominatim geocoder instance
    """

    def __init__(
        self,
        session: Session,
        batch_size: int = BATCH_SIZE,
        commit_size: int = COMMIT_BATCH_SIZE,
    ):
        """
        Initialize the address verification agent.

        Args:
            session: SQLAlchemy session for database operations
            batch_size: Max records to process (default from config)
            commit_size: Records before intermediate commit
        """
        self.session = session
        self.batch_size = batch_size
        self.commit_size = commit_size
        self.stats = BatchStats()
        self.geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)

        logger.info(
            f"Agent initialized: batch_size={batch_size}, commit_size={commit_size}"
        )

    # =========================================================================
    # PHASE 1: PLANNING
    # =========================================================================

    def plan_batch(self) -> List[CustomerCustomer]:
        """
        PLANNING PHASE: Query records that need address verification.

        Criteria for selection:
        - correct_address = FALSE, OR
        - verified_at IS NULL, OR
        - verified_at < today (not verified today)

        Returns:
            List of CustomerCustomer records to process
        """
        logger.info("=" * 60)
        logger.info("PLANNING PHASE: Querying records needing verification")
        logger.info("=" * 60)

        today = date.today()

        # Build query for records needing verification
        query = self.session.query(CustomerCustomer).filter(
            or_(
                CustomerCustomer.correct_address == False,  # noqa: E712
                CustomerCustomer.verified_at.is_(None),
                func.date(CustomerCustomer.verified_at) < today,
            )
        ).limit(self.batch_size)

        records = query.all()
        self.stats.total_queried = len(records)

        logger.info(
            f"PLAN RESULT: Found {len(records)} records needing verification",
            extra={"record_count": len(records), "batch_limit": self.batch_size}
        )

        # Log sample of record IDs for debugging
        if records:
            sample_ids = [r.id for r in records[:10]]
            logger.debug(f"Sample record IDs: {sample_ids}")

        return records

    # =========================================================================
    # PHASE 2: EXECUTION (ReAct-style per record)
    # =========================================================================

    def process_record(self, customer: CustomerCustomer) -> bool:
        """
        EXECUTION PHASE: Process a single record with ReAct-style workflow.

        Steps:
        1. OBSERVE: Build address from record components
        2. THINK: Validate address - skip if obviously invalid
        3. ACT: Call Nominatim geocoder
        4. OBSERVE: Examine geocoding result
        5. REFLECT: Log decision and update database

        Args:
            customer: CustomerCustomer record to process

        Returns:
            True if record was successfully updated, False otherwise
        """
        logger.info("-" * 40)
        logger.info(f"Processing record ID: {customer.id}")

        # -----------------------------------------------------------------
        # STEP 1: OBSERVE - Build address from components
        # -----------------------------------------------------------------
        logger.debug(f"[OBSERVE] Building address for customer {customer.id}")
        address_components = build_address(customer)

        # -----------------------------------------------------------------
        # STEP 2: THINK - Validate address components
        # -----------------------------------------------------------------
        logger.debug("[THINK] Validating address components")
        address_components = validate_address_components(address_components)

        if not address_components.is_valid:
            # REFLECT: Skip invalid addresses
            logger.info(
                f"[REFLECT] Skipping record {customer.id}: "
                f"{address_components.validation_error}"
            )
            self.stats.skipped += 1

            # Still update the record to mark it as processed
            geocode_result = GeocodeResult(
                success=False,
                skipped=True,
                skip_reason=address_components.validation_error,
                error_message=address_components.validation_error,
            )
            update_record(self.session, customer, geocode_result, is_valid=False)
            return False

        # Format address for geocoding
        address_string = format_address_string(address_components)
        logger.debug(f"[THINK] Formatted address: {address_string}")

        # -----------------------------------------------------------------
        # STEP 3: ACT - Call geocoding API
        # -----------------------------------------------------------------
        logger.debug("[ACT] Calling Nominatim geocoder")
        geocode_result = geocode_address(address_string, self.geocoder)

        # -----------------------------------------------------------------
        # STEP 4: OBSERVE - Examine geocoding result
        # -----------------------------------------------------------------
        logger.debug(f"[OBSERVE] Geocoding result: success={geocode_result.success}")

        if not geocode_result.success:
            # -----------------------------------------------------------------
            # STEP 4a: THINK - Try fuzzy matching to correct address
            # -----------------------------------------------------------------
            logger.info(
                "[THINK] Geocoding failed, attempting fuzzy street matching..."
            )

            # Get state abbreviation for fuzzy matching
            state_abbr = get_state_abbreviation(customer.customer_state)
            town = address_components.city

            if state_abbr and town:
                # Check if we have street data for this town
                street_count = get_town_street_count(self.session, town, state_abbr)

                if street_count > 0:
                    # Try to correct the address
                    match = correct_address(
                        session=self.session,
                        full_address=address_components.street or "",
                        town=town,
                        state=state_abbr,
                        min_confidence=75.0,
                    )

                    if match and match.corrected_address:
                        # Remember the original street before mutating the shared
                        # components object, so the logs show a true before/after.
                        original_street = address_components.street
                        logger.info(
                            f"[ACT] Found correction: '{original_street}' "
                            f"-> '{match.corrected_address}' "
                            f"(confidence: {match.confidence_score:.1f}%)"
                        )

                        # Build corrected address string
                        corrected_components = address_components
                        corrected_components.street = match.corrected_address
                        corrected_address_string = format_address_string(corrected_components)

                        logger.info(f"[ACT] Retrying with corrected address: {corrected_address_string}")

                        # Rate limit before retry
                        rate_limit_sleep()

                        # Retry geocoding with corrected address
                        geocode_result = geocode_address(corrected_address_string, self.geocoder)

                        if geocode_result.success:
                            logger.info(
                                "[OBSERVE] Corrected address geocoded successfully!"
                            )
                            self.stats.corrected += 1
                            self.stats.corrections.append(
                                f"ID {customer.id}: '{original_street}' "
                                f"-> '{match.corrected_address}'"
                            )
                        else:
                            logger.info(
                                "[OBSERVE] Corrected address still failed to geocode"
                            )
                    else:
                        logger.debug(
                            "[THINK] No confident fuzzy match found"
                        )
                else:
                    logger.debug(
                        f"[THINK] No street reference data for {town}, {state_abbr}. "
                        f"Use POST /streets/{town}/{state_abbr} to populate."
                    )

        # If still failed after correction attempt
        if not geocode_result.success:
            # -----------------------------------------------------------------
            # STEP 5a: REFLECT - Handle failed geocoding
            # -----------------------------------------------------------------
            logger.info(
                f"[REFLECT] Geocoding failed for record {customer.id}: "
                f"{geocode_result.error_message}"
            )
            self.stats.failed += 1
            self.stats.errors.append(
                f"ID {customer.id}: {geocode_result.error_message}"
            )

            update_record(self.session, customer, geocode_result, is_valid=False)
            return False

        # Validate geocode result quality
        is_valid, validation_reason = validate_geocode_result(geocode_result)
        logger.debug(f"[OBSERVE] Validation: valid={is_valid}, reason={validation_reason}")

        # -----------------------------------------------------------------
        # STEP 5b: REFLECT - Update database with result
        # -----------------------------------------------------------------
        if is_valid:
            logger.info(
                f"[REFLECT] Success for record {customer.id}: "
                f"lat={geocode_result.latitude}, lon={geocode_result.longitude}"
            )
            self.stats.updated += 1
        else:
            logger.info(
                f"[REFLECT] Invalid result for record {customer.id}: {validation_reason}"
            )
            self.stats.failed += 1
            self.stats.errors.append(f"ID {customer.id}: {validation_reason}")

        update_record(self.session, customer, geocode_result, is_valid=is_valid)
        return is_valid

    def execute_batch(self, records: List[CustomerCustomer]) -> None:
        """
        Execute the batch processing loop with rate limiting.

        Processes records sequentially with proper rate limiting between
        geocoding calls. Commits to database periodically.

        Args:
            records: List of CustomerCustomer records to process
        """
        logger.info("=" * 60)
        logger.info("EXECUTION PHASE: Processing records")
        logger.info("=" * 60)

        uncommitted_count = 0

        for i, customer in enumerate(records):
            try:
                # Process the record
                self.process_record(customer)
                self.stats.processed += 1
                uncommitted_count += 1

                # Commit in batches
                if uncommitted_count >= self.commit_size:
                    logger.info(f"Committing batch of {uncommitted_count} records")
                    self.session.commit()
                    uncommitted_count = 0

                # Rate limiting (skip on last record)
                if i < len(records) - 1:
                    rate_limit_sleep()

            except Exception as e:
                # Handle unexpected errors - continue processing
                logger.error(
                    f"Unexpected error processing record {customer.id}: {e}",
                    exc_info=True
                )
                self.stats.failed += 1
                self.stats.errors.append(f"ID {customer.id}: Unexpected error: {str(e)}")
                self.stats.processed += 1

                # Rollback the current transaction and continue
                self.session.rollback()
                uncommitted_count = 0

        # Final commit for any remaining records
        if uncommitted_count > 0:
            logger.info(f"Final commit of {uncommitted_count} records")
            self.session.commit()

    # =========================================================================
    # PHASE 3: REFLECTION
    # =========================================================================

    def reflect(self) -> dict:
        """
        REFLECTION PHASE: Summarize batch results and statistics.

        Logs comprehensive statistics about the batch run and returns
        a summary dictionary suitable for API response.

        Returns:
            Dictionary with batch statistics
        """
        self.stats.end_time = datetime.utcnow()

        logger.info("=" * 60)
        logger.info("REFLECTION PHASE: Batch Summary")
        logger.info("=" * 60)

        stats_dict = self.stats.to_dict()

        logger.info(f"Total queried: {stats_dict['total_queried']}")
        logger.info(f"Processed: {stats_dict['processed']}")
        logger.info(f"Updated (valid): {stats_dict['updated']}")
        logger.info(f"Corrected: {stats_dict['corrected']}")
        logger.info(f"Failed: {stats_dict['failed']}")
        logger.info(f"Skipped: {stats_dict['skipped']}")
        logger.info(f"Duration: {stats_dict['duration_seconds']}s")

        if stats_dict['errors_count'] > 0:
            logger.warning(f"Errors encountered: {stats_dict['errors_count']}")
            for error in stats_dict['sample_errors']:
                logger.warning(f"  - {error}")

        if stats_dict['corrected'] > 0:
            logger.info(f"Addresses corrected via fuzzy matching: {stats_dict['corrected']}")
            for correction in stats_dict['sample_corrections']:
                logger.info(f"  - {correction}")

        # Calculate success rate
        if stats_dict['processed'] > 0:
            success_rate = (stats_dict['updated'] / stats_dict['processed']) * 100
            logger.info(f"Success rate: {success_rate:.1f}%")
            stats_dict['success_rate'] = round(success_rate, 1)
        else:
            stats_dict['success_rate'] = 0.0

        logger.info("=" * 60)

        return stats_dict

    # =========================================================================
    # MAIN ENTRY POINT
    # =========================================================================

    def run(self) -> dict:
        """
        Execute the full agent workflow.

        Runs through all three phases:
        1. Planning - Query records
        2. Execution - Process each record
        3. Reflection - Summarize results

        Returns:
            Dictionary with batch statistics and message
        """
        logger.info("*" * 60)
        logger.info("ADDRESS VERIFICATION AGENT STARTING")
        logger.info("*" * 60)

        self.stats.start_time = datetime.utcnow()

        try:
            # Phase 1: Planning
            records = self.plan_batch()

            if not records:
                logger.info("No records to process - batch complete")
                self.stats.end_time = datetime.utcnow()
                return {
                    "status": "success",
                    "message": "No records needed verification",
                    **self.stats.to_dict(),
                }

            # Phase 2: Execution
            self.execute_batch(records)

            # Phase 3: Reflection
            stats = self.reflect()

            logger.info("*" * 60)
            logger.info("ADDRESS VERIFICATION AGENT COMPLETE")
            logger.info("*" * 60)

            return {
                "status": "success",
                "message": f"Batch complete: {stats['updated']} addresses updated",
                **stats,
            }

        except Exception as e:
            logger.error(f"Agent failed with error: {e}", exc_info=True)
            self.stats.end_time = datetime.utcnow()
            return {
                "status": "error",
                "message": f"Agent failed: {str(e)}",
                **self.stats.to_dict(),
            }
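
app/main.py below drives this class from the /verify-addresses endpoint; the same call pattern can also be exercised directly, e.g. for a smoke test. A minimal sketch, assuming the SessionLocal factory defined in app/main.py:

from app.agent import AddressVerificationAgent
from app.main import SessionLocal

db = SessionLocal()
try:
    # Small batch for a quick run; omit the arguments to use app.config defaults.
    agent = AddressVerificationAgent(session=db, batch_size=10, commit_size=5)
    result = agent.run()  # plan_batch -> execute_batch -> reflect
    print(result["status"], result["message"])
finally:
    db.close()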
app/config.py (new file, 184 lines)
@@ -0,0 +1,184 @@
"""
Configuration settings for eamco_address_checker.

This module provides configuration with environment-based switching:
- DEVELOPMENT: Uses 'eamco' database, localhost CORS origins
- PRODUCTION: Uses 'auburnoil' database, production domain CORS origins

Environment variables are loaded from .env.local or .env.prod depending
on the Docker compose file used.
"""

import os
from typing import List

from dotenv import load_dotenv

# Load environment variables from .env file if present
load_dotenv()

# =============================================================================
# ENVIRONMENT MODE
# =============================================================================

MODE = os.getenv("MODE", "LOCAL")
CURRENT_SETTINGS = os.getenv("CURRENT_SETTINGS", "DEVELOPMENT")

if CURRENT_SETTINGS == "PRODUCTION":
    print("USING PRODUCTION APPLICATIONCONFIG!!!!!")
else:
    print("USING DEVELOPMENT APPLICATIONCONFIG!!!!!")

# =============================================================================
# DATABASE CONFIGURATION
# =============================================================================

# Database connection components (can be overridden individually)
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME", "postgres")
POSTGRES_PW = os.getenv("POSTGRES_PW", "password")
POSTGRES_SERVER = os.getenv("POSTGRES_SERVER", "192.168.1.204")
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")

# Database name differs by environment
if CURRENT_SETTINGS == "PRODUCTION":
    POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "auburnoil")
else:
    POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "eamco")

# Build connection URI from components (fallback)
_DEFAULT_DATABASE_URI = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
    POSTGRES_USERNAME,
    POSTGRES_PW,
    POSTGRES_SERVER,
    POSTGRES_PORT,
    POSTGRES_DBNAME
)

# Allow full DATABASE_URL override
DATABASE_URL: str = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URI)

# SQLAlchemy binds (for compatibility)
SQLALCHEMY_DATABASE_URI = DATABASE_URL
SQLALCHEMY_BINDS = {POSTGRES_DBNAME: SQLALCHEMY_DATABASE_URI}

# =============================================================================
# CORS CONFIGURATION
# =============================================================================

# Parse CORS origins from environment (comma-separated) or use defaults
_cors_env = os.getenv("CORS_ORIGINS", "")

if _cors_env:
    CORS_ORIGINS: List[str] = [origin.strip() for origin in _cors_env.split(",")]
elif CURRENT_SETTINGS == "PRODUCTION":
    # Production CORS origins
    CORS_ORIGINS = [
        "https://oil.edwineames.com",
        "https://edwineames.com",
    ]
else:
    # Development CORS origins
    CORS_ORIGINS = [
        "http://localhost:9000",
        "https://localhost:9513",
        "http://localhost:9514",
        "http://localhost:9512",
        "http://localhost:9511",
        "http://localhost:5173",  # Frontend port
        "http://localhost:9616",  # Authorize service port
    ]

# =============================================================================
# BATCH PROCESSING CONFIGURATION
# =============================================================================

# Maximum records to process in a single batch run
BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "150"))

# Records to process before committing to database
COMMIT_BATCH_SIZE: int = int(os.getenv("COMMIT_BATCH_SIZE", "20"))

# =============================================================================
# GEOCODING CONFIGURATION (Nominatim)
# =============================================================================

# User agent for Nominatim API (required - identifies your application)
NOMINATIM_USER_AGENT: str = "Unraid-EamcoAddressChecker/1.0 (eeames214@gmail.com)"

# Rate limiting: Sleep range between requests (Nominatim requires 1 req/sec max)
MIN_SLEEP_SECONDS: float = float(os.getenv("MIN_SLEEP", "1.2"))
MAX_SLEEP_SECONDS: float = float(os.getenv("MAX_SLEEP", "1.8"))

# Geocoding timeout in seconds
GEOCODE_TIMEOUT: int = int(os.getenv("GEOCODE_TIMEOUT", "10"))

# =============================================================================
# STATE MAPPING
# =============================================================================

# Integer -> US State Abbreviation mapping
# Replace with proper states table lookup when available
STATE_MAPPING: dict[int, str] = {
    1: "AL",   # Alabama
    2: "AK",   # Alaska
    3: "AS",   # American Samoa
    4: "AZ",   # Arizona
    5: "AR",   # Arkansas
    6: "CA",   # California
    7: "CO",   # Colorado
    8: "CT",   # Connecticut
    9: "DE",   # Delaware
    10: "DC",  # District of Columbia
    11: "FL",  # Florida
    12: "GA",  # Georgia
    13: "GU",  # Guam
    14: "HI",  # Hawaii
    15: "ID",  # Idaho
    16: "IL",  # Illinois
    17: "IN",  # Indiana
    18: "IA",  # Iowa
    19: "KS",  # Kansas
    20: "KY",  # Kentucky
    21: "LA",  # Louisiana
    22: "ME",  # Maine
    23: "MD",  # Maryland
    24: "MA",  # Massachusetts
    25: "MI",  # Michigan
    26: "MN",  # Minnesota
    27: "MS",  # Mississippi
    28: "MO",  # Missouri
    29: "MT",  # Montana
    30: "NE",  # Nebraska
    31: "NV",  # Nevada
    32: "NH",  # New Hampshire
    33: "NJ",  # New Jersey
    34: "NM",  # New Mexico
    35: "NY",  # New York
    36: "NC",  # North Carolina
    37: "ND",  # North Dakota
    38: "OH",  # Ohio
    39: "OK",  # Oklahoma
    40: "OR",  # Oregon
    41: "PA",  # Pennsylvania
    42: "PR",  # Puerto Rico
    43: "RI",  # Rhode Island
    44: "SC",  # South Carolina
    45: "SD",  # South Dakota
    46: "TN",  # Tennessee
    47: "TX",  # Texas
    48: "UT",  # Utah
    49: "VT",  # Vermont
    50: "VA",  # Virginia
    51: "VI",  # Virgin Islands
    52: "WA",  # Washington
    53: "WV",  # West Virginia
    54: "WI",  # Wisconsin
    55: "WY",  # Wyoming
}

# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================

LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
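
The resolution order for the connection string: a full DATABASE_URL, if set, wins; otherwise the URI is assembled from the individual POSTGRES_* variables and the environment-dependent database name. Condensed to its essentials:

import os

_uri_from_parts = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
    os.getenv("POSTGRES_USERNAME", "postgres"),
    os.getenv("POSTGRES_PW", "password"),
    os.getenv("POSTGRES_SERVER", "192.168.1.204"),
    os.getenv("POSTGRES_PORT", "5432"),
    os.getenv("POSTGRES_DBNAME", "eamco"),  # default is "auburnoil" under PRODUCTION
)
DATABASE_URL = os.getenv("DATABASE_URL", _uri_from_parts)  # full URL takes precedence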
app/main.py (new file, 558 lines)
@@ -0,0 +1,558 @@
"""
eamco_address_checker - FastAPI Address Verification Microservice.

This microservice provides a batch job endpoint for verifying customer addresses
using geocoding. Designed to be triggered via cron from Unraid.

Endpoints:
    GET /health - Health check with database connectivity status
    POST /verify-addresses - Trigger batch address verification
    POST /reset-verifications - Clear all verification data for re-checking
    POST /streets/{town}/{state} - Fetch and store streets from OSM for a town
    GET /streets/{town}/{state} - Get street count for a town

Usage:
    # Development
    uvicorn app.main:app --reload --host 0.0.0.0 --port 8000

    # Production (Docker)
    docker run -p 8000:8000 eamco_address_checker

    # Trigger from cron
    curl -X POST http://localhost:8000/verify-addresses
"""

import logging
import sys
from contextlib import contextmanager
from typing import Generator

from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.exc import SQLAlchemyError

from app.config import (
    DATABASE_URL,
    CORS_ORIGINS,
    LOG_LEVEL,
    LOG_FORMAT,
    BATCH_SIZE,
    COMMIT_BATCH_SIZE,
)
from app.agent import AddressVerificationAgent
from app.models import CustomerCustomer, StreetReference, Base
from app.streets import (
    populate_streets_for_town,
    get_town_street_count,
)

# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format=LOG_FORMAT,
    handlers=[
        logging.StreamHandler(sys.stdout),
    ]
)
logger = logging.getLogger(__name__)

# =============================================================================
# DATABASE SETUP
# =============================================================================

# Create SQLAlchemy engine with connection pooling
engine = create_engine(
    DATABASE_URL,
    pool_pre_ping=True,  # Verify connections before use
    pool_size=5,
    max_overflow=10,
    echo=False,  # Set to True for SQL debugging
)

# Session factory
SessionLocal = sessionmaker(
    autocommit=False,
    autoflush=False,
    bind=engine,
)


def get_db() -> Generator[Session, None, None]:
    """
    Dependency that provides a database session.

    Yields a SQLAlchemy session and ensures proper cleanup.
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


@contextmanager
def get_db_session() -> Generator[Session, None, None]:
    """
    Context manager for database sessions (non-dependency use).
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def check_db_connection() -> bool:
    """
    Test database connectivity.

    Returns:
        True if database is reachable, False otherwise
    """
    try:
        with get_db_session() as db:
            db.execute(text("SELECT 1"))
        return True
    except SQLAlchemyError as e:
        logger.error(f"Database connection failed: {e}")
        return False


# =============================================================================
# FASTAPI APPLICATION
# =============================================================================

app = FastAPI(
    title="eamco_address_checker",
    description="Address verification microservice using Nominatim geocoding",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# =============================================================================
# CORS MIDDLEWARE
# =============================================================================

app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# =============================================================================
# PYDANTIC MODELS (Response Schemas)
# =============================================================================


class HealthResponse(BaseModel):
    """Health check response schema."""
    status: str
    db_connected: bool


class VerificationResponse(BaseModel):
    """Address verification batch response schema."""
    status: str
    message: str
    total_queried: int
    processed: int
    updated: int
    corrected: int
    failed: int
    skipped: int
    rate_limited: int
    duration_seconds: float
    success_rate: float
    errors_count: int
    sample_errors: list
    sample_corrections: list


class ResetResponse(BaseModel):
    """Reset verifications response schema."""
    status: str
    message: str
    records_reset: int


class StreetPopulateResponse(BaseModel):
    """Response for street population endpoint."""
    status: str
    message: str
    town: str
    state: str
    streets_added: int
    streets_updated: int
    total_found: int
    errors: list


class StreetInfoResponse(BaseModel):
    """Response for street info endpoint."""
    town: str
    state: str
    street_count: int
    message: str


# =============================================================================
# ENDPOINTS
# =============================================================================


@app.get("/", include_in_schema=False)
async def root():
    """Root endpoint - redirect to docs."""
    return {
        "service": "eamco_address_checker",
        "version": "1.0.0",
        "docs": "/docs",
    }


@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Health check endpoint.

    Returns service status and database connectivity.
    Use this endpoint for container health checks and monitoring.

    Returns:
        HealthResponse with status and db_connected flag
    """
    db_connected = check_db_connection()

    return HealthResponse(
        status="healthy" if db_connected else "degraded",
        db_connected=db_connected,
    )


@app.post(
    "/verify-addresses",
    response_model=VerificationResponse,
    tags=["Verification"],
)
async def verify_addresses(db: Session = Depends(get_db)):
    """
    Trigger batch address verification.

    This endpoint runs a synchronous batch job that:
    1. Queries records needing verification (max BATCH_SIZE)
    2. Geocodes each address using Nominatim
    3. Updates records with lat/long and verification status
    4. Returns statistics about the batch run

    The batch respects Nominatim rate limits (1 req/sec) so execution
    time is approximately BATCH_SIZE * 1.5 seconds.

    Use this endpoint from Unraid cron:
        curl -X POST http://localhost:8000/verify-addresses

    Returns:
        VerificationResponse with batch statistics
    """
    logger.info("=" * 60)
    logger.info("VERIFY-ADDRESSES ENDPOINT CALLED")
    logger.info("=" * 60)
    logger.info(f"Configuration: BATCH_SIZE={BATCH_SIZE}, COMMIT_SIZE={COMMIT_BATCH_SIZE}")

    try:
        # Initialize and run the agent
        agent = AddressVerificationAgent(
            session=db,
            batch_size=BATCH_SIZE,
            commit_size=COMMIT_BATCH_SIZE,
        )

        result = agent.run()

        logger.info(f"Batch complete: {result.get('message', 'No message')}")

        return VerificationResponse(
            status=result.get("status", "unknown"),
            message=result.get("message", ""),
            total_queried=result.get("total_queried", 0),
            processed=result.get("processed", 0),
            updated=result.get("updated", 0),
            corrected=result.get("corrected", 0),
            failed=result.get("failed", 0),
            skipped=result.get("skipped", 0),
            rate_limited=result.get("rate_limited", 0),
            duration_seconds=result.get("duration_seconds", 0.0),
            success_rate=result.get("success_rate", 0.0),
            errors_count=result.get("errors_count", 0),
            sample_errors=result.get("sample_errors", []),
            sample_corrections=result.get("sample_corrections", []),
        )

    except SQLAlchemyError as e:
        logger.error(f"Database error during verification: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        )

    except Exception as e:
        logger.error(f"Unexpected error during verification: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Verification failed: {str(e)}"
        )


@app.post(
    "/reset-verifications",
    response_model=ResetResponse,
    tags=["Verification"],
)
async def reset_verifications(db: Session = Depends(get_db)):
    """
    Reset all address verifications for re-checking.

    This endpoint clears verification data for ALL customer records:
    - Sets correct_address = FALSE
    - Sets verified_at = NULL
    - Clears customer_latitude and customer_longitude

    After calling this endpoint, all addresses will be eligible for
    re-verification on the next /verify-addresses call.

    WARNING: This is a mass update operation. Use with caution.

    Returns:
        ResetResponse with count of records reset
    """
    logger.info("=" * 60)
    logger.info("RESET-VERIFICATIONS ENDPOINT CALLED")
    logger.info("=" * 60)

    try:
        # Count records before update
        total_records = db.query(CustomerCustomer).count()
        logger.info(f"Total customer records: {total_records}")

        # Mass update to reset all verification data
        updated_count = db.query(CustomerCustomer).update(
            {
                CustomerCustomer.correct_address: False,
                CustomerCustomer.verified_at: None,
                CustomerCustomer.customer_latitude: None,
                CustomerCustomer.customer_longitude: None,
            },
            synchronize_session=False
        )

        db.commit()

        logger.info(f"Reset {updated_count} records successfully")

        return ResetResponse(
            status="success",
            message=f"Reset {updated_count} address verifications. All addresses are now eligible for re-verification.",
            records_reset=updated_count,
        )

    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error during reset: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        )

    except Exception as e:
        db.rollback()
        logger.error(f"Unexpected error during reset: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Reset failed: {str(e)}"
        )


# =============================================================================
# STREET REFERENCE ENDPOINTS
# =============================================================================


@app.post(
    "/streets/{town}/{state}",
    response_model=StreetPopulateResponse,
    tags=["Streets"],
)
async def populate_streets(
    town: str,
    state: str,
    clear_existing: bool = False,
    db: Session = Depends(get_db)
):
    """
    Fetch and store all streets for a town from OpenStreetMap.

    This endpoint queries the OSM Overpass API to get all named streets
    in the specified town and stores them in the street_reference table
    for fuzzy matching during address verification.

    Args:
        town: Town/city name (e.g., "Boston")
        state: 2-letter state abbreviation (e.g., "MA")
        clear_existing: If true, delete existing streets for this town first

    Example:
        curl -X POST http://localhost:8000/streets/Boston/MA

    Returns:
        StreetPopulateResponse with count of streets added
    """
    logger.info("=" * 60)
    logger.info(f"POPULATE STREETS: {town}, {state}")
    logger.info("=" * 60)

    # Validate state abbreviation (2 letters)
    if len(state) != 2 or not state.isalpha():
        raise HTTPException(
            status_code=400,
            detail="State must be a 2-letter abbreviation (e.g., MA, NY, CA)"
        )

    try:
        # Ensure the street_reference table exists
        Base.metadata.create_all(bind=engine, tables=[StreetReference.__table__])

        result = populate_streets_for_town(
            session=db,
            town=town,
            state=state.upper(),
            clear_existing=clear_existing,
        )

        return StreetPopulateResponse(
            status="success" if result.success else "partial",
            message=result.message,
            town=town,
            state=state.upper(),
            streets_added=result.streets_added,
            streets_updated=result.streets_updated,
            total_found=result.total_found,
            errors=result.errors,
        )

    except SQLAlchemyError as e:
        db.rollback()
        logger.error(f"Database error populating streets: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Database error: {str(e)}"
        )

    except Exception as e:
        logger.error(f"Error populating streets: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to populate streets: {str(e)}"
        )


@app.get(
    "/streets/{town}/{state}",
    response_model=StreetInfoResponse,
    tags=["Streets"],
)
async def get_street_info(
    town: str,
    state: str,
    db: Session = Depends(get_db)
):
    """
    Get information about streets stored for a town.

    Returns the count of streets in the reference table for the
    specified town/state combination.

    Args:
        town: Town/city name
        state: 2-letter state abbreviation

    Example:
        curl http://localhost:8000/streets/Boston/MA

    Returns:
        StreetInfoResponse with street count
    """
    # Validate state abbreviation
    if len(state) != 2 or not state.isalpha():
        raise HTTPException(
            status_code=400,
            detail="State must be a 2-letter abbreviation"
        )

    count = get_town_street_count(db, town, state.upper())

    if count == 0:
        message = f"No streets found for {town}, {state}. Use POST to populate."
    else:
        message = f"Found {count} streets for {town}, {state}"

    return StreetInfoResponse(
        town=town,
        state=state.upper(),
        street_count=count,
        message=message,
    )


# =============================================================================
# STARTUP/SHUTDOWN EVENTS
# =============================================================================


@app.on_event("startup")
async def startup_event():
    """Application startup - log configuration and test DB connection."""
    logger.info("*" * 60)
    logger.info("eamco_address_checker STARTING")
    logger.info("*" * 60)
    logger.info(f"Database URL: {DATABASE_URL[:50]}...")
    logger.info(f"CORS Origins: {CORS_ORIGINS}")
    logger.info(f"Batch Size: {BATCH_SIZE}")
    logger.info(f"Commit Batch Size: {COMMIT_BATCH_SIZE}")

    # Test database connection
    if check_db_connection():
        logger.info("Database connection: OK")
    else:
        logger.warning("Database connection: FAILED - service may be degraded")


@app.on_event("shutdown")
async def shutdown_event():
    """Application shutdown - cleanup."""
    logger.info("eamco_address_checker SHUTTING DOWN")
    engine.dispose()
    logger.info("Database connections closed")


# =============================================================================
# MAIN ENTRY POINT (for direct execution)
# =============================================================================

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info",
    )
127
app/models.py
Normal file
@@ -0,0 +1,127 @@
"""
SQLAlchemy 2.x ORM Models for eamco_address_checker.

This module defines the database models using SQLAlchemy's DeclarativeBase.

Models:
    CustomerCustomer: Customer records with address fields for geocoding
    StreetReference: Known streets by town/state for fuzzy matching corrections
"""

from sqlalchemy import Column, Integer, String, VARCHAR, TIMESTAMP, BOOLEAN, Index
from sqlalchemy.orm import DeclarativeBase


class Base(DeclarativeBase):
    """Base class for all SQLAlchemy models."""
    pass


class CustomerCustomer(Base):
    """
    Customer model representing address and contact information.

    The verified_at timestamp tracks when the address was last geocoded.
    The correct_address boolean indicates if geocoding was successful.

    Attributes:
        id: Primary key
        auth_net_profile_id: Authorize.net customer profile ID
        account_number: Customer account number (max 25 chars)
        customer_last_name: Customer's last name (max 250 chars)
        customer_first_name: Customer's first name (max 250 chars)
        customer_town: City/town name (max 140 chars)
        customer_state: Integer mapping to US state abbreviation
        customer_zip: ZIP code (max 25 chars)
        customer_first_call: Timestamp of first customer contact
        customer_email: Customer email address (max 500 chars)
        customer_automatic: Automatic billing flag
        customer_phone_number: Phone number (max 25 chars)
        customer_home_type: Type of residence
        customer_apt: Apartment/unit number (max 140 chars)
        customer_address: Street address (max 1000 chars)
        company_id: Associated company ID
        customer_latitude: Geocoded latitude as string (max 250 chars)
        customer_longitude: Geocoded longitude as string (max 250 chars)
        correct_address: Flag indicating successful geocoding
        verified_at: Timestamp of last verification attempt
    """
    __tablename__ = "customer_customer"
    __table_args__ = {"schema": "public"}

    id = Column(Integer, primary_key=True, autoincrement=True)
    auth_net_profile_id = Column(String, unique=True, index=True, nullable=True)
    account_number = Column(VARCHAR(25))
    customer_last_name = Column(VARCHAR(250))
    customer_first_name = Column(VARCHAR(250))
    customer_town = Column(VARCHAR(140))
    customer_state = Column(Integer)  # Integer -> 2-letter US state abbreviation
    customer_zip = Column(VARCHAR(25))
    customer_first_call = Column(TIMESTAMP)
    customer_email = Column(VARCHAR(500))
    customer_automatic = Column(Integer)
    customer_phone_number = Column(VARCHAR(25))
    customer_home_type = Column(Integer)
    customer_apt = Column(VARCHAR(140))
    customer_address = Column(VARCHAR(1000))
    company_id = Column(Integer)
    customer_latitude = Column(VARCHAR(250))
    customer_longitude = Column(VARCHAR(250))
    correct_address = Column(BOOLEAN, default=False, nullable=False)
    verified_at = Column(TIMESTAMP, nullable=True)  # NEW: Tracks verification timestamp

    def __repr__(self) -> str:
        return (
            f"<CustomerCustomer(id={self.id}, "
            f"name='{self.customer_first_name} {self.customer_last_name}', "
            f"address='{self.customer_address}', "
            f"verified={self.correct_address})>"
        )


class StreetReference(Base):
    """
    Reference table of known streets for fuzzy matching address corrections.

    Streets are populated per town/state from OpenStreetMap data.
    Used to correct misspellings and wrong street suffixes (rd vs dr, etc.)
    when geocoding fails.

    Attributes:
        id: Primary key
        street_name: Full street name (e.g., "Main Street")
        street_name_normalized: Lowercase, cleaned for matching
        street_number_low: Lowest known street number (if available)
        street_number_high: Highest known street number (if available)
        town: Town/city name
        town_normalized: Lowercase town name for matching
        state: 2-letter state abbreviation (e.g., "MA")
        zip_codes: Comma-separated ZIP codes this street spans
        osm_id: OpenStreetMap way ID for reference
        created_at: When this record was added
    """
    __tablename__ = "street_reference"
    __table_args__ = (
        Index("ix_street_ref_town_state", "town_normalized", "state"),
        Index("ix_street_ref_name_town", "street_name_normalized", "town_normalized"),
        {"schema": "public"},
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    street_name = Column(VARCHAR(500), nullable=False)
    street_name_normalized = Column(VARCHAR(500), nullable=False, index=True)
    street_number_low = Column(Integer, nullable=True)
    street_number_high = Column(Integer, nullable=True)
    town = Column(VARCHAR(140), nullable=False)
    town_normalized = Column(VARCHAR(140), nullable=False)
    state = Column(VARCHAR(2), nullable=False)
    zip_codes = Column(VARCHAR(100), nullable=True)
    osm_id = Column(String, nullable=True, index=True)
    created_at = Column(TIMESTAMP, nullable=False)

    def __repr__(self) -> str:
        return (
            f"<StreetReference(id={self.id}, "
            f"street='{self.street_name}', "
            f"town='{self.town}', state='{self.state}')>"
        )
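

# Illustrative usage (a minimal sketch, not part of this module; the engine URL
# is a placeholder, and created_at must be supplied since it is non-nullable):
#
#   from datetime import datetime
#   from sqlalchemy import create_engine
#   from sqlalchemy.orm import Session
#
#   engine = create_engine("postgresql+psycopg2://user:pass@host/db")
#   Base.metadata.create_all(bind=engine)
#
#   with Session(engine) as session:
#       session.add(StreetReference(
#           street_name="Main Street",
#           street_name_normalized="main street",
#           town="Boston",
#           town_normalized="boston",
#           state="MA",
#           created_at=datetime.utcnow(),
#       ))
#       session.commit()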
572
app/streets.py
Normal file
@@ -0,0 +1,572 @@
"""
Street reference tools for address correction.

This module provides functionality to:
1. Fetch streets from OpenStreetMap Overpass API for a given town/state
2. Store streets in the StreetReference table
3. Perform fuzzy matching to correct misspelled addresses

The fuzzy matching handles common issues like:
- Misspelled street names ("Mian St" -> "Main St")
- Wrong suffixes ("Main Rd" -> "Main St")
- Missing/extra spaces
- Abbreviated vs full names ("St" vs "Street")
"""

import logging
import re
import time
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Tuple

import requests
from rapidfuzz import fuzz, process
from sqlalchemy.orm import Session

from app.config import STATE_MAPPING
from app.models import StreetReference

logger = logging.getLogger(__name__)

# Overpass API endpoints (multiple for fallback)
OVERPASS_API_URLS = [
    "https://overpass-api.de/api/interpreter",
    "https://overpass.kumi.systems/api/interpreter",
    "https://maps.mail.ru/osm/tools/overpass/api/interpreter",
]

# Common street suffix variations for normalization
STREET_SUFFIXES = {
    # Standard -> variations
    "street": ["st", "str", "strt"],
    "avenue": ["ave", "av", "aven"],
    "road": ["rd", "rod"],
    "drive": ["dr", "drv", "driv"],
    "lane": ["ln", "lne"],
    "court": ["ct", "crt", "cour"],
    "circle": ["cir", "circ", "crcl"],
    "boulevard": ["blvd", "boul", "blv"],
    "place": ["pl", "plc"],
    "terrace": ["ter", "terr", "trc"],
    "way": ["wy"],
    "highway": ["hwy", "hiway", "hgwy"],
    "parkway": ["pkwy", "pky", "pkway"],
    "square": ["sq", "sqr"],
    "trail": ["trl", "tr"],
    "crossing": ["xing", "crssng"],
    "heights": ["hts", "hgts"],
    "point": ["pt", "pnt"],
    "ridge": ["rdg", "rdge"],
    "valley": ["vly", "vlly"],
    "view": ["vw", "viw"],
    "center": ["ctr", "cntr", "centre"],
    "north": ["n"],
    "south": ["s"],
    "east": ["e"],
    "west": ["w"],
    "northeast": ["ne"],
    "northwest": ["nw"],
    "southeast": ["se"],
    "southwest": ["sw"],
}

# Build reverse lookup: abbreviation -> full form
SUFFIX_TO_FULL = {}
for full, abbrevs in STREET_SUFFIXES.items():
    for abbr in abbrevs:
        SUFFIX_TO_FULL[abbr] = full
    SUFFIX_TO_FULL[full] = full  # Also map full to itself
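
# Illustrative entries of the resulting lookup (these follow from the table above):
#   SUFFIX_TO_FULL["st"]     == "street"
#   SUFFIX_TO_FULL["blvd"]   == "boulevard"
#   SUFFIX_TO_FULL["street"] == "street"   # full forms also map to themselves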


@dataclass
class StreetMatch:
    """Result of fuzzy street matching."""
    original_street: str
    matched_street: str
    confidence_score: float
    town: str
    state: str
    street_ref_id: int
    corrected_address: Optional[str] = None


@dataclass
class FetchResult:
    """Result of fetching streets from OSM."""
    success: bool
    streets_added: int
    streets_updated: int
    total_found: int
    message: str
    errors: List[str]


def normalize_street_name(street: str) -> str:
    """
    Normalize a street name for fuzzy matching.

    - Lowercase
    - Remove extra whitespace
    - Expand common abbreviations to full form
    - Remove punctuation

    Args:
        street: Raw street name

    Returns:
        Normalized street name
    """
    if not street:
        return ""

    # Lowercase and strip
    normalized = street.lower().strip()

    # Remove punctuation except hyphens
    normalized = re.sub(r"[.,']", "", normalized)

    # Normalize whitespace
    normalized = re.sub(r"\s+", " ", normalized)

    # Split into words and expand abbreviations
    words = normalized.split()
    expanded_words = []
    for word in words:
        if word in SUFFIX_TO_FULL:
            expanded_words.append(SUFFIX_TO_FULL[word])
        else:
            expanded_words.append(word)

    return " ".join(expanded_words)
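
# Illustrative behavior (values follow directly from the rules above):
#   normalize_street_name("Main St.")         -> "main street"
#   normalize_street_name("N  Washington Av") -> "north washington avenue"
#   normalize_street_name("O'Brien Rd")       -> "obrien road"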


def extract_street_number(address: str) -> Tuple[Optional[str], str]:
    """
    Extract street number from an address string.

    Args:
        address: Full address like "123 Main Street"

    Returns:
        Tuple of (street_number, remaining_address)
    """
    if not address:
        return None, ""

    # Match leading number (possibly with letter suffix like "123A")
    match = re.match(r"^(\d+[A-Za-z]?)\s+(.+)$", address.strip())
    if match:
        return match.group(1), match.group(2)

    return None, address.strip()
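
# Illustrative behavior:
#   extract_street_number("123 Main Street")  -> ("123", "Main Street")
#   extract_street_number("123A Main Street") -> ("123A", "Main Street")
#   extract_street_number("Main Street")      -> (None, "Main Street")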


def get_state_name(state_abbr: str) -> str:
    """
    Get full state name from abbreviation for Overpass query.

    Args:
        state_abbr: 2-letter state abbreviation

    Returns:
        Full state name
    """
    state_names = {
        "AL": "Alabama", "AK": "Alaska", "AZ": "Arizona", "AR": "Arkansas",
        "CA": "California", "CO": "Colorado", "CT": "Connecticut", "DE": "Delaware",
        "DC": "District of Columbia", "FL": "Florida", "GA": "Georgia", "HI": "Hawaii",
        "ID": "Idaho", "IL": "Illinois", "IN": "Indiana", "IA": "Iowa",
        "KS": "Kansas", "KY": "Kentucky", "LA": "Louisiana", "ME": "Maine",
        "MD": "Maryland", "MA": "Massachusetts", "MI": "Michigan", "MN": "Minnesota",
        "MS": "Mississippi", "MO": "Missouri", "MT": "Montana", "NE": "Nebraska",
        "NV": "Nevada", "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico",
        "NY": "New York", "NC": "North Carolina", "ND": "North Dakota", "OH": "Ohio",
        "OK": "Oklahoma", "OR": "Oregon", "PA": "Pennsylvania", "RI": "Rhode Island",
        "SC": "South Carolina", "SD": "South Dakota", "TN": "Tennessee", "TX": "Texas",
        "UT": "Utah", "VT": "Vermont", "VA": "Virginia", "WA": "Washington",
        "WV": "West Virginia", "WI": "Wisconsin", "WY": "Wyoming",
        "PR": "Puerto Rico", "VI": "Virgin Islands", "GU": "Guam", "AS": "American Samoa",
    }
    return state_names.get(state_abbr.upper(), state_abbr)


def fetch_streets_from_osm(town: str, state: str) -> Tuple[List[dict], str]:
    """
    Fetch all streets in a town from OpenStreetMap using Overpass API.

    Args:
        town: Town/city name
        state: 2-letter state abbreviation

    Returns:
        Tuple of (list of street dicts, error message or empty string)
    """
    state_name = get_state_name(state)
    state_upper = state.upper()

    # Simpler, more reliable Overpass query
    # Uses geocodeArea which is optimized for place lookups
    query = f"""
    [out:json][timeout:120];

    // Use geocodeArea for reliable city lookup with state context
    {{geocodeArea:{town}, {state_name}, United States}}->.city;

    // Get all named streets in the city
    way["highway"]["name"](area.city);
    out tags;
    """

    # Alternative query if geocodeArea fails (more explicit)
    fallback_query = f"""
    [out:json][timeout:120];

    // Find state by ISO code
    area["ISO3166-2"="US-{state_upper}"]->.state;

    // Find city/town within state
    (
      relation["name"="{town}"]["type"="boundary"](area.state);
      way["name"="{town}"]["place"](area.state);
      node["name"="{town}"]["place"](area.state);
    );
    map_to_area->.city;

    // Get streets
    way["highway"]["name"](area.city);
    out tags;
    """

    # Most reliable: search by name within bounding box of state
    # This uses Nominatim-style search which is very reliable
    simple_query = f"""
    [out:json][timeout:60];
    area["name"="{state_name}"]["boundary"="administrative"]["admin_level"="4"]->.state;
    area["name"="{town}"](area.state)->.city;
    way["highway"]["name"](area.city);
    out tags;
    """

    queries = [simple_query, query, fallback_query]
    query_names = ["simple", "geocodeArea", "fallback"]

    logger.info(f"Fetching streets from OSM for {town}, {state_name}")

    last_error = ""

    for api_url in OVERPASS_API_URLS:
        for q, q_name in zip(queries, query_names):
            try:
                logger.info(f"Trying {q_name} query on {api_url.split('/')[2]}...")
                logger.debug(f"Query: {q}")

                response = requests.post(
                    api_url,
                    data={"data": q},
                    timeout=120,
                    headers={"User-Agent": "EamcoAddressChecker/1.0"}
                )

                if response.status_code == 429:
                    logger.warning("Rate limited, waiting 30s...")
                    time.sleep(30)
                    continue

                if response.status_code == 504:
                    logger.warning(f"Timeout on {q_name} query, trying next...")
                    continue

                response.raise_for_status()

                data = response.json()
                elements = data.get("elements", [])

                if elements:
                    logger.info(f"Success with {q_name} query: {len(elements)} street segments")
                    # Process and return results
                    streets = []
                    seen_names = set()

                    for element in elements:
                        tags = element.get("tags", {})
                        name = tags.get("name")

                        if name and name.lower() not in seen_names:
                            seen_names.add(name.lower())
                            streets.append({
                                "name": name,
                                "osm_id": str(element.get("id", "")),
                                "highway_type": tags.get("highway", ""),
                            })

                    logger.info(f"Extracted {len(streets)} unique street names")
                    return streets, ""
                else:
                    logger.debug(f"No results from {q_name} query")

            except requests.exceptions.Timeout:
                last_error = f"Timeout on {api_url}"
                logger.warning(last_error)
                continue

            except requests.exceptions.RequestException as e:
                last_error = f"Request error: {str(e)}"
                logger.warning(last_error)
                continue

            except Exception as e:
                last_error = f"Error: {str(e)}"
                logger.warning(last_error)
                continue

    # All attempts failed
    error = f"All Overpass queries failed for {town}, {state}. Last error: {last_error}"
    logger.error(error)
    return [], error


def populate_streets_for_town(
    session: Session,
    town: str,
    state: str,
    clear_existing: bool = False
) -> FetchResult:
    """
    Fetch streets from OSM and populate the StreetReference table.

    Args:
        session: SQLAlchemy session
        town: Town/city name
        state: 2-letter state abbreviation
        clear_existing: If True, delete existing streets for this town first

    Returns:
        FetchResult with statistics
    """
    state = state.upper()
    town_normalized = town.lower().strip()
    errors = []

    logger.info(f"Populating streets for {town}, {state}")

    # Optionally clear existing streets for this town
    if clear_existing:
        deleted = session.query(StreetReference).filter(
            StreetReference.town_normalized == town_normalized,
            StreetReference.state == state
        ).delete(synchronize_session=False)
        session.commit()
        logger.info(f"Cleared {deleted} existing street records")

    # Fetch from OSM
    streets, error = fetch_streets_from_osm(town, state)

    if error:
        errors.append(error)

    if not streets:
        return FetchResult(
            success=len(errors) == 0,
            streets_added=0,
            streets_updated=0,
            total_found=0,
            message=f"No streets found for {town}, {state}",
            errors=errors,
        )

    # Check for existing streets to avoid duplicates
    existing_streets = session.query(StreetReference).filter(
        StreetReference.town_normalized == town_normalized,
        StreetReference.state == state
    ).all()

    existing_names = {s.street_name_normalized for s in existing_streets}

    added = 0
    now = datetime.utcnow()

    for street_data in streets:
        name = street_data["name"]
        name_normalized = normalize_street_name(name)

        if name_normalized in existing_names:
            continue

        street_ref = StreetReference(
            street_name=name,
            street_name_normalized=name_normalized,
            town=town,
            town_normalized=town_normalized,
            state=state,
            osm_id=street_data.get("osm_id"),
            created_at=now,
        )
        session.add(street_ref)
        existing_names.add(name_normalized)
        added += 1

    session.commit()

    logger.info(f"Added {added} new streets for {town}, {state}")

    return FetchResult(
        success=True,
        streets_added=added,
        streets_updated=0,
        total_found=len(streets),
        message=f"Successfully added {added} streets for {town}, {state}",
        errors=errors,
    )


def find_matching_street(
    session: Session,
    street_input: str,
    town: str,
    state: str,
    min_confidence: float = 70.0
) -> Optional[StreetMatch]:
    """
    Find the best matching street for a potentially misspelled input.

    Uses fuzzy string matching with rapidfuzz to find the closest
    match in the StreetReference table.

    Args:
        session: SQLAlchemy session
        street_input: The street name to match (may be misspelled)
        town: Town/city to search within
        state: State abbreviation
        min_confidence: Minimum match confidence (0-100)

    Returns:
        StreetMatch if found above threshold, None otherwise
    """
    state = state.upper()
    town_normalized = town.lower().strip()

    # Normalize the input for matching
    input_normalized = normalize_street_name(street_input)

    # Get all streets for this town
    streets = session.query(StreetReference).filter(
        StreetReference.town_normalized == town_normalized,
        StreetReference.state == state
    ).all()

    if not streets:
        logger.debug(f"No reference streets found for {town}, {state}")
        return None

    # Build list of (normalized_name, street_object) for matching
    choices = [(s.street_name_normalized, s) for s in streets]

    # Use rapidfuzz to find best match
    # We use token_set_ratio which handles word order differences well
    best_match = None
    best_score = 0

    for normalized_name, street_obj in choices:
        # Try multiple scoring methods and take the best
        scores = [
            fuzz.ratio(input_normalized, normalized_name),
            fuzz.partial_ratio(input_normalized, normalized_name),
            fuzz.token_sort_ratio(input_normalized, normalized_name),
            fuzz.token_set_ratio(input_normalized, normalized_name),
        ]
        score = max(scores)

        if score > best_score:
            best_score = score
            best_match = street_obj

    if best_match and best_score >= min_confidence:
        logger.info(
            f"Fuzzy match: '{street_input}' -> '{best_match.street_name}' "
            f"(confidence: {best_score:.1f}%)"
        )
        return StreetMatch(
            original_street=street_input,
            matched_street=best_match.street_name,
            confidence_score=best_score,
            town=best_match.town,
            state=best_match.state,
            street_ref_id=best_match.id,
        )

    logger.debug(
        f"No confident match for '{street_input}' "
        f"(best: {best_score:.1f}%, threshold: {min_confidence}%)"
    )
    return None
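
# Illustrative usage (a sketch; assumes a Session bound to a database whose
# street_reference table has already been populated for the town):
#
#   match = find_matching_street(session, "Mian Stret", "Boston", "MA")
#   if match:
#       print(match.matched_street, match.confidence_score)
#       # e.g. "Main Street" with a score above the 70.0 default threshold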


def correct_address(
    session: Session,
    full_address: str,
    town: str,
    state: str,
    min_confidence: float = 75.0
) -> Optional[StreetMatch]:
    """
    Attempt to correct a full address using fuzzy street matching.

    Extracts the street portion, finds a match, and returns
    a corrected address with the matched street name.

    Args:
        session: SQLAlchemy session
        full_address: Full street address (e.g., "123 Mian St")
        town: Town/city name
        state: State abbreviation
        min_confidence: Minimum match confidence

    Returns:
        StreetMatch with corrected_address if match found, None otherwise
    """
    # Extract street number and street name
    street_number, street_name = extract_street_number(full_address)

    if not street_name:
        return None

    # Find matching street
    match = find_matching_street(
        session=session,
        street_input=street_name,
        town=town,
        state=state,
        min_confidence=min_confidence,
    )

    if match:
        # Build corrected address
        if street_number:
            match.corrected_address = f"{street_number} {match.matched_street}"
        else:
            match.corrected_address = match.matched_street

        logger.info(
            f"Address correction: '{full_address}' -> '{match.corrected_address}'"
        )

    return match
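
# Illustrative usage (a sketch under the same assumptions as above):
#
#   match = correct_address(session, "123 Mian St", "Boston", "MA")
#   if match and match.corrected_address:
#       print(match.corrected_address)  # e.g. "123 Main Street"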


def get_town_street_count(session: Session, town: str, state: str) -> int:
    """
    Get the number of streets in the reference table for a town.

    Args:
        session: SQLAlchemy session
        town: Town/city name
        state: State abbreviation

    Returns:
        Number of streets in the reference table
    """
    return session.query(StreetReference).filter(
        StreetReference.town_normalized == town.lower().strip(),
        StreetReference.state == state.upper()
    ).count()
389
app/tools.py
Normal file
@@ -0,0 +1,389 @@
"""
Geocoding tools for eamco_address_checker.

This module provides modular tool functions for the agentic address verification
workflow. Each function represents a discrete action in the ReAct-style pipeline.

Tools:
    - build_address(): Constructs full US address string from components
    - validate_address_components(): Validates required address fields
    - geocode_address(): Calls Nominatim API to get lat/long
    - validate_geocode_result(): Checks quality of geocoding result
    - update_record(): Updates database record with geocoding results
"""

import logging
import random
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Tuple

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError, GeocoderUnavailable
from sqlalchemy.orm import Session

from app.config import (
    NOMINATIM_USER_AGENT,
    MIN_SLEEP_SECONDS,
    MAX_SLEEP_SECONDS,
    GEOCODE_TIMEOUT,
    STATE_MAPPING,
)
from app.models import CustomerCustomer

logger = logging.getLogger(__name__)


@dataclass
class GeocodeResult:
    """Result from geocoding operation."""
    success: bool
    latitude: Optional[str] = None
    longitude: Optional[str] = None
    raw_address: Optional[str] = None
    country_code: Optional[str] = None
    error_message: Optional[str] = None
    skipped: bool = False
    skip_reason: Optional[str] = None


@dataclass
class AddressComponents:
    """Structured address components for geocoding."""
    street: Optional[str]
    apt: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]
    is_valid: bool = True
    validation_error: Optional[str] = None


def get_state_abbreviation(state_id: Optional[int]) -> Optional[str]:
    """
    Convert state integer ID to 2-letter US state abbreviation.

    Args:
        state_id: Integer ID from database

    Returns:
        2-letter state abbreviation or None if not found

    Note:
        Replace with proper states table lookup when available
    """
    if state_id is None:
        return None
    return STATE_MAPPING.get(state_id)


def build_address(customer: CustomerCustomer) -> AddressComponents:
    """
    TOOL: Build full US address string from customer record components.

    Constructs a normalized address string suitable for geocoding.
    Format: "street, apt, city, state zip"

    Args:
        customer: CustomerCustomer record with address fields

    Returns:
        AddressComponents dataclass with parsed components and validation status
    """
    # Extract and clean components
    street = (customer.customer_address or "").strip()
    apt = (customer.customer_apt or "").strip()
    city = (customer.customer_town or "").strip()
    state = get_state_abbreviation(customer.customer_state)
    zip_code = (customer.customer_zip or "").strip()

    logger.debug(
        "Building address",
        extra={
            "customer_id": customer.id,
            "street": street,
            "apt": apt,
            "city": city,
            "state": state,
            "zip": zip_code,
        }
    )

    return AddressComponents(
        street=street if street else None,
        apt=apt if apt else None,
        city=city if city else None,
        state=state,
        zip_code=zip_code if zip_code else None,
    )


def validate_address_components(components: AddressComponents) -> AddressComponents:
    """
    TOOL: Validate that address has minimum required components.

    An address is considered valid for geocoding if it has:
    - Street address (required)
    - City (required)
    - ZIP code (required)
    - State is recommended but not strictly required

    Args:
        components: AddressComponents to validate

    Returns:
        Updated AddressComponents with is_valid flag and validation_error
    """
    missing = []

    if not components.street:
        missing.append("street")
    if not components.city:
        missing.append("city")
    if not components.zip_code:
        missing.append("zip")

    if missing:
        components.is_valid = False
        components.validation_error = f"Missing required fields: {', '.join(missing)}"
        logger.debug(f"Address validation failed: {components.validation_error}")
    else:
        components.is_valid = True
        logger.debug("Address validation passed")

    return components


def format_address_string(components: AddressComponents) -> str:
    """
    Format address components into a single string for geocoding.

    Args:
        components: Validated AddressComponents

    Returns:
        Formatted address string
    """
    parts = []

    # Street + Apt
    if components.street:
        if components.apt:
            parts.append(f"{components.street}, {components.apt}")
        else:
            parts.append(components.street)

    # City
    if components.city:
        parts.append(components.city)

    # State + ZIP
    if components.state and components.zip_code:
        parts.append(f"{components.state} {components.zip_code}")
    elif components.state:
        parts.append(components.state)
    elif components.zip_code:
        parts.append(components.zip_code)

    # Add country for better accuracy
    parts.append("USA")

    return ", ".join(parts)
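
# Illustrative output (follows directly from the assembly rules above):
#   format_address_string(AddressComponents(
#       street="123 Main St", apt="Apt 2", city="Boston",
#       state="MA", zip_code="02118",
#   ))
#   -> "123 Main St, Apt 2, Boston, MA 02118, USA"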


def geocode_address(
    address_string: str,
    geocoder: Optional[Nominatim] = None
) -> GeocodeResult:
    """
    TOOL: Call Nominatim API to geocode an address.

    Uses geopy's Nominatim geocoder with proper rate limiting.
    Respects Nominatim's 1 request/second policy.

    Args:
        address_string: Full formatted address to geocode
        geocoder: Optional pre-initialized Nominatim instance

    Returns:
        GeocodeResult with lat/long or error information
    """
    if geocoder is None:
        geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)

    logger.info(f"Geocoding address: {address_string}")

    try:
        # Call Nominatim API with timeout
        location = geocoder.geocode(
            address_string,
            timeout=GEOCODE_TIMEOUT,
            addressdetails=True,
            country_codes="us",  # Limit to USA
        )

        if location is None:
            logger.warning(f"No geocoding result for: {address_string}")
            return GeocodeResult(
                success=False,
                error_message="No location found for address"
            )

        # Extract country code from raw response if available
        country_code = None
        if hasattr(location, 'raw') and 'address' in location.raw:
            country_code = location.raw['address'].get('country_code', '').upper()

        logger.info(
            f"Geocoding successful: lat={location.latitude}, lon={location.longitude}",
            extra={
                "latitude": location.latitude,
                "longitude": location.longitude,
                "raw_address": location.address,
                "country_code": country_code,
            }
        )

        return GeocodeResult(
            success=True,
            latitude=str(location.latitude),
            longitude=str(location.longitude),
            raw_address=location.address,
            country_code=country_code,
        )

    except GeocoderTimedOut as e:
        logger.error(f"Geocoding timeout: {e}")
        return GeocodeResult(
            success=False,
            error_message=f"Geocoding timed out after {GEOCODE_TIMEOUT}s"
        )

    except GeocoderServiceError as e:
        logger.error(f"Geocoder service error: {e}")
        return GeocodeResult(
            success=False,
            error_message=f"Geocoder service error: {str(e)}"
        )

    except GeocoderUnavailable as e:
        logger.error(f"Geocoder unavailable: {e}")
        return GeocodeResult(
            success=False,
            error_message=f"Geocoder unavailable: {str(e)}"
        )

    except Exception as e:
        logger.error(f"Unexpected geocoding error: {e}", exc_info=True)
        return GeocodeResult(
            success=False,
            error_message=f"Unexpected error: {str(e)}"
        )
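
# Illustrative batch usage (a sketch; reuses one Nominatim instance and sleeps
# between calls via the rate_limit_sleep() helper defined later in this module):
#
#   geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)
#   for addr in ["123 Main St, Boston, MA 02118, USA"]:
#       result = geocode_address(addr, geocoder=geocoder)
#       if result.success:
#           print(result.latitude, result.longitude)
#       rate_limit_sleep()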


def validate_geocode_result(result: GeocodeResult) -> Tuple[bool, str]:
    """
    TOOL: Validate quality of geocoding result.

    Checks:
    - Result was successful
    - Country is USA (if available)
    - Coordinates are within reasonable US bounds

    Args:
        result: GeocodeResult to validate

    Returns:
        Tuple of (is_valid, reason_string)
    """
    if not result.success:
        return False, f"Geocoding failed: {result.error_message}"

    # Check country code if available
    if result.country_code and result.country_code != "US":
        logger.warning(f"Non-US country code: {result.country_code}")
        return False, f"Result is outside USA (country: {result.country_code})"

    # Basic bounds check for continental US + Alaska + Hawaii
    try:
        lat = float(result.latitude)
        lon = float(result.longitude)

        # Rough US bounds (including Alaska and Hawaii)
        if not (18.0 <= lat <= 72.0):
            return False, f"Latitude {lat} outside US bounds"
        if not (-180.0 <= lon <= -65.0):
            return False, f"Longitude {lon} outside US bounds"

    except (ValueError, TypeError) as e:
        return False, f"Invalid coordinates: {e}"

    return True, "Valid US geocode result"
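
# Illustrative behavior (follows from the checks above):
#   validate_geocode_result(GeocodeResult(success=True, latitude="42.36",
#                                         longitude="-71.06", country_code="US"))
#   -> (True, "Valid US geocode result")
#   validate_geocode_result(GeocodeResult(success=True, latitude="48.85",
#                                         longitude="2.35", country_code="FR"))
#   -> (False, "Result is outside USA (country: FR)")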


def update_record(
    session: Session,
    customer: CustomerCustomer,
    geocode_result: GeocodeResult,
    is_valid: bool
) -> bool:
    """
    TOOL: Update customer record with geocoding results.

    Sets latitude, longitude, correct_address flag, and verified_at timestamp.

    Args:
        session: SQLAlchemy session
        customer: CustomerCustomer record to update
        geocode_result: Result from geocoding operation
        is_valid: Whether the geocode result passed validation

    Returns:
        True if update successful, False otherwise
    """
    try:
        now = datetime.utcnow()

        if is_valid and geocode_result.success:
            # Successful geocoding - update all fields
            customer.customer_latitude = geocode_result.latitude
            customer.customer_longitude = geocode_result.longitude
            customer.correct_address = True
            customer.verified_at = now

            logger.info(
                f"Updated record {customer.id}: lat={geocode_result.latitude}, "
                f"lon={geocode_result.longitude}, correct_address=True"
            )
        else:
            # Failed geocoding - mark as verified but not correct
            customer.correct_address = False
            customer.verified_at = now

            logger.info(
                f"Updated record {customer.id}: correct_address=False "
                f"(reason: {geocode_result.error_message or 'validation failed'})"
            )

        return True

    except Exception as e:
        logger.error(f"Failed to update record {customer.id}: {e}", exc_info=True)
        return False


def rate_limit_sleep() -> float:
    """
    Sleep for a random duration to respect Nominatim rate limits.

    Nominatim requires max 1 request per second. We sleep between
    MIN_SLEEP_SECONDS and MAX_SLEEP_SECONDS (default 1.2-1.8s).

    Returns:
        Actual sleep duration in seconds
    """
    sleep_time = random.uniform(MIN_SLEEP_SECONDS, MAX_SLEEP_SECONDS)
    logger.debug(f"Rate limiting: sleeping {sleep_time:.2f}s")
    time.sleep(sleep_time)
    return sleep_time
23
requirements.txt
Normal file
@@ -0,0 +1,23 @@
# eamco_address_checker dependencies
# FastAPI web framework and server
fastapi>=0.109.0,<1.0.0
uvicorn[standard]>=0.27.0,<1.0.0
pydantic>=2.5.0,<3.0.0

# Database
sqlalchemy>=2.0.0,<3.0.0
psycopg2-binary>=2.9.9,<3.0.0

# Geocoding
geopy>=2.4.1,<3.0.0

# Fuzzy string matching for address correction
rapidfuzz>=3.5.0,<4.0.0

# HTTP client (for OSM Overpass API and geopy)
requests>=2.31.0,<3.0.0
urllib3>=2.0.0,<3.0.0
certifi>=2023.0.0

# Configuration
python-dotenv>=1.0.0,<2.0.0