first commit

2026-01-18 17:53:26 -05:00
commit 0b9c0915a1
15 changed files with 2692 additions and 0 deletions

39
.env.example Normal file

@@ -0,0 +1,39 @@
# =============================================================================
# eamco_address_checker Environment Configuration
# =============================================================================
# Copy this file to .env and adjust values as needed.
# All values have sensible defaults; only override what you need.
# =============================================================================
# DATABASE
# =============================================================================
# Override the default PostgreSQL connection string
# Default: postgresql+psycopg2://postgres:password@192.168.1.204/eamco
# DATABASE_URL=postgresql+psycopg2://user:pass@host:5432/database
# =============================================================================
# BATCH PROCESSING
# =============================================================================
# Maximum records to process per batch run (default: 150)
# BATCH_SIZE=150
# Number of records to process before committing to database (default: 20)
# COMMIT_BATCH_SIZE=20
# =============================================================================
# RATE LIMITING (Nominatim)
# =============================================================================
# Minimum sleep between geocoding requests in seconds (default: 1.2)
# MIN_SLEEP=1.2
# Maximum sleep between geocoding requests in seconds (default: 1.8)
# MAX_SLEEP=1.8
# Geocoding request timeout in seconds (default: 10)
# GEOCODE_TIMEOUT=10
# =============================================================================
# LOGGING
# =============================================================================
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO)
# LOG_LEVEL=INFO
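# =============================================================================
# EXAMPLE OVERRIDES (illustrative values only - adjust for your environment)
# =============================================================================
# DATABASE_URL=postgresql+psycopg2://postgres:password@localhost:5432/eamco
# BATCH_SIZE=50
# LOG_LEVEL=DEBUG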

132
.gitignore vendored Normal file

@@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having sub-dependencies with platform-specific binaries, it is better to ignore the Pipfile.lock.
# Pipfile.lock
# PEP 582; __pypackages__
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyderworkspace
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Additional environment files
.env.local
.env.prod

45
Dockerfile Normal file

@@ -0,0 +1,45 @@
# eamco_address_checker Dockerfile
# Lightweight Python 3.11 image for Unraid Docker deployment
FROM python:3.11-slim
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PYTHONPATH=/app \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Set working directory
WORKDIR /app
# Install system dependencies (psycopg2 requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
libpq-dev \
gcc \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# Copy requirements first (for better layer caching)
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app/ ./app/
# Create non-root user for security
RUN useradd --create-home --shell /bin/bash appuser && \
chown -R appuser:appuser /app
USER appuser
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

14
Dockerfile.dev Normal file

@@ -0,0 +1,14 @@
FROM python:3.11
ENV PYTHONFAULTHANDLER=1
ENV PYTHONUNBUFFERED=1
ENV MODE="DEVELOPMENT"
WORKDIR /app
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
COPY . .
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

40
Dockerfile.local Normal file

@@ -0,0 +1,40 @@
# eamco_address_checker - DEVELOPMENT Dockerfile
# Used by docker-compose.local.yml
# Features: Hot reload via volume mount, debug logging
FROM python:3.11-slim
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PYTHONPATH=/app \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Set working directory
WORKDIR /app
# Install system dependencies (psycopg2 requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
libpq-dev \
gcc \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# Copy requirements first (for better layer caching)
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy environment file for local development
COPY .env.local .env
# Copy application code (will be overridden by volume mount in compose)
COPY app/ ./app/
# Expose port
EXPOSE 8000
# Development: Run with reload enabled
CMD ["uvicorn", "app.main:app", "--reload", "--host", "0.0.0.0", "--port", "8000"]

49
Dockerfile.prod Normal file

@@ -0,0 +1,49 @@
# eamco_address_checker - PRODUCTION Dockerfile
# Used by docker-compose.prod.yml
# Features: Optimized for production, non-root user, health checks
FROM python:3.11-slim
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PYTHONPATH=/app \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Set working directory
WORKDIR /app
# Install system dependencies (psycopg2 requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
libpq-dev \
gcc \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# Copy requirements first (for better layer caching)
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy environment file for production
COPY .env.prod .env
# Copy application code
COPY app/ ./app/
# Create non-root user for security
RUN useradd --create-home --shell /bin/bash appuser && \
chown -R appuser:appuser /app
USER appuser
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
# Production: Run without reload, with workers
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]

3
README.md Normal file

@@ -0,0 +1,3 @@
# EAMCO Address Checker
Batch address-verification microservice (FastAPI). It geocodes customer addresses with Nominatim and corrects misspelled street names via fuzzy matching against OpenStreetMap street data. See `/docs` for the API, or trigger a batch with `curl -X POST http://localhost:8000/verify-addresses`.

1
app/__init__.py Normal file

@@ -0,0 +1 @@
# eamco_address_checker app package

516
app/agent.py Normal file

@@ -0,0 +1,516 @@
"""
Agentic Address Verification Orchestrator.
This module implements a lightweight ReAct-inspired autonomous agent for batch
address verification. The agent follows a structured workflow:
1. PLANNING PHASE: Query records needing verification
2. EXECUTION PHASE: For each record, follow think-act-observe-reflect cycle
- If geocoding fails, attempt fuzzy matching to correct misspellings
- Retry geocoding with corrected address
3. REFLECTION PHASE: Summarize batch results and statistics
The agent is designed for resilience - individual record failures don't stop
the batch, and progress is committed incrementally.
"""
import copy
import logging
from dataclasses import dataclass, field
from datetime import datetime, date
from typing import List, Optional
from geopy.geocoders import Nominatim
from sqlalchemy import or_, func
from sqlalchemy.orm import Session
from app.config import (
BATCH_SIZE,
COMMIT_BATCH_SIZE,
NOMINATIM_USER_AGENT,
)
from app.models import CustomerCustomer
from app.tools import (
build_address,
validate_address_components,
format_address_string,
geocode_address,
validate_geocode_result,
update_record,
rate_limit_sleep,
GeocodeResult,
get_state_abbreviation,
)
from app.streets import correct_address, get_town_street_count
logger = logging.getLogger(__name__)
@dataclass
class BatchStats:
"""Statistics for a batch verification run."""
total_queried: int = 0
processed: int = 0
updated: int = 0
corrected: int = 0 # Addresses fixed via fuzzy matching
failed: int = 0
skipped: int = 0
rate_limited: int = 0
errors: List[str] = field(default_factory=list)
corrections: List[str] = field(default_factory=list) # Log of corrections made
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
@property
def duration_seconds(self) -> float:
"""Calculate batch duration in seconds."""
if self.start_time and self.end_time:
return (self.end_time - self.start_time).total_seconds()
return 0.0
def to_dict(self) -> dict:
"""Convert stats to dictionary for JSON response."""
return {
"total_queried": self.total_queried,
"processed": self.processed,
"updated": self.updated,
"corrected": self.corrected,
"failed": self.failed,
"skipped": self.skipped,
"rate_limited": self.rate_limited,
"duration_seconds": round(self.duration_seconds, 2),
"errors_count": len(self.errors),
"sample_errors": self.errors[:5] if self.errors else [],
"sample_corrections": self.corrections[:5] if self.corrections else [],
}
class AddressVerificationAgent:
"""
Lightweight autonomous agent for address verification.
Implements a ReAct-inspired workflow where each record goes through:
- OBSERVE: Examine the address data
- THINK: Decide if geocoding should be attempted
- ACT: Call geocoding API
- OBSERVE: Examine the result
- REFLECT: Log decision and update database
Attributes:
session: SQLAlchemy database session
batch_size: Maximum records per batch
commit_size: Records between commits
stats: Running statistics for the batch
geocoder: Nominatim geocoder instance
"""
def __init__(
self,
session: Session,
batch_size: int = BATCH_SIZE,
commit_size: int = COMMIT_BATCH_SIZE,
):
"""
Initialize the address verification agent.
Args:
session: SQLAlchemy session for database operations
batch_size: Max records to process (default from config)
commit_size: Records before intermediate commit
"""
self.session = session
self.batch_size = batch_size
self.commit_size = commit_size
self.stats = BatchStats()
self.geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)
logger.info(
f"Agent initialized: batch_size={batch_size}, commit_size={commit_size}"
)
# =========================================================================
# PHASE 1: PLANNING
# =========================================================================
def plan_batch(self) -> List[CustomerCustomer]:
"""
PLANNING PHASE: Query records that need address verification.
Criteria for selection:
- correct_address = FALSE, OR
- verified_at IS NULL, OR
- verified_at < today (not verified today)
Returns:
List of CustomerCustomer records to process
"""
logger.info("=" * 60)
logger.info("PLANNING PHASE: Querying records needing verification")
logger.info("=" * 60)
today = date.today()
# Build query for records needing verification
query = self.session.query(CustomerCustomer).filter(
or_(
CustomerCustomer.correct_address == False, # noqa: E712
CustomerCustomer.verified_at.is_(None),
func.date(CustomerCustomer.verified_at) < today,
)
).limit(self.batch_size)
records = query.all()
self.stats.total_queried = len(records)
logger.info(
f"PLAN RESULT: Found {len(records)} records needing verification",
extra={"record_count": len(records), "batch_limit": self.batch_size}
)
# Log sample of record IDs for debugging
if records:
sample_ids = [r.id for r in records[:10]]
logger.debug(f"Sample record IDs: {sample_ids}")
return records
# =========================================================================
# PHASE 2: EXECUTION (ReAct-style per record)
# =========================================================================
def process_record(self, customer: CustomerCustomer) -> bool:
"""
EXECUTION PHASE: Process a single record with ReAct-style workflow.
Steps:
1. OBSERVE: Build address from record components
2. THINK: Validate address - skip if obviously invalid
3. ACT: Call Nominatim geocoder
4. OBSERVE: Examine geocoding result
5. REFLECT: Log decision and update database
Args:
customer: CustomerCustomer record to process
Returns:
True if record was successfully updated, False otherwise
"""
logger.info("-" * 40)
logger.info(f"Processing record ID: {customer.id}")
# -----------------------------------------------------------------
# STEP 1: OBSERVE - Build address from components
# -----------------------------------------------------------------
logger.debug(f"[OBSERVE] Building address for customer {customer.id}")
address_components = build_address(customer)
# -----------------------------------------------------------------
# STEP 2: THINK - Validate address components
# -----------------------------------------------------------------
logger.debug(f"[THINK] Validating address components")
address_components = validate_address_components(address_components)
if not address_components.is_valid:
# REFLECT: Skip invalid addresses
logger.info(
f"[REFLECT] Skipping record {customer.id}: "
f"{address_components.validation_error}"
)
self.stats.skipped += 1
# Still update the record to mark it as processed
geocode_result = GeocodeResult(
success=False,
skipped=True,
skip_reason=address_components.validation_error,
error_message=address_components.validation_error,
)
update_record(self.session, customer, geocode_result, is_valid=False)
return False
# Format address for geocoding
address_string = format_address_string(address_components)
logger.debug(f"[THINK] Formatted address: {address_string}")
# -----------------------------------------------------------------
# STEP 3: ACT - Call geocoding API
# -----------------------------------------------------------------
logger.debug(f"[ACT] Calling Nominatim geocoder")
geocode_result = geocode_address(address_string, self.geocoder)
# -----------------------------------------------------------------
# STEP 4: OBSERVE - Examine geocoding result
# -----------------------------------------------------------------
logger.debug(f"[OBSERVE] Geocoding result: success={geocode_result.success}")
if not geocode_result.success:
# -----------------------------------------------------------------
# STEP 4a: THINK - Try fuzzy matching to correct address
# -----------------------------------------------------------------
logger.info(
f"[THINK] Geocoding failed, attempting fuzzy street matching..."
)
# Get state abbreviation for fuzzy matching
state_abbr = get_state_abbreviation(customer.customer_state)
town = address_components.city
if state_abbr and town:
# Check if we have street data for this town
street_count = get_town_street_count(self.session, town, state_abbr)
if street_count > 0:
# Try to correct the address
match = correct_address(
session=self.session,
full_address=address_components.street or "",
town=town,
state=state_abbr,
min_confidence=75.0,
)
if match and match.corrected_address:
logger.info(
f"[ACT] Found correction: '{address_components.street}' "
f"-> '{match.corrected_address}' "
f"(confidence: {match.confidence_score:.1f}%)"
)
# Build corrected address string; copy the components first, since
# assigning them directly would alias the original object and clobber
# the original street logged in stats.corrections below
corrected_components = copy.copy(address_components)
corrected_components.street = match.corrected_address
corrected_address_string = format_address_string(corrected_components)
logger.info(f"[ACT] Retrying with corrected address: {corrected_address_string}")
# Rate limit before retry
rate_limit_sleep()
# Retry geocoding with corrected address
geocode_result = geocode_address(corrected_address_string, self.geocoder)
if geocode_result.success:
logger.info(
f"[OBSERVE] Corrected address geocoded successfully!"
)
self.stats.corrected += 1
self.stats.corrections.append(
f"ID {customer.id}: '{address_components.street}' "
f"-> '{match.corrected_address}'"
)
else:
logger.info(
f"[OBSERVE] Corrected address still failed to geocode"
)
else:
logger.debug(
f"[THINK] No confident fuzzy match found"
)
else:
logger.debug(
f"[THINK] No street reference data for {town}, {state_abbr}. "
f"Use POST /streets/{town}/{state_abbr} to populate."
)
# If still failed after correction attempt
if not geocode_result.success:
# -----------------------------------------------------------------
# STEP 5a: REFLECT - Handle failed geocoding
# -----------------------------------------------------------------
logger.info(
f"[REFLECT] Geocoding failed for record {customer.id}: "
f"{geocode_result.error_message}"
)
self.stats.failed += 1
self.stats.errors.append(
f"ID {customer.id}: {geocode_result.error_message}"
)
update_record(self.session, customer, geocode_result, is_valid=False)
return False
# Validate geocode result quality
is_valid, validation_reason = validate_geocode_result(geocode_result)
logger.debug(f"[OBSERVE] Validation: valid={is_valid}, reason={validation_reason}")
# -----------------------------------------------------------------
# STEP 5b: REFLECT - Update database with result
# -----------------------------------------------------------------
if is_valid:
logger.info(
f"[REFLECT] Success for record {customer.id}: "
f"lat={geocode_result.latitude}, lon={geocode_result.longitude}"
)
self.stats.updated += 1
else:
logger.info(
f"[REFLECT] Invalid result for record {customer.id}: {validation_reason}"
)
self.stats.failed += 1
self.stats.errors.append(f"ID {customer.id}: {validation_reason}")
update_record(self.session, customer, geocode_result, is_valid=is_valid)
return is_valid
def execute_batch(self, records: List[CustomerCustomer]) -> None:
"""
Execute the batch processing loop with rate limiting.
Processes records sequentially with proper rate limiting between
geocoding calls. Commits to database periodically.
Args:
records: List of CustomerCustomer records to process
"""
logger.info("=" * 60)
logger.info("EXECUTION PHASE: Processing records")
logger.info("=" * 60)
uncommitted_count = 0
for i, customer in enumerate(records):
try:
# Process the record
self.process_record(customer)
self.stats.processed += 1
uncommitted_count += 1
# Commit in batches
if uncommitted_count >= self.commit_size:
logger.info(f"Committing batch of {uncommitted_count} records")
self.session.commit()
uncommitted_count = 0
# Rate limiting (skip on last record)
if i < len(records) - 1:
rate_limit_sleep()
except Exception as e:
# Handle unexpected errors - continue processing
logger.error(
f"Unexpected error processing record {customer.id}: {e}",
exc_info=True
)
self.stats.failed += 1
self.stats.errors.append(f"ID {customer.id}: Unexpected error: {str(e)}")
self.stats.processed += 1
# Rollback the current transaction and continue
self.session.rollback()
uncommitted_count = 0
# Final commit for any remaining records
if uncommitted_count > 0:
logger.info(f"Final commit of {uncommitted_count} records")
self.session.commit()
# =========================================================================
# PHASE 3: REFLECTION
# =========================================================================
def reflect(self) -> dict:
"""
REFLECTION PHASE: Summarize batch results and statistics.
Logs comprehensive statistics about the batch run and returns
a summary dictionary suitable for API response.
Returns:
Dictionary with batch statistics
"""
self.stats.end_time = datetime.utcnow()
logger.info("=" * 60)
logger.info("REFLECTION PHASE: Batch Summary")
logger.info("=" * 60)
stats_dict = self.stats.to_dict()
logger.info(f"Total queried: {stats_dict['total_queried']}")
logger.info(f"Processed: {stats_dict['processed']}")
logger.info(f"Updated (valid): {stats_dict['updated']}")
logger.info(f"Corrected: {stats_dict['corrected']}")
logger.info(f"Failed: {stats_dict['failed']}")
logger.info(f"Skipped: {stats_dict['skipped']}")
logger.info(f"Duration: {stats_dict['duration_seconds']}s")
if stats_dict['errors_count'] > 0:
logger.warning(f"Errors encountered: {stats_dict['errors_count']}")
for error in stats_dict['sample_errors']:
logger.warning(f" - {error}")
if stats_dict['corrected'] > 0:
logger.info(f"Addresses corrected via fuzzy matching: {stats_dict['corrected']}")
for correction in stats_dict['sample_corrections']:
logger.info(f" - {correction}")
# Calculate success rate
if stats_dict['processed'] > 0:
success_rate = (stats_dict['updated'] / stats_dict['processed']) * 100
logger.info(f"Success rate: {success_rate:.1f}%")
stats_dict['success_rate'] = round(success_rate, 1)
else:
stats_dict['success_rate'] = 0.0
logger.info("=" * 60)
return stats_dict
# =========================================================================
# MAIN ENTRY POINT
# =========================================================================
def run(self) -> dict:
"""
Execute the full agent workflow.
Runs through all three phases:
1. Planning - Query records
2. Execution - Process each record
3. Reflection - Summarize results
Returns:
Dictionary with batch statistics and message
"""
logger.info("*" * 60)
logger.info("ADDRESS VERIFICATION AGENT STARTING")
logger.info("*" * 60)
self.stats.start_time = datetime.utcnow()
try:
# Phase 1: Planning
records = self.plan_batch()
if not records:
logger.info("No records to process - batch complete")
self.stats.end_time = datetime.utcnow()
return {
"status": "success",
"message": "No records needed verification",
**self.stats.to_dict(),
}
# Phase 2: Execution
self.execute_batch(records)
# Phase 3: Reflection
stats = self.reflect()
logger.info("*" * 60)
logger.info("ADDRESS VERIFICATION AGENT COMPLETE")
logger.info("*" * 60)
return {
"status": "success",
"message": f"Batch complete: {stats['updated']} addresses updated",
**stats,
}
except Exception as e:
logger.error(f"Agent failed with error: {e}", exc_info=True)
self.stats.end_time = datetime.utcnow()
return {
"status": "error",
"message": f"Agent failed: {str(e)}",
**self.stats.to_dict(),
}

184
app/config.py Normal file

@@ -0,0 +1,184 @@
"""
Configuration settings for eamco_address_checker.
This module provides configuration with environment-based switching:
- DEVELOPMENT: Uses 'eamco' database, localhost CORS origins
- PRODUCTION: Uses 'auburnoil' database, production domain CORS origins
Environment variables are loaded from .env.local or .env.prod depending
on the Docker compose file used.
"""
import os
from typing import List
from dotenv import load_dotenv
# Load environment variables from .env file if present
load_dotenv()
# =============================================================================
# ENVIRONMENT MODE
# =============================================================================
MODE = os.getenv("MODE", "LOCAL")
CURRENT_SETTINGS = os.getenv("CURRENT_SETTINGS", "DEVELOPMENT")
if CURRENT_SETTINGS == "PRODUCTION":
print("USING PRODUCTION APPLICATIONCONFIG!!!!!")
else:
print("USING DEVELOPMENT APPLICATIONCONFIG!!!!!")
# =============================================================================
# DATABASE CONFIGURATION
# =============================================================================
# Database connection components (can be overridden individually)
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME", "postgres")
POSTGRES_PW = os.getenv("POSTGRES_PW", "password")
POSTGRES_SERVER = os.getenv("POSTGRES_SERVER", "192.168.1.204")
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
# Database name differs by environment
if CURRENT_SETTINGS == "PRODUCTION":
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "auburnoil")
else:
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME", "eamco")
# Build connection URI from components (fallback)
_DEFAULT_DATABASE_URI = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
POSTGRES_USERNAME,
POSTGRES_PW,
POSTGRES_SERVER,
POSTGRES_PORT,
POSTGRES_DBNAME
)
# Allow full DATABASE_URL override
DATABASE_URL: str = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URI)
# SQLAlchemy binds (for compatibility)
SQLALCHEMY_DATABASE_URI = DATABASE_URL
SQLALCHEMY_BINDS = {POSTGRES_DBNAME: SQLALCHEMY_DATABASE_URI}
# =============================================================================
# CORS CONFIGURATION
# =============================================================================
# Parse CORS origins from environment (comma-separated) or use defaults
_cors_env = os.getenv("CORS_ORIGINS", "")
if _cors_env:
CORS_ORIGINS: List[str] = [origin.strip() for origin in _cors_env.split(",")]
elif CURRENT_SETTINGS == "PRODUCTION":
# Production CORS origins
CORS_ORIGINS = [
"https://oil.edwineames.com",
"https://edwineames.com",
]
else:
# Development CORS origins
CORS_ORIGINS = [
"http://localhost:9000",
"https://localhost:9513",
"http://localhost:9514",
"http://localhost:9512",
"http://localhost:9511",
"http://localhost:5173", # Frontend port
"http://localhost:9616", # Authorize service port
]
# =============================================================================
# BATCH PROCESSING CONFIGURATION
# =============================================================================
# Maximum records to process in a single batch run
BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "150"))
# Records to process before committing to database
COMMIT_BATCH_SIZE: int = int(os.getenv("COMMIT_BATCH_SIZE", "20"))
# =============================================================================
# GEOCODING CONFIGURATION (Nominatim)
# =============================================================================
# User agent for Nominatim API (required - identifies your application)
NOMINATIM_USER_AGENT: str = "Unraid-EamcoAddressChecker/1.0 (eeames214@gmail.com)"
# Rate limiting: Sleep range between requests (Nominatim requires 1 req/sec max)
MIN_SLEEP_SECONDS: float = float(os.getenv("MIN_SLEEP", "1.2"))
MAX_SLEEP_SECONDS: float = float(os.getenv("MAX_SLEEP", "1.8"))
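# Example: with the defaults above, the mean delay is (1.2 + 1.8) / 2 = 1.5 s
# per request, so a full 150-record batch takes roughly 150 * 1.5 = 225 s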
# Geocoding timeout in seconds
GEOCODE_TIMEOUT: int = int(os.getenv("GEOCODE_TIMEOUT", "10"))
# =============================================================================
# STATE MAPPING
# =============================================================================
# Integer -> US State Abbreviation mapping
# Replace with proper states table lookup when available
STATE_MAPPING: dict[int, str] = {
1: "AL", # Alabama
2: "AK", # Alaska
3: "AS", # American Samoa
4: "AZ", # Arizona
5: "AR", # Arkansas
6: "CA", # California
7: "CO", # Colorado
8: "CT", # Connecticut
9: "DE", # Delaware
10: "DC", # District of Columbia
11: "FL", # Florida
12: "GA", # Georgia
13: "GU", # Guam
14: "HI", # Hawaii
15: "ID", # Idaho
16: "IL", # Illinois
17: "IN", # Indiana
18: "IA", # Iowa
19: "KS", # Kansas
20: "KY", # Kentucky
21: "LA", # Louisiana
22: "ME", # Maine
23: "MD", # Maryland
24: "MA", # Massachusetts
25: "MI", # Michigan
26: "MN", # Minnesota
27: "MS", # Mississippi
28: "MO", # Missouri
29: "MT", # Montana
30: "NE", # Nebraska
31: "NV", # Nevada
32: "NH", # New Hampshire
33: "NJ", # New Jersey
34: "NM", # New Mexico
35: "NY", # New York
36: "NC", # North Carolina
37: "ND", # North Dakota
38: "OH", # Ohio
39: "OK", # Oklahoma
40: "OR", # Oregon
41: "PA", # Pennsylvania
42: "PR", # Puerto Rico
43: "RI", # Rhode Island
44: "SC", # South Carolina
45: "SD", # South Dakota
46: "TN", # Tennessee
47: "TX", # Texas
48: "UT", # Utah
49: "VT", # Vermont
50: "VA", # Virginia
51: "VI", # Virgin Islands
52: "WA", # Washington
53: "WV", # West Virginia
54: "WI", # Wisconsin
55: "WY", # Wyoming
}
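# Example lookup: STATE_MAPPING.get(24) -> "MA"; unmapped integers return
# None via .get()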
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

558
app/main.py Normal file

@@ -0,0 +1,558 @@
"""
eamco_address_checker - FastAPI Address Verification Microservice.
This microservice provides a batch job endpoint for verifying customer addresses
using geocoding. Designed to be triggered via cron from Unraid.
Endpoints:
GET /health - Health check with database connectivity status
POST /verify-addresses - Trigger batch address verification
POST /reset-verifications - Clear all verification data for re-checking
POST /streets/{town}/{state} - Fetch and store streets from OSM for a town
GET /streets/{town}/{state} - Get street count for a town
Usage:
# Development
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
# Production (Docker)
docker run -p 8000:8000 eamco_address_checker
# Trigger from cron
curl -X POST http://localhost:8000/verify-addresses
"""
import logging
import sys
from contextlib import contextmanager
from typing import Generator
from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.exc import SQLAlchemyError
from app.config import (
DATABASE_URL,
CORS_ORIGINS,
LOG_LEVEL,
LOG_FORMAT,
BATCH_SIZE,
COMMIT_BATCH_SIZE,
)
from app.agent import AddressVerificationAgent
from app.models import CustomerCustomer, StreetReference, Base
from app.streets import (
populate_streets_for_town,
get_town_street_count,
)
# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
logging.basicConfig(
level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
format=LOG_FORMAT,
handlers=[
logging.StreamHandler(sys.stdout),
]
)
logger = logging.getLogger(__name__)
# =============================================================================
# DATABASE SETUP
# =============================================================================
# Create SQLAlchemy engine with connection pooling
engine = create_engine(
DATABASE_URL,
pool_pre_ping=True, # Verify connections before use
pool_size=5,
max_overflow=10,
echo=False, # Set to True for SQL debugging
)
# Session factory
SessionLocal = sessionmaker(
autocommit=False,
autoflush=False,
bind=engine,
)
def get_db() -> Generator[Session, None, None]:
"""
Dependency that provides a database session.
Yields a SQLAlchemy session and ensures proper cleanup.
"""
db = SessionLocal()
try:
yield db
finally:
db.close()
@contextmanager
def get_db_session() -> Generator[Session, None, None]:
"""
Context manager for database sessions (non-dependency use).
"""
db = SessionLocal()
try:
yield db
finally:
db.close()
def check_db_connection() -> bool:
"""
Test database connectivity.
Returns:
True if database is reachable, False otherwise
"""
try:
with get_db_session() as db:
db.execute(text("SELECT 1"))
return True
except SQLAlchemyError as e:
logger.error(f"Database connection failed: {e}")
return False
# =============================================================================
# FASTAPI APPLICATION
# =============================================================================
app = FastAPI(
title="eamco_address_checker",
description="Address verification microservice using Nominatim geocoding",
version="1.0.0",
docs_url="/docs",
redoc_url="/redoc",
)
# =============================================================================
# CORS MIDDLEWARE
# =============================================================================
app.add_middleware(
CORSMiddleware,
allow_origins=CORS_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# =============================================================================
# PYDANTIC MODELS (Response Schemas)
# =============================================================================
class HealthResponse(BaseModel):
"""Health check response schema."""
status: str
db_connected: bool
class VerificationResponse(BaseModel):
"""Address verification batch response schema."""
status: str
message: str
total_queried: int
processed: int
updated: int
corrected: int
failed: int
skipped: int
rate_limited: int
duration_seconds: float
success_rate: float
errors_count: int
sample_errors: list
sample_corrections: list
class ResetResponse(BaseModel):
"""Reset verifications response schema."""
status: str
message: str
records_reset: int
class StreetPopulateResponse(BaseModel):
"""Response for street population endpoint."""
status: str
message: str
town: str
state: str
streets_added: int
streets_updated: int
total_found: int
errors: list
class StreetInfoResponse(BaseModel):
"""Response for street info endpoint."""
town: str
state: str
street_count: int
message: str
# =============================================================================
# ENDPOINTS
# =============================================================================
@app.get("/", include_in_schema=False)
async def root():
"""Root endpoint - redirect to docs."""
return {
"service": "eamco_address_checker",
"version": "1.0.0",
"docs": "/docs",
}
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
"""
Health check endpoint.
Returns service status and database connectivity.
Use this endpoint for container health checks and monitoring.
Returns:
HealthResponse with status and db_connected flag
"""
db_connected = check_db_connection()
return HealthResponse(
status="healthy" if db_connected else "degraded",
db_connected=db_connected,
)
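# Example check (illustrative): curl http://localhost:8000/health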
@app.post(
"/verify-addresses",
response_model=VerificationResponse,
tags=["Verification"],
)
async def verify_addresses(db: Session = Depends(get_db)):
"""
Trigger batch address verification.
This endpoint runs a synchronous batch job that:
1. Queries records needing verification (max BATCH_SIZE)
2. Geocodes each address using Nominatim
3. Updates records with lat/long and verification status
4. Returns statistics about the batch run
The batch respects Nominatim rate limits (1 req/sec) so execution
time is approximately BATCH_SIZE * 1.5 seconds.
Use this endpoint from Unraid cron:
curl -X POST http://localhost:8000/verify-addresses
Returns:
VerificationResponse with batch statistics
"""
logger.info("=" * 60)
logger.info("VERIFY-ADDRESSES ENDPOINT CALLED")
logger.info("=" * 60)
logger.info(f"Configuration: BATCH_SIZE={BATCH_SIZE}, COMMIT_SIZE={COMMIT_BATCH_SIZE}")
try:
# Initialize and run the agent
agent = AddressVerificationAgent(
session=db,
batch_size=BATCH_SIZE,
commit_size=COMMIT_BATCH_SIZE,
)
result = agent.run()
logger.info(f"Batch complete: {result.get('message', 'No message')}")
return VerificationResponse(
status=result.get("status", "unknown"),
message=result.get("message", ""),
total_queried=result.get("total_queried", 0),
processed=result.get("processed", 0),
updated=result.get("updated", 0),
corrected=result.get("corrected", 0),
failed=result.get("failed", 0),
skipped=result.get("skipped", 0),
rate_limited=result.get("rate_limited", 0),
duration_seconds=result.get("duration_seconds", 0.0),
success_rate=result.get("success_rate", 0.0),
errors_count=result.get("errors_count", 0),
sample_errors=result.get("sample_errors", []),
sample_corrections=result.get("sample_corrections", []),
)
except SQLAlchemyError as e:
logger.error(f"Database error during verification: {e}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Database error: {str(e)}"
)
except Exception as e:
logger.error(f"Unexpected error during verification: {e}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Verification failed: {str(e)}"
)
@app.post(
"/reset-verifications",
response_model=ResetResponse,
tags=["Verification"],
)
async def reset_verifications(db: Session = Depends(get_db)):
"""
Reset all address verifications for re-checking.
This endpoint clears verification data for ALL customer records:
- Sets correct_address = FALSE
- Sets verified_at = NULL
- Clears customer_latitude and customer_longitude
After calling this endpoint, all addresses will be eligible for
re-verification on the next /verify-addresses call.
WARNING: This is a mass update operation. Use with caution.
Returns:
ResetResponse with count of records reset
"""
logger.info("=" * 60)
logger.info("RESET-VERIFICATIONS ENDPOINT CALLED")
logger.info("=" * 60)
try:
# Count records before update
total_records = db.query(CustomerCustomer).count()
logger.info(f"Total customer records: {total_records}")
# Mass update to reset all verification data
updated_count = db.query(CustomerCustomer).update(
{
CustomerCustomer.correct_address: False,
CustomerCustomer.verified_at: None,
CustomerCustomer.customer_latitude: None,
CustomerCustomer.customer_longitude: None,
},
synchronize_session=False
)
db.commit()
logger.info(f"Reset {updated_count} records successfully")
return ResetResponse(
status="success",
message=f"Reset {updated_count} address verifications. All addresses are now eligible for re-verification.",
records_reset=updated_count,
)
except SQLAlchemyError as e:
db.rollback()
logger.error(f"Database error during reset: {e}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Database error: {str(e)}"
)
except Exception as e:
db.rollback()
logger.error(f"Unexpected error during reset: {e}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Reset failed: {str(e)}"
)
# =============================================================================
# STREET REFERENCE ENDPOINTS
# =============================================================================
@app.post(
"/streets/{town}/{state}",
response_model=StreetPopulateResponse,
tags=["Streets"],
)
async def populate_streets(
town: str,
state: str,
clear_existing: bool = False,
db: Session = Depends(get_db)
):
"""
Fetch and store all streets for a town from OpenStreetMap.
This endpoint queries the OSM Overpass API to get all named streets
in the specified town and stores them in the street_reference table
for fuzzy matching during address verification.
Args:
town: Town/city name (e.g., "Boston")
state: 2-letter state abbreviation (e.g., "MA")
clear_existing: If true, delete existing streets for this town first
Example:
curl -X POST http://localhost:8000/streets/Boston/MA
Returns:
StreetPopulateResponse with count of streets added
"""
logger.info("=" * 60)
logger.info(f"POPULATE STREETS: {town}, {state}")
logger.info("=" * 60)
# Validate state abbreviation (2 letters)
if len(state) != 2 or not state.isalpha():
raise HTTPException(
status_code=400,
detail="State must be a 2-letter abbreviation (e.g., MA, NY, CA)"
)
try:
# Ensure the street_reference table exists
Base.metadata.create_all(bind=engine, tables=[StreetReference.__table__])
result = populate_streets_for_town(
session=db,
town=town,
state=state.upper(),
clear_existing=clear_existing,
)
return StreetPopulateResponse(
status="success" if result.success else "partial",
message=result.message,
town=town,
state=state.upper(),
streets_added=result.streets_added,
streets_updated=result.streets_updated,
total_found=result.total_found,
errors=result.errors,
)
except SQLAlchemyError as e:
db.rollback()
logger.error(f"Database error populating streets: {e}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Database error: {str(e)}"
)
except Exception as e:
logger.error(f"Error populating streets: {e}", exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Failed to populate streets: {str(e)}"
)
@app.get(
"/streets/{town}/{state}",
response_model=StreetInfoResponse,
tags=["Streets"],
)
async def get_street_info(
town: str,
state: str,
db: Session = Depends(get_db)
):
"""
Get information about streets stored for a town.
Returns the count of streets in the reference table for the
specified town/state combination.
Args:
town: Town/city name
state: 2-letter state abbreviation
Example:
curl http://localhost:8000/streets/Boston/MA
Returns:
StreetInfoResponse with street count
"""
# Validate state abbreviation
if len(state) != 2 or not state.isalpha():
raise HTTPException(
status_code=400,
detail="State must be a 2-letter abbreviation"
)
count = get_town_street_count(db, town, state.upper())
if count == 0:
message = f"No streets found for {town}, {state}. Use POST to populate."
else:
message = f"Found {count} streets for {town}, {state}"
return StreetInfoResponse(
town=town,
state=state.upper(),
street_count=count,
message=message,
)
# =============================================================================
# STARTUP/SHUTDOWN EVENTS
# =============================================================================
@app.on_event("startup")
async def startup_event():
"""Application startup - log configuration and test DB connection."""
logger.info("*" * 60)
logger.info("eamco_address_checker STARTING")
logger.info("*" * 60)
logger.info(f"Database URL: {DATABASE_URL[:50]}...")
logger.info(f"CORS Origins: {CORS_ORIGINS}")
logger.info(f"Batch Size: {BATCH_SIZE}")
logger.info(f"Commit Batch Size: {COMMIT_BATCH_SIZE}")
# Test database connection
if check_db_connection():
logger.info("Database connection: OK")
else:
logger.warning("Database connection: FAILED - service may be degraded")
@app.on_event("shutdown")
async def shutdown_event():
"""Application shutdown - cleanup."""
logger.info("eamco_address_checker SHUTTING DOWN")
engine.dispose()
logger.info("Database connections closed")
# =============================================================================
# MAIN ENTRY POINT (for direct execution)
# =============================================================================
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host="0.0.0.0",
port=8000,
reload=True,
log_level="info",
)

127
app/models.py Normal file

@@ -0,0 +1,127 @@
"""
SQLAlchemy 2.x ORM Models for eamco_address_checker.
This module defines the database models using SQLAlchemy's DeclarativeBase.
Models:
CustomerCustomer: Customer records with address fields for geocoding
StreetReference: Known streets by town/state for fuzzy matching corrections
"""
from sqlalchemy import Column, Integer, String, VARCHAR, TIMESTAMP, BOOLEAN, Index
from sqlalchemy.orm import DeclarativeBase
class Base(DeclarativeBase):
"""Base class for all SQLAlchemy models."""
pass
class CustomerCustomer(Base):
"""
Customer model representing address and contact information.
The verified_at timestamp tracks when the address was last geocoded.
The correct_address boolean indicates if geocoding was successful.
Attributes:
id: Primary key
auth_net_profile_id: Authorize.net customer profile ID
account_number: Customer account number (max 25 chars)
customer_last_name: Customer's last name (max 250 chars)
customer_first_name: Customer's first name (max 250 chars)
customer_town: City/town name (max 140 chars)
customer_state: Integer mapping to US state abbreviation
customer_zip: ZIP code (max 25 chars)
customer_first_call: Timestamp of first customer contact
customer_email: Customer email address (max 500 chars)
customer_automatic: Automatic billing flag
customer_phone_number: Phone number (max 25 chars)
customer_home_type: Type of residence
customer_apt: Apartment/unit number (max 140 chars)
customer_address: Street address (max 1000 chars)
company_id: Associated company ID
customer_latitude: Geocoded latitude as string (max 250 chars)
customer_longitude: Geocoded longitude as string (max 250 chars)
correct_address: Flag indicating successful geocoding
verified_at: Timestamp of last verification attempt
"""
__tablename__ = "customer_customer"
__table_args__ = {"schema": "public"}
id = Column(Integer, primary_key=True, autoincrement=True)
auth_net_profile_id = Column(String, unique=True, index=True, nullable=True)
account_number = Column(VARCHAR(25))
customer_last_name = Column(VARCHAR(250))
customer_first_name = Column(VARCHAR(250))
customer_town = Column(VARCHAR(140))
customer_state = Column(Integer) # Integer -> 2-letter US state abbreviation
customer_zip = Column(VARCHAR(25))
customer_first_call = Column(TIMESTAMP)
customer_email = Column(VARCHAR(500))
customer_automatic = Column(Integer)
customer_phone_number = Column(VARCHAR(25))
customer_home_type = Column(Integer)
customer_apt = Column(VARCHAR(140))
customer_address = Column(VARCHAR(1000))
company_id = Column(Integer)
customer_latitude = Column(VARCHAR(250))
customer_longitude = Column(VARCHAR(250))
correct_address = Column(BOOLEAN, default=False, nullable=False)
verified_at = Column(TIMESTAMP, nullable=True)  # Timestamp of the last verification attempt
def __repr__(self) -> str:
return (
f"<CustomerCustomer(id={self.id}, "
f"name='{self.customer_first_name} {self.customer_last_name}', "
f"address='{self.customer_address}', "
f"verified={self.correct_address})>"
)
class StreetReference(Base):
"""
Reference table of known streets for fuzzy matching address corrections.
Streets are populated per town/state from OpenStreetMap data.
Used to correct misspellings and wrong street suffixes (rd vs dr, etc.)
when geocoding fails.
Attributes:
id: Primary key
street_name: Full street name (e.g., "Main Street")
street_name_normalized: Lowercase, cleaned for matching
street_number_low: Lowest known street number (if available)
street_number_high: Highest known street number (if available)
town: Town/city name
town_normalized: Lowercase town name for matching
state: 2-letter state abbreviation (e.g., "MA")
zip_codes: Comma-separated ZIP codes this street spans
osm_id: OpenStreetMap way ID for reference
created_at: When this record was added
"""
__tablename__ = "street_reference"
__table_args__ = (
Index("ix_street_ref_town_state", "town_normalized", "state"),
Index("ix_street_ref_name_town", "street_name_normalized", "town_normalized"),
{"schema": "public"},
)
id = Column(Integer, primary_key=True, autoincrement=True)
street_name = Column(VARCHAR(500), nullable=False)
street_name_normalized = Column(VARCHAR(500), nullable=False, index=True)
street_number_low = Column(Integer, nullable=True)
street_number_high = Column(Integer, nullable=True)
town = Column(VARCHAR(140), nullable=False)
town_normalized = Column(VARCHAR(140), nullable=False)
state = Column(VARCHAR(2), nullable=False)
zip_codes = Column(VARCHAR(100), nullable=True)
osm_id = Column(String, nullable=True, index=True)
created_at = Column(TIMESTAMP, nullable=False)
def __repr__(self) -> str:
return (
f"<StreetReference(id={self.id}, "
f"street='{self.street_name}', "
f"town='{self.town}', state='{self.state}')>"
)
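# Note (illustrative): tables for these models can be created with
#   Base.metadata.create_all(bind=engine)
# app/main.py does this for StreetReference before populating streets.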

572
app/streets.py Normal file

@@ -0,0 +1,572 @@
"""
Street reference tools for address correction.
This module provides functionality to:
1. Fetch streets from OpenStreetMap Overpass API for a given town/state
2. Store streets in the StreetReference table
3. Perform fuzzy matching to correct misspelled addresses
The fuzzy matching handles common issues like:
- Misspelled street names ("Mian St" -> "Main St")
- Wrong suffixes ("Main Rd" -> "Main St")
- Missing/extra spaces
- Abbreviated vs full names ("St" vs "Street")
"""
import logging
import re
import time
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Tuple
import requests
from rapidfuzz import fuzz, process
from sqlalchemy.orm import Session
from app.config import STATE_MAPPING
from app.models import StreetReference
logger = logging.getLogger(__name__)
# Overpass API endpoints (multiple for fallback)
OVERPASS_API_URLS = [
"https://overpass-api.de/api/interpreter",
"https://overpass.kumi.systems/api/interpreter",
"https://maps.mail.ru/osm/tools/overpass/api/interpreter",
]
# Common street suffix variations for normalization
STREET_SUFFIXES = {
# Standard -> variations
"street": ["st", "str", "strt"],
"avenue": ["ave", "av", "aven"],
"road": ["rd", "rod"],
"drive": ["dr", "drv", "driv"],
"lane": ["ln", "lne"],
"court": ["ct", "crt", "cour"],
"circle": ["cir", "circ", "crcl"],
"boulevard": ["blvd", "boul", "blv"],
"place": ["pl", "plc"],
"terrace": ["ter", "terr", "trc"],
"way": ["wy"],
"highway": ["hwy", "hiway", "hgwy"],
"parkway": ["pkwy", "pky", "pkway"],
"square": ["sq", "sqr"],
"trail": ["trl", "tr"],
"crossing": ["xing", "crssng"],
"heights": ["hts", "hgts"],
"point": ["pt", "pnt"],
"ridge": ["rdg", "rdge"],
"valley": ["vly", "vlly"],
"view": ["vw", "viw"],
"center": ["ctr", "cntr", "centre"],
"north": ["n"],
"south": ["s"],
"east": ["e"],
"west": ["w"],
"northeast": ["ne"],
"northwest": ["nw"],
"southeast": ["se"],
"southwest": ["sw"],
}
# Build reverse lookup: abbreviation -> full form
SUFFIX_TO_FULL = {}
for full, abbrevs in STREET_SUFFIXES.items():
for abbr in abbrevs:
SUFFIX_TO_FULL[abbr] = full
SUFFIX_TO_FULL[full] = full # Also map full to itself
@dataclass
class StreetMatch:
"""Result of fuzzy street matching."""
original_street: str
matched_street: str
confidence_score: float
town: str
state: str
street_ref_id: int
corrected_address: Optional[str] = None
@dataclass
class FetchResult:
"""Result of fetching streets from OSM."""
success: bool
streets_added: int
streets_updated: int
total_found: int
message: str
errors: List[str]
def normalize_street_name(street: str) -> str:
"""
Normalize a street name for fuzzy matching.
- Lowercase
- Remove extra whitespace
- Expand common abbreviations to full form
- Remove punctuation
Args:
street: Raw street name
Returns:
Normalized street name
"""
if not street:
return ""
# Lowercase and strip
normalized = street.lower().strip()
# Remove punctuation except hyphens
normalized = re.sub(r"[.,']", "", normalized)
# Normalize whitespace
normalized = re.sub(r"\s+", " ", normalized)
# Split into words and expand abbreviations
words = normalized.split()
expanded_words = []
for word in words:
if word in SUFFIX_TO_FULL:
expanded_words.append(SUFFIX_TO_FULL[word])
else:
expanded_words.append(word)
return " ".join(expanded_words)
def extract_street_number(address: str) -> Tuple[Optional[str], str]:
"""
Extract street number from an address string.
Args:
address: Full address like "123 Main Street"
Returns:
Tuple of (street_number, remaining_address)
"""
if not address:
return None, ""
# Match leading number (possibly with letter suffix like "123A")
match = re.match(r"^(\d+[A-Za-z]?)\s+(.+)$", address.strip())
if match:
return match.group(1), match.group(2)
return None, address.strip()
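# Examples (illustrative):
#   extract_street_number("123A Main Street") -> ("123A", "Main Street")
#   extract_street_number("Main Street")      -> (None, "Main Street")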
def get_state_name(state_abbr: str) -> str:
"""
Get full state name from abbreviation for Overpass query.
Args:
state_abbr: 2-letter state abbreviation
Returns:
Full state name
"""
state_names = {
"AL": "Alabama", "AK": "Alaska", "AZ": "Arizona", "AR": "Arkansas",
"CA": "California", "CO": "Colorado", "CT": "Connecticut", "DE": "Delaware",
"DC": "District of Columbia", "FL": "Florida", "GA": "Georgia", "HI": "Hawaii",
"ID": "Idaho", "IL": "Illinois", "IN": "Indiana", "IA": "Iowa",
"KS": "Kansas", "KY": "Kentucky", "LA": "Louisiana", "ME": "Maine",
"MD": "Maryland", "MA": "Massachusetts", "MI": "Michigan", "MN": "Minnesota",
"MS": "Mississippi", "MO": "Missouri", "MT": "Montana", "NE": "Nebraska",
"NV": "Nevada", "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico",
"NY": "New York", "NC": "North Carolina", "ND": "North Dakota", "OH": "Ohio",
"OK": "Oklahoma", "OR": "Oregon", "PA": "Pennsylvania", "RI": "Rhode Island",
"SC": "South Carolina", "SD": "South Dakota", "TN": "Tennessee", "TX": "Texas",
"UT": "Utah", "VT": "Vermont", "VA": "Virginia", "WA": "Washington",
"WV": "West Virginia", "WI": "Wisconsin", "WY": "Wyoming",
"PR": "Puerto Rico", "VI": "Virgin Islands", "GU": "Guam", "AS": "American Samoa",
}
return state_names.get(state_abbr.upper(), state_abbr)
def fetch_streets_from_osm(town: str, state: str) -> Tuple[List[dict], str]:
"""
Fetch all streets in a town from OpenStreetMap using Overpass API.
Args:
town: Town/city name
state: 2-letter state abbreviation
Returns:
Tuple of (list of street dicts, error message or empty string)
"""
state_name = get_state_name(state)
state_upper = state.upper()
# geocodeArea-based query. Note: {{geocodeArea:...}} is an Overpass Turbo
# shorthand that the raw Overpass API does not expand, so this variant
# generally only works behind Turbo-compatible frontends; it is tried
# second, after simple_query below
query = f"""
[out:json][timeout:120];
// Use geocodeArea for reliable city lookup with state context
{{geocodeArea:{town}, {state_name}, United States}}->.city;
// Get all named streets in the city
way["highway"]["name"](area.city);
out tags;
"""
# Alternative query if geocodeArea fails (more explicit)
fallback_query = f"""
[out:json][timeout:120];
// Find state by ISO code
area["ISO3166-2"="US-{state_upper}"]->.state;
// Find city/town within state
(
relation["name"="{town}"]["type"="boundary"](area.state);
way["name"="{town}"]["place"](area.state);
node["name"="{town}"]["place"](area.state);
);
map_to_area->.city;
// Get streets
way["highway"]["name"](area.city);
out tags;
"""
# Plain area-filter query, tried first: resolve the state area by name
# and admin_level=4, then the town area inside it
simple_query = f"""
[out:json][timeout:60];
area["name"="{state_name}"]["boundary"="administrative"]["admin_level"="4"]->.state;
area["name"="{town}"](area.state)->.city;
way["highway"]["name"](area.city);
out tags;
"""
queries = [simple_query, query, fallback_query]
query_names = ["simple", "geocodeArea", "fallback"]
logger.info(f"Fetching streets from OSM for {town}, {state_name}")
last_error = ""
for api_url in OVERPASS_API_URLS:
for q, q_name in zip(queries, query_names):
try:
logger.info(f"Trying {q_name} query on {api_url.split('/')[2]}...")
logger.debug(f"Query: {q}")
response = requests.post(
api_url,
data={"data": q},
timeout=120,
headers={"User-Agent": "EamcoAddressChecker/1.0"}
)
if response.status_code == 429:
logger.warning("Rate limited, waiting 30s...")
time.sleep(30)
continue
if response.status_code == 504:
logger.warning(f"Timeout on {q_name} query, trying next...")
continue
response.raise_for_status()
data = response.json()
elements = data.get("elements", [])
if elements:
logger.info(f"Success with {q_name} query: {len(elements)} street segments")
# Process and return results
streets = []
seen_names = set()
for element in elements:
tags = element.get("tags", {})
name = tags.get("name")
if name and name.lower() not in seen_names:
seen_names.add(name.lower())
streets.append({
"name": name,
"osm_id": str(element.get("id", "")),
"highway_type": tags.get("highway", ""),
})
logger.info(f"Extracted {len(streets)} unique street names")
return streets, ""
else:
logger.debug(f"No results from {q_name} query")
except requests.exceptions.Timeout:
last_error = f"Timeout on {api_url}"
logger.warning(last_error)
continue
except requests.exceptions.RequestException as e:
last_error = f"Request error: {str(e)}"
logger.warning(last_error)
continue
except Exception as e:
last_error = f"Error: {str(e)}"
logger.warning(last_error)
continue
# All attempts failed
error = f"All Overpass queries failed for {town}, {state}. Last error: {last_error}"
logger.error(error)
return [], error
def populate_streets_for_town(
session: Session,
town: str,
state: str,
clear_existing: bool = False
) -> FetchResult:
"""
Fetch streets from OSM and populate the StreetReference table.
Args:
session: SQLAlchemy session
town: Town/city name
state: 2-letter state abbreviation
clear_existing: If True, delete existing streets for this town first
Returns:
FetchResult with statistics
"""
state = state.upper()
town_normalized = town.lower().strip()
errors = []
logger.info(f"Populating streets for {town}, {state}")
# Optionally clear existing streets for this town
if clear_existing:
deleted = session.query(StreetReference).filter(
StreetReference.town_normalized == town_normalized,
StreetReference.state == state
).delete(synchronize_session=False)
session.commit()
logger.info(f"Cleared {deleted} existing street records")
# Fetch from OSM
streets, error = fetch_streets_from_osm(town, state)
if error:
errors.append(error)
if not streets:
return FetchResult(
success=len(errors) == 0,
streets_added=0,
streets_updated=0,
total_found=0,
message=f"No streets found for {town}, {state}",
errors=errors,
)
# Check for existing streets to avoid duplicates
existing_streets = session.query(StreetReference).filter(
StreetReference.town_normalized == town_normalized,
StreetReference.state == state
).all()
existing_names = {s.street_name_normalized for s in existing_streets}
added = 0
now = datetime.utcnow()
for street_data in streets:
name = street_data["name"]
name_normalized = normalize_street_name(name)
if name_normalized in existing_names:
continue
street_ref = StreetReference(
street_name=name,
street_name_normalized=name_normalized,
town=town,
town_normalized=town_normalized,
state=state,
osm_id=street_data.get("osm_id"),
created_at=now,
)
session.add(street_ref)
existing_names.add(name_normalized)
added += 1
session.commit()
logger.info(f"Added {added} new streets for {town}, {state}")
return FetchResult(
success=True,
streets_added=added,
streets_updated=0,
total_found=len(streets),
message=f"Successfully added {added} streets for {town}, {state}",
errors=errors,
)
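# A sketch of a typical populate run. SessionLocal is an assumption about
# where the session factory lives (e.g. app.db); adjust to the real module.
def _example_populate() -> None:
    from app.db import SessionLocal  # assumed session factory location
    session = SessionLocal()
    try:
        result = populate_streets_for_town(session, "Springfield", "MA")
        print(result.message)
    finally:
        session.close()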
def find_matching_street(
session: Session,
street_input: str,
town: str,
state: str,
min_confidence: float = 70.0
) -> Optional[StreetMatch]:
"""
Find the best matching street for a potentially misspelled input.
Uses fuzzy string matching with rapidfuzz to find the closest
match in the StreetReference table.
Args:
session: SQLAlchemy session
street_input: The street name to match (may be misspelled)
town: Town/city to search within
state: State abbreviation
min_confidence: Minimum match confidence (0-100)
Returns:
StreetMatch if found above threshold, None otherwise
"""
state = state.upper()
town_normalized = town.lower().strip()
# Normalize the input for matching
input_normalized = normalize_street_name(street_input)
# Get all streets for this town
streets = session.query(StreetReference).filter(
StreetReference.town_normalized == town_normalized,
StreetReference.state == state
).all()
if not streets:
logger.debug(f"No reference streets found for {town}, {state}")
return None
# Build list of (normalized_name, street_object) for matching
choices = [(s.street_name_normalized, s) for s in streets]
# Use rapidfuzz to find the best match, taking the max over several
# scorers; token_set_ratio in particular handles word-order differences well
best_match = None
best_score = 0
for normalized_name, street_obj in choices:
# Try multiple scoring methods and take the best
scores = [
fuzz.ratio(input_normalized, normalized_name),
fuzz.partial_ratio(input_normalized, normalized_name),
fuzz.token_sort_ratio(input_normalized, normalized_name),
fuzz.token_set_ratio(input_normalized, normalized_name),
]
score = max(scores)
if score > best_score:
best_score = score
best_match = street_obj
if best_match and best_score >= min_confidence:
logger.info(
f"Fuzzy match: '{street_input}' -> '{best_match.street_name}' "
f"(confidence: {best_score:.1f}%)"
)
return StreetMatch(
original_street=street_input,
matched_street=best_match.street_name,
confidence_score=best_score,
town=best_match.town,
state=best_match.state,
street_ref_id=best_match.id,
)
logger.debug(
f"No confident match for '{street_input}' "
f"(best: {best_score:.1f}%, threshold: {min_confidence}%)"
)
return None
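# Why the loop above takes the max over several scorers: each one forgives
# a different kind of error. A self-contained sketch with rapidfuzz:
def _example_scorers() -> None:
    from rapidfuzz import fuzz as _fuzz
    print(_fuzz.ratio("mian st", "main st"))             # high: simple transposition
    print(_fuzz.ratio("st main", "main st"))             # lower: character order differs
    print(_fuzz.token_sort_ratio("st main", "main st"))  # 100.0: tokens sorted first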
def correct_address(
session: Session,
full_address: str,
town: str,
state: str,
min_confidence: float = 75.0
) -> Optional[StreetMatch]:
"""
Attempt to correct a full address using fuzzy street matching.
Extracts the street portion, finds a match, and returns
a corrected address with the matched street name.
Args:
session: SQLAlchemy session
full_address: Full street address (e.g., "123 Mian St")
town: Town/city name
state: State abbreviation
min_confidence: Minimum match confidence
Returns:
StreetMatch with corrected_address if match found, None otherwise
"""
# Extract street number and street name
street_number, street_name = extract_street_number(full_address)
if not street_name:
return None
# Find matching street
match = find_matching_street(
session=session,
street_input=street_name,
town=town,
state=state,
min_confidence=min_confidence,
)
if match:
# Build corrected address
if street_number:
match.corrected_address = f"{street_number} {match.matched_street}"
else:
match.corrected_address = match.matched_street
logger.info(
f"Address correction: '{full_address}' -> '{match.corrected_address}'"
)
return match
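# End-to-end correction sketch. Session setup mirrors the populate example
# above; the misspelled address, town, and state are placeholders.
def _example_correct() -> None:
    from app.db import SessionLocal  # assumed session factory location
    session = SessionLocal()
    try:
        match = correct_address(session, "123 Mian St", "Springfield", "MA")
        if match:
            print(match.corrected_address, match.confidence_score)
    finally:
        session.close()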
def get_town_street_count(session: Session, town: str, state: str) -> int:
"""
Get the number of streets in the reference table for a town.
Args:
session: SQLAlchemy session
town: Town/city name
state: State abbreviation
Returns:
Number of streets in the reference table
"""
return session.query(StreetReference).filter(
StreetReference.town_normalized == town.lower().strip(),
StreetReference.state == state.upper()
).count()

389
app/tools.py Normal file
View File

@@ -0,0 +1,389 @@
"""
Geocoding tools for eamco_address_checker.
This module provides modular tool functions for the agentic address verification
workflow. Each function represents a discrete action in the ReAct-style pipeline.
Tools:
- build_address(): Extracts and cleans address components from a record
- validate_address_components(): Validates required address fields
- format_address_string(): Joins components into a geocodable string
- geocode_address(): Calls Nominatim API to get lat/long
- validate_geocode_result(): Checks quality of geocoding result
- update_record(): Updates database record with geocoding results
"""
import logging
import random
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Tuple
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError, GeocoderUnavailable
from sqlalchemy.orm import Session
from app.config import (
NOMINATIM_USER_AGENT,
MIN_SLEEP_SECONDS,
MAX_SLEEP_SECONDS,
GEOCODE_TIMEOUT,
STATE_MAPPING,
)
from app.models import CustomerCustomer
logger = logging.getLogger(__name__)
@dataclass
class GeocodeResult:
"""Result from geocoding operation."""
success: bool
latitude: Optional[str] = None
longitude: Optional[str] = None
raw_address: Optional[str] = None
country_code: Optional[str] = None
error_message: Optional[str] = None
skipped: bool = False
skip_reason: Optional[str] = None
@dataclass
class AddressComponents:
"""Structured address components for geocoding."""
street: Optional[str]
apt: Optional[str]
city: Optional[str]
state: Optional[str]
zip_code: Optional[str]
is_valid: bool = True
validation_error: Optional[str] = None
def get_state_abbreviation(state_id: Optional[int]) -> Optional[str]:
"""
Convert state integer ID to 2-letter US state abbreviation.
Args:
state_id: Integer ID from database
Returns:
2-letter state abbreviation or None if not found
Note:
Replace with proper states table lookup when available
"""
if state_id is None:
return None
return STATE_MAPPING.get(state_id)
def build_address(customer: CustomerCustomer) -> AddressComponents:
"""
TOOL: Extract and clean address components from a customer record.
Returns the structured pieces that format_address_string() later joins
into "street, apt, city, state zip, USA" for geocoding.
Args:
customer: CustomerCustomer record with address fields
Returns:
AddressComponents dataclass with parsed components and validation status
"""
# Extract and clean components
street = (customer.customer_address or "").strip()
apt = (customer.customer_apt or "").strip()
city = (customer.customer_town or "").strip()
state = get_state_abbreviation(customer.customer_state)
zip_code = (customer.customer_zip or "").strip()
logger.debug(
"Building address",
extra={
"customer_id": customer.id,
"street": street,
"apt": apt,
"city": city,
"state": state,
"zip": zip_code,
}
)
return AddressComponents(
street=street if street else None,
apt=apt if apt else None,
city=city if city else None,
state=state,
zip_code=zip_code if zip_code else None,
)
def validate_address_components(components: AddressComponents) -> AddressComponents:
"""
TOOL: Validate that address has minimum required components.
An address is considered valid for geocoding if it has:
- Street address (required)
- City (required)
- ZIP code (required)
- State is recommended but not strictly required
Args:
components: AddressComponents to validate
Returns:
Updated AddressComponents with is_valid flag and validation_error
"""
missing = []
if not components.street:
missing.append("street")
if not components.city:
missing.append("city")
if not components.zip_code:
missing.append("zip")
if missing:
components.is_valid = False
components.validation_error = f"Missing required fields: {', '.join(missing)}"
logger.debug(f"Address validation failed: {components.validation_error}")
else:
components.is_valid = True
logger.debug("Address validation passed")
return components
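# A quick sketch of the validation contract: a record missing its ZIP is
# flagged invalid with a message naming the gap. Values are made up.
def _example_validation() -> None:
    components = AddressComponents(
        street="12 Oak St", apt=None, city="Springfield", state="MA", zip_code=None
    )
    components = validate_address_components(components)
    print(components.is_valid)          # False
    print(components.validation_error)  # Missing required fields: zip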
def format_address_string(components: AddressComponents) -> str:
"""
Format address components into a single string for geocoding.
Args:
components: Validated AddressComponents
Returns:
Formatted address string
"""
parts = []
# Street + Apt
if components.street:
if components.apt:
parts.append(f"{components.street}, {components.apt}")
else:
parts.append(components.street)
# City
if components.city:
parts.append(components.city)
# State + ZIP
if components.state and components.zip_code:
parts.append(f"{components.state} {components.zip_code}")
elif components.state:
parts.append(components.state)
elif components.zip_code:
parts.append(components.zip_code)
# Add country for better accuracy
parts.append("USA")
return ", ".join(parts)
def geocode_address(
address_string: str,
geocoder: Optional[Nominatim] = None
) -> GeocodeResult:
"""
TOOL: Call Nominatim API to geocode an address.
Uses geopy's Nominatim geocoder. This function does not sleep on its own;
callers should pair each request with rate_limit_sleep() to respect
Nominatim's 1 request/second policy.
Args:
address_string: Full formatted address to geocode
geocoder: Optional pre-initialized Nominatim instance
Returns:
GeocodeResult with lat/long or error information
"""
if geocoder is None:
geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)
logger.info(f"Geocoding address: {address_string}")
try:
# Call Nominatim API with timeout
location = geocoder.geocode(
address_string,
timeout=GEOCODE_TIMEOUT,
addressdetails=True,
country_codes="us", # Limit to USA
)
if location is None:
logger.warning(f"No geocoding result for: {address_string}")
return GeocodeResult(
success=False,
error_message="No location found for address"
)
# Extract country code from raw response if available
country_code = None
if hasattr(location, 'raw') and 'address' in location.raw:
country_code = location.raw['address'].get('country_code', '').upper()
logger.info(
f"Geocoding successful: lat={location.latitude}, lon={location.longitude}",
extra={
"latitude": location.latitude,
"longitude": location.longitude,
"raw_address": location.address,
"country_code": country_code,
}
)
return GeocodeResult(
success=True,
latitude=str(location.latitude),
longitude=str(location.longitude),
raw_address=location.address,
country_code=country_code,
)
except GeocoderTimedOut as e:
logger.error(f"Geocoding timeout: {e}")
return GeocodeResult(
success=False,
error_message=f"Geocoding timed out after {GEOCODE_TIMEOUT}s"
)
except GeocoderServiceError as e:
logger.error(f"Geocoder service error: {e}")
return GeocodeResult(
success=False,
error_message=f"Geocoder service error: {str(e)}"
)
except GeocoderUnavailable as e:
logger.error(f"Geocoder unavailable: {e}")
return GeocodeResult(
success=False,
error_message=f"Geocoder unavailable: {str(e)}"
)
except Exception as e:
logger.error(f"Unexpected geocoding error: {e}", exc_info=True)
return GeocodeResult(
success=False,
error_message=f"Unexpected error: {str(e)}"
)
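# Geocoding sketch: reuse one Nominatim instance across calls so the user
# agent is configured once, and pair each request with rate_limit_sleep()
# (defined below). The address is a placeholder.
def _example_geocode() -> None:
    geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)
    result = geocode_address("12 Oak St, Springfield, MA 01101, USA", geocoder=geocoder)
    if result.success:
        print(result.latitude, result.longitude)
    else:
        print(result.error_message)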
def validate_geocode_result(result: GeocodeResult) -> Tuple[bool, str]:
"""
TOOL: Validate quality of geocoding result.
Checks:
- Result was successful
- Country is USA (if available)
- Coordinates are within reasonable US bounds
Args:
result: GeocodeResult to validate
Returns:
Tuple of (is_valid, reason_string)
"""
if not result.success:
return False, f"Geocoding failed: {result.error_message}"
# Check country code if available
if result.country_code and result.country_code != "US":
logger.warning(f"Non-US country code: {result.country_code}")
return False, f"Result is outside USA (country: {result.country_code})"
# Basic bounds check for continental US + Alaska + Hawaii
try:
lat = float(result.latitude)
lon = float(result.longitude)
# Rough US bounds (including Alaska and Hawaii)
if not (18.0 <= lat <= 72.0):
return False, f"Latitude {lat} outside US bounds"
if not (-180.0 <= lon <= -65.0):
return False, f"Longitude {lon} outside US bounds"
except (ValueError, TypeError) as e:
return False, f"Invalid coordinates: {e}"
return True, "Valid US geocode result"
def update_record(
session: Session,
customer: CustomerCustomer,
geocode_result: GeocodeResult,
is_valid: bool
) -> bool:
"""
TOOL: Update customer record with geocoding results.
Sets latitude, longitude, correct_address flag, and verified_at timestamp.
Args:
session: SQLAlchemy session
customer: CustomerCustomer record to update
geocode_result: Result from geocoding operation
is_valid: Whether the geocode result passed validation
Returns:
True if update successful, False otherwise
"""
try:
now = datetime.utcnow()
if is_valid and geocode_result.success:
# Successful geocoding - update all fields
customer.customer_latitude = geocode_result.latitude
customer.customer_longitude = geocode_result.longitude
customer.correct_address = True
customer.verified_at = now
logger.info(
f"Updated record {customer.id}: lat={geocode_result.latitude}, "
f"lon={geocode_result.longitude}, correct_address=True"
)
else:
# Failed geocoding - mark as verified but not correct
customer.correct_address = False
customer.verified_at = now
logger.info(
f"Updated record {customer.id}: correct_address=False "
f"(reason: {geocode_result.error_message or 'validation failed'})"
)
return True
except Exception as e:
logger.error(f"Failed to update record {customer.id}: {e}", exc_info=True)
return False
def rate_limit_sleep() -> float:
"""
Sleep for a random duration to respect Nominatim rate limits.
Nominatim requires max 1 request per second. We sleep between
MIN_SLEEP_SECONDS and MAX_SLEEP_SECONDS (default 1.2-1.8s).
Returns:
Actual sleep duration in seconds
"""
sleep_time = random.uniform(MIN_SLEEP_SECONDS, MAX_SLEEP_SECONDS)
logger.debug(f"Rate limiting: sleeping {sleep_time:.2f}s")
time.sleep(sleep_time)
return sleep_time
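# Putting the tools together: a minimal sketch of the per-record pipeline
# from the module docstring (build -> validate -> format -> geocode ->
# validate result -> update), with a rate-limit sleep after the API call.
# Batching and commit cadence are left to the caller.
def _example_pipeline(session: Session, customer: CustomerCustomer) -> None:
    components = validate_address_components(build_address(customer))
    if not components.is_valid:
        return
    result = geocode_address(format_address_string(components))
    is_valid, _reason = validate_geocode_result(result)
    update_record(session, customer, result, is_valid)
    session.commit()
    rate_limit_sleep()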

23
requirements.txt Normal file
View File

@@ -0,0 +1,23 @@
# eamco_address_checker dependencies
# FastAPI web framework and server
fastapi>=0.109.0,<1.0.0
uvicorn[standard]>=0.27.0,<1.0.0
pydantic>=2.5.0,<3.0.0
# Database
sqlalchemy>=2.0.0,<3.0.0
psycopg2-binary>=2.9.9,<3.0.0
# Geocoding
geopy>=2.4.1,<3.0.0
# Fuzzy string matching for address correction
rapidfuzz>=3.5.0,<4.0.0
# HTTP client (for OSM Overpass API and geopy)
requests>=2.31.0,<3.0.0
urllib3>=2.0.0,<3.0.0
certifi>=2023.0.0
# Configuration
python-dotenv>=1.0.0,<2.0.0