first commit
This commit is contained in:
389
app/tools.py
Normal file
389
app/tools.py
Normal file
@@ -0,0 +1,389 @@
|
||||
"""
|
||||
Geocoding tools for eamco_address_checker.
|
||||
|
||||
This module provides modular tool functions for the agentic address verification
|
||||
workflow. Each function represents a discrete action in the ReAct-style pipeline.
|
||||
|
||||
Tools:
|
||||
- build_address(): Constructs full US address string from components
|
||||
- validate_address_components(): Validates required address fields
|
||||
- geocode_address(): Calls Nominatim API to get lat/long
|
||||
- validate_geocode_result(): Checks quality of geocoding result
|
||||
- update_record(): Updates database record with geocoding results
|
||||
"""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from geopy.geocoders import Nominatim
|
||||
from geopy.exc import GeocoderTimedOut, GeocoderServiceError, GeocoderUnavailable
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import (
|
||||
NOMINATIM_USER_AGENT,
|
||||
MIN_SLEEP_SECONDS,
|
||||
MAX_SLEEP_SECONDS,
|
||||
GEOCODE_TIMEOUT,
|
||||
STATE_MAPPING,
|
||||
)
|
||||
from app.models import CustomerCustomer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class GeocodeResult:
    """
    Outcome of a single geocoding attempt.

    Only ``success`` is required; every other field defaults to "not set".

    Attributes:
        success: Whether the geocoder returned a location.
        latitude: Latitude kept as a string, or None when there is none.
        longitude: Longitude kept as a string, or None when there is none.
        raw_address: Address text reported by the geocoder, if any.
        country_code: Country code reported by the geocoder, if any.
        error_message: Description of the failure when ``success`` is False.
        skipped: Flag for attempts that were skipped. NOTE(review): nothing
            in this module sets it; presumably written by the caller.
        skip_reason: Explanation accompanying ``skipped``, if any.
    """

    success: bool
    latitude: Optional[str] = None
    longitude: Optional[str] = None
    raw_address: Optional[str] = None
    country_code: Optional[str] = None
    error_message: Optional[str] = None
    skipped: bool = False
    skip_reason: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class AddressComponents:
    """
    Individual pieces of a postal address plus their validation state.

    The five address fields are required constructor arguments (each may be
    None); the validation fields start out as "valid, no error".

    Attributes:
        street: Street address line, or None when absent.
        apt: Apartment / unit designator, or None when absent.
        city: City or town name, or None when absent.
        state: State abbreviation, or None when unknown.
        zip_code: ZIP code, or None when absent.
        is_valid: Whether the components passed validation.
        validation_error: Description of the validation failure, if any.
    """

    street: Optional[str]
    apt: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]
    is_valid: bool = True
    validation_error: Optional[str] = None
|
||||
|
||||
|
||||
def get_state_abbreviation(state_id: Optional[int]) -> Optional[str]:
    """
    Look up the US state abbreviation for a database state ID.

    Args:
        state_id: Integer state ID from the database, or None.

    Returns:
        The STATE_MAPPING entry for ``state_id`` (expected to be a 2-letter
        abbreviation), or None when the ID is None or has no entry.

    Note:
        Replace with proper states table lookup when available
    """
    # A missing ID short-circuits without consulting the mapping.
    return None if state_id is None else STATE_MAPPING.get(state_id)
|
||||
|
||||
|
||||
def build_address(customer: CustomerCustomer) -> AddressComponents:
    """
    TOOL: Extract cleaned address components from a customer record.

    Text fields are whitespace-stripped and empty values become None; the
    state ID is translated with get_state_abbreviation(). No validation is
    performed here, so the returned object keeps its default validation state.

    Args:
        customer: CustomerCustomer record with address fields

    Returns:
        AddressComponents dataclass holding the cleaned components
    """

    def _clean(value):
        # None (or any other falsy value) is treated as an empty string.
        return (value or "").strip()

    street = _clean(customer.customer_address)
    apt = _clean(customer.customer_apt)
    city = _clean(customer.customer_town)
    state = get_state_abbreviation(customer.customer_state)
    zip_code = _clean(customer.customer_zip)

    # The log payload carries the stripped strings ("" when empty), not None.
    log_fields = {
        "customer_id": customer.id,
        "street": street,
        "apt": apt,
        "city": city,
        "state": state,
        "zip": zip_code,
    }
    logger.debug("Building address", extra=log_fields)

    # Empty strings collapse to None in the returned components.
    return AddressComponents(
        street=street or None,
        apt=apt or None,
        city=city or None,
        state=state,
        zip_code=zip_code or None,
    )
|
||||
|
||||
|
||||
def validate_address_components(components: AddressComponents) -> AddressComponents:
    """
    TOOL: Validate that address has minimum required components.

    An address is considered valid for geocoding if it has:
    - Street address (required)
    - City (required)
    - ZIP code (required)
    - State is recommended but not strictly required

    The passed-in object is updated in place and also returned.

    Args:
        components: AddressComponents to validate

    Returns:
        The same AddressComponents with is_valid set and validation_error
        either describing the missing fields or reset to None
    """
    required = (
        ("street", components.street),
        ("city", components.city),
        ("zip", components.zip_code),
    )
    # Keep the labels of required fields that are None or empty.
    missing = [label for label, value in required if not value]

    if missing:
        components.is_valid = False
        components.validation_error = f"Missing required fields: {', '.join(missing)}"
        logger.debug(f"Address validation failed: {components.validation_error}")
    else:
        components.is_valid = True
        # Clear any message left by an earlier failed validation so the
        # object never reports "valid" together with a stale error.
        components.validation_error = None
        logger.debug("Address validation passed")

    return components
|
||||
|
||||
|
||||
def format_address_string(components: AddressComponents) -> str:
    """
    Join address components into one comma-separated geocoding query.

    Layout is "street[, apt], city, state zip, USA"; any component that is
    None or empty is left out. The apartment is only included when a street
    is present.

    Args:
        components: Validated AddressComponents

    Returns:
        Formatted address string, always ending in "USA"
    """
    # Street line, optionally extended with the apartment.
    street_line = components.street
    if street_line and components.apt:
        street_line = f"{components.street}, {components.apt}"

    # State and ZIP share one segment when both exist, otherwise whichever
    # of the two is present stands alone.
    state, zip_code = components.state, components.zip_code
    if state and zip_code:
        region = f"{components.state} {components.zip_code}"
    else:
        region = state or zip_code

    segments = [seg for seg in (street_line, components.city, region) if seg]

    # Add country for better accuracy
    segments.append("USA")

    return ", ".join(segments)
|
||||
|
||||
|
||||
def geocode_address(
    address_string: str,
    geocoder: Optional[Nominatim] = None
) -> GeocodeResult:
    """
    TOOL: Call Nominatim API to geocode an address.

    Uses geopy's Nominatim geocoder, restricted to US results. This function
    does not pause between requests; callers are responsible for pacing
    calls (see rate_limit_sleep()) to respect Nominatim's 1 request/second
    policy.

    All errors are reported through the returned GeocodeResult; no exception
    propagates to the caller.

    Args:
        address_string: Full formatted address to geocode
        geocoder: Optional pre-initialized Nominatim instance; a new one is
            created with NOMINATIM_USER_AGENT when omitted

    Returns:
        GeocodeResult with lat/long or error information
    """
    # Nothing to look up: fail fast instead of spending an API request.
    if not address_string or not address_string.strip():
        logger.warning("Empty address string passed to geocoder")
        return GeocodeResult(
            success=False,
            error_message="Empty address string"
        )

    if geocoder is None:
        geocoder = Nominatim(user_agent=NOMINATIM_USER_AGENT)

    logger.info(f"Geocoding address: {address_string}")

    try:
        # Call Nominatim API with timeout
        location = geocoder.geocode(
            address_string,
            timeout=GEOCODE_TIMEOUT,
            addressdetails=True,
            country_codes="us",  # Limit to USA
        )

        if location is None:
            logger.warning(f"No geocoding result for: {address_string}")
            return GeocodeResult(
                success=False,
                error_message="No location found for address"
            )

        # Extract country code from raw response if available. Missing or
        # null values are tolerated and normalized to None so a successful
        # lookup is not turned into a failure by an absent detail field.
        raw = getattr(location, "raw", None) or {}
        address_details = raw.get("address") or {}
        country_code = (address_details.get("country_code") or "").upper() or None

        logger.info(
            f"Geocoding successful: lat={location.latitude}, lon={location.longitude}",
            extra={
                "latitude": location.latitude,
                "longitude": location.longitude,
                "raw_address": location.address,
                "country_code": country_code,
            }
        )

        return GeocodeResult(
            success=True,
            latitude=str(location.latitude),
            longitude=str(location.longitude),
            raw_address=location.address,
            country_code=country_code,
        )

    # GeocoderTimedOut and GeocoderUnavailable both derive from
    # GeocoderServiceError, so the specific handlers must come first or
    # they would never be reached.
    except GeocoderTimedOut as e:
        logger.error(f"Geocoding timeout: {e}")
        return GeocodeResult(
            success=False,
            error_message=f"Geocoding timed out after {GEOCODE_TIMEOUT}s"
        )

    except GeocoderUnavailable as e:
        logger.error(f"Geocoder unavailable: {e}")
        return GeocodeResult(
            success=False,
            error_message=f"Geocoder unavailable: {str(e)}"
        )

    except GeocoderServiceError as e:
        logger.error(f"Geocoder service error: {e}")
        return GeocodeResult(
            success=False,
            error_message=f"Geocoder service error: {str(e)}"
        )

    except Exception as e:
        # Boundary handler: a single bad record must not abort the pipeline.
        logger.error(f"Unexpected geocoding error: {e}", exc_info=True)
        return GeocodeResult(
            success=False,
            error_message=f"Unexpected error: {str(e)}"
        )
|
||||
|
||||
|
||||
def validate_geocode_result(result: GeocodeResult) -> Tuple[bool, str]:
    """
    TOOL: Decide whether a geocoding result is a plausible US location.

    The checks run in order and the first failure wins:
    - The geocoding attempt itself succeeded
    - The country code, when present, is "US"
    - Latitude and longitude parse as floats
    - Latitude is within 18.0..72.0 and longitude within -180.0..-65.0
      (a rough box covering the continental US, Alaska and Hawaii)

    Args:
        result: GeocodeResult to validate

    Returns:
        Tuple of (is_valid, reason_string)
    """
    if not result.success:
        return False, f"Geocoding failed: {result.error_message}"

    # A missing/empty country code is not treated as a failure.
    country = result.country_code
    if country and country != "US":
        logger.warning(f"Non-US country code: {result.country_code}")
        return False, f"Result is outside USA (country: {result.country_code})"

    try:
        lat = float(result.latitude)
        lon = float(result.longitude)
    except (ValueError, TypeError) as e:
        # None or non-numeric coordinate text ends up here.
        return False, f"Invalid coordinates: {e}"
    else:
        # Rough US bounds (including Alaska and Hawaii). The negated range
        # form is deliberate: it also rejects NaN, which compares False
        # against every bound.
        lat_in_range = 18.0 <= lat <= 72.0
        if not lat_in_range:
            return False, f"Latitude {lat} outside US bounds"

        lon_in_range = -180.0 <= lon <= -65.0
        if not lon_in_range:
            return False, f"Longitude {lon} outside US bounds"

    return True, "Valid US geocode result"
|
||||
|
||||
|
||||
def update_record(
    session: Session,
    customer: CustomerCustomer,
    geocode_result: GeocodeResult,
    is_valid: bool
) -> bool:
    """
    TOOL: Write geocoding results onto a customer record.

    On a valid, successful result the latitude, longitude, correct_address
    flag and verified_at timestamp are set. Otherwise only correct_address
    (False) and verified_at are set; existing coordinates are left as-is.

    Args:
        session: SQLAlchemy session. Not used by this function.
            NOTE(review): presumably the caller flushes/commits; confirm.
        customer: CustomerCustomer record to update
        geocode_result: Result from geocoding operation
        is_valid: Whether the geocode result passed validation

    Returns:
        True if the attributes were set, False if an exception occurred
    """
    try:
        # Naive UTC timestamp shared by both outcomes.
        verified_time = datetime.utcnow()

        if not (is_valid and geocode_result.success):
            # Failed geocoding - mark as verified but not correct
            customer.correct_address = False
            customer.verified_at = verified_time

            logger.info(
                f"Updated record {customer.id}: correct_address=False "
                f"(reason: {geocode_result.error_message or 'validation failed'})"
            )
            return True

        # Successful geocoding - update all fields
        customer.customer_latitude = geocode_result.latitude
        customer.customer_longitude = geocode_result.longitude
        customer.correct_address = True
        customer.verified_at = verified_time

        logger.info(
            f"Updated record {customer.id}: lat={geocode_result.latitude}, "
            f"lon={geocode_result.longitude}, correct_address=True"
        )
        return True

    except Exception as e:
        logger.error(f"Failed to update record {customer.id}: {e}", exc_info=True)
        return False
|
||||
|
||||
|
||||
def rate_limit_sleep() -> float:
    """
    Pause for a randomized interval between geocoding requests.

    Nominatim allows at most 1 request per second, so the pause is drawn
    uniformly from MIN_SLEEP_SECONDS..MAX_SLEEP_SECONDS (default 1.2-1.8s).
    The call blocks the current thread for that long.

    Returns:
        Actual sleep duration in seconds
    """
    delay = random.uniform(MIN_SLEEP_SECONDS, MAX_SLEEP_SECONDS)
    logger.debug(f"Rate limiting: sleeping {delay:.2f}s")
    time.sleep(delay)
    return delay
|
||||
Reference in New Issue
Block a user