diff --git a/app/main.py b/app/main.py
index 2d3379f..b84f380 100644
--- a/app/main.py
+++ b/app/main.py
@@ -26,9 +26,9 @@ import logging
 import os
 import sys
 from contextlib import contextmanager
-from typing import Generator
+from typing import Generator, List, Optional
 
-from fastapi import FastAPI, Depends, HTTPException
+from fastapi import FastAPI, Depends, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from sqlalchemy import create_engine, text
@@ -42,6 +42,7 @@ from app.config import (
     LOG_FORMAT,
     BATCH_SIZE,
     COMMIT_BATCH_SIZE,
+    STATE_MAPPING,
 )
 from app.agent import AddressVerificationAgent
 from app.models import CustomerCustomer, StreetReference, Base
@@ -205,6 +206,37 @@ class StreetInfoResponse(BaseModel):
     message: str
 
 
+class TownSuggestion(BaseModel):
+    """Single town suggestion."""
+    town: str
+    state: str
+    state_id: int
+    customer_count: int
+
+
+class TownSearchResponse(BaseModel):
+    """Response for town search endpoint."""
+    ok: bool
+    suggestions: List[TownSuggestion]
+    query: str
+
+
+class StreetSuggestion(BaseModel):
+    """Single street suggestion."""
+    street_name: str
+    full_address: str
+    zip: str
+
+
+class StreetSearchResponse(BaseModel):
+    """Response for street search endpoint."""
+    ok: bool
+    suggestions: List[StreetSuggestion]
+    town: str
+    state: str
+    query: str
+
+
 # =============================================================================
 # ENDPOINTS
 # =============================================================================
@@ -512,6 +544,275 @@ async def get_street_info(
     )
 
 
+# =============================================================================
+# ADDRESS AUTOCOMPLETE ENDPOINTS
+# =============================================================================
+
+# Street-type suffixes removed from a reference street name before matching it
+# against free-form customer addresses. Order matters: the first match wins.
+_STREET_SUFFIXES = (
+    ' street', ' st', ' avenue', ' ave', ' road', ' rd',
+    ' drive', ' dr', ' lane', ' ln', ' court', ' ct',
+    ' circle', ' cir', ' boulevard', ' blvd', ' way',
+    ' place', ' pl', ' terrace', ' ter',
+)
+
+
+@app.get(
+    "/towns/search",
+    response_model=TownSearchResponse,
+    tags=["Autocomplete"],
+)
+async def search_towns(
+    q: str = Query(..., min_length=2, description="Search query for town name"),
+    limit: int = Query(10, ge=1, le=50, description="Maximum results to return"),
+    db: Session = Depends(get_db)
+):
+    """
+    Search for towns based on existing customer data.
+
+    This endpoint searches the customer database for unique town/state
+    combinations matching the query. Returns towns sorted by customer count.
+
+    Args:
+        q: Partial town name to search for (min 2 characters)
+        limit: Maximum number of suggestions to return (default 10)
+
+    Example:
+        GET /towns/search?q=spring
+
+    Returns:
+        TownSearchResponse with list of matching towns. A query that is blank
+        after trimming whitespace returns no suggestions.
+    """
+    from sqlalchemy import func
+
+    query_lower = q.lower().strip()
+    if not query_lower:
+        # min_length counts whitespace, so "  " passes validation; an empty
+        # pattern would otherwise match every town in the table.
+        return TownSearchResponse(ok=True, suggestions=[], query=q)
+
+    customer_total = func.count(CustomerCustomer.id)
+
+    # Query distinct town/state combinations with customer counts.
+    # autoescape=True makes "%" and "_" typed by the user match literally
+    # instead of acting as LIKE wildcards.
+    results = (
+        db.query(
+            CustomerCustomer.customer_town,
+            CustomerCustomer.customer_state,
+            customer_total.label('customer_count')
+        )
+        .filter(
+            func.lower(CustomerCustomer.customer_town)
+            .contains(query_lower, autoescape=True)
+        )
+        .filter(CustomerCustomer.customer_town.isnot(None))
+        .filter(CustomerCustomer.customer_town != '')
+        .group_by(CustomerCustomer.customer_town, CustomerCustomer.customer_state)
+        .order_by(customer_total.desc())
+        .limit(limit)
+        .all()
+    )
+
+    suggestions = []
+    for town_name, state_id, count in results:
+        if not town_name:
+            continue
+        # state_id of 0/None and ids missing from STATE_MAPPING have no
+        # abbreviation; those rows are reported as "MA".
+        state_abbr = STATE_MAPPING.get(state_id) if state_id else None
+        suggestions.append(TownSuggestion(
+            town=town_name.strip(),  # Remove trailing whitespace
+            state=state_abbr or "MA",
+            state_id=state_id or 0,
+            customer_count=count
+        ))
+
+    return TownSearchResponse(
+        ok=True,
+        suggestions=suggestions,
+        query=q
+    )
+
+
+@app.get(
+    "/streets/search",
+    response_model=StreetSearchResponse,
+    tags=["Autocomplete"],
+)
+async def search_streets(
+    town: str = Query(..., min_length=1, description="Town name to search within"),
+    state: str = Query(..., min_length=2, max_length=2, description="2-letter state abbreviation"),
+    q: str = Query(..., min_length=1, description="Partial street name to search"),
+    limit: int = Query(10, ge=1, le=50, description="Maximum results to return"),
+    db: Session = Depends(get_db)
+):
+    """
+    Search for streets within a specific town.
+
+    This endpoint searches the StreetReference table for streets whose name
+    contains the partial query within the specified town/state, ranks them
+    (exact match, then starts-with, then contains; ties broken by fuzzy
+    similarity), and looks up zip codes from existing customer data. If no
+    reference street matches, it falls back to matching customer addresses.
+
+    Args:
+        town: Town/city name to search within
+        state: 2-letter state abbreviation (e.g., "MA", "NY")
+        q: Partial street name to search for, optionally prefixed with a
+            house number (e.g., "32 Roch")
+        limit: Maximum number of suggestions to return (default 10)
+
+    Example:
+        GET /streets/search?town=Auburn&state=MA&q=main
+
+    Returns:
+        StreetSearchResponse with list of matching streets and zip codes. A
+        query that is blank after trimming whitespace returns no suggestions.
+    """
+    from sqlalchemy import func
+    from rapidfuzz import fuzz
+    import re
+
+    state = state.upper()
+    town_normalized = town.lower().strip()
+    query_raw = q.strip()
+    query_lower = query_raw.lower()
+
+    suggestions = []
+
+    if not query_lower:
+        # min_length counts whitespace, so " " passes validation; an empty
+        # pattern would otherwise match every street in the town.
+        return StreetSearchResponse(
+            ok=True,
+            suggestions=suggestions,
+            town=town,
+            state=state,
+            query=q
+        )
+
+    # Build state_ids list for customer lookup (include 0 for unmapped)
+    state_ids = [0] + [sid for sid, abbr in STATE_MAPPING.items() if abbr == state]
+
+    # Check if query starts with a house number (e.g., "32 Roch" or "32 Rochdale")
+    house_number = ""
+    street_query = query_lower
+    house_match = re.match(r'^(\d+[A-Za-z]?)\s+(.+)$', query_raw)
+    if house_match:
+        house_number = house_match.group(1)
+        street_query = house_match.group(2).lower()
+
+    # First, get streets that contain the street query string (case-insensitive).
+    # autoescape=True keeps "%" and "_" in user input from acting as LIKE
+    # wildcards; the ORDER BY makes the LIMIT-ed candidate set deterministic.
+    matching_street_refs = (
+        db.query(StreetReference)
+        .filter(StreetReference.town_normalized == town_normalized)
+        .filter(StreetReference.state == state)
+        .filter(
+            func.lower(StreetReference.street_name)
+            .contains(street_query, autoescape=True)
+        )
+        .order_by(StreetReference.street_name)
+        .limit(limit * 3)  # Get more for ranking
+        .all()
+    )
+
+    def rank_key(street_name):
+        """Sort key: (match tier, fuzzy ratio); higher sorts first."""
+        street_lower = street_name.lower()
+        if street_lower == street_query:
+            tier = 2  # exact match
+        elif street_lower.startswith(street_query):
+            tier = 1  # starts with
+        else:
+            tier = 0  # contains
+        # The tier is compared first so a prefix match can never outrank an
+        # exact match, whatever its fuzzy ratio is.
+        return tier, fuzz.ratio(street_query, street_lower)
+
+    ranked_names = sorted(
+        (street.street_name for street in matching_street_refs),
+        key=rank_key,
+        reverse=True,
+    )
+
+    # For each matched street, look up zip code from customer data
+    for street_name in ranked_names[:limit]:
+        # Extract the base street name for matching (e.g., "main" from
+        # "Main Street") so abbreviated customer addresses still match.
+        street_base = street_name.lower()
+        for suffix in _STREET_SUFFIXES:
+            if street_base.endswith(suffix):
+                street_base = street_base[:-len(suffix)]
+                break
+
+        # Try to find a customer with this street name in their address
+        customer_zip = (
+            db.query(CustomerCustomer.customer_zip)
+            .filter(func.lower(func.trim(CustomerCustomer.customer_town)) == town_normalized)
+            .filter(CustomerCustomer.customer_state.in_(state_ids))
+            .filter(
+                func.lower(CustomerCustomer.customer_address)
+                .contains(street_base, autoescape=True)
+            )
+            .filter(CustomerCustomer.customer_zip.isnot(None))
+            .filter(CustomerCustomer.customer_zip != '')
+            .first()
+        )
+        zip_code = customer_zip[0] if customer_zip else ""
+
+        # Include house number if provided
+        display_street = f"{house_number} {street_name}" if house_number else street_name
+        suggestions.append(StreetSuggestion(
+            street_name=display_street,
+            full_address=f"{display_street}, {town}, {state}",
+            zip=zip_code
+        ))
+
+    if not suggestions:
+        # Fallback: search existing customer addresses directly
+        # Use the full query (with house number) for direct address matching
+        customer_addresses = (
+            db.query(
+                CustomerCustomer.customer_address,
+                CustomerCustomer.customer_zip,
+                func.count(CustomerCustomer.id).label('count')
+            )
+            .filter(func.lower(func.trim(CustomerCustomer.customer_town)) == town_normalized)
+            .filter(CustomerCustomer.customer_state.in_(state_ids))
+            .filter(
+                func.lower(CustomerCustomer.customer_address)
+                .contains(query_lower, autoescape=True)
+            )
+            .filter(CustomerCustomer.customer_address.isnot(None))
+            .filter(CustomerCustomer.customer_address != '')
+            .group_by(CustomerCustomer.customer_address, CustomerCustomer.customer_zip)
+            .order_by(func.count(CustomerCustomer.id).desc())
+            .limit(limit)
+            .all()
+        )
+
+        for address, zip_code, _ in customer_addresses:
+            if address:
+                suggestions.append(StreetSuggestion(
+                    street_name=address,
+                    full_address=f"{address}, {town}, {state}",
+                    zip=zip_code or ""
+                ))
+
+    return StreetSearchResponse(
+        ok=True,
+        suggestions=suggestions,
+        town=town,
+        state=state,
+        query=q
+    )
+
+
 # =============================================================================
 # STARTUP/SHUTDOWN EVENTS
 # =============================================================================