feat: rewrite K-factor engine with history tracking and outlier detection
Replace simple exponential smoothing with a rolling-average K-factor system backed by a new auto_kfactor_history table. Budget fills are detected and excluded from calculations, outliers beyond 2-sigma are flagged, and confidence scores track data quality per customer. Adds backfill endpoint, auto-create for missing estimation records, and manual house_factor PUT endpoints for both auto and regular customers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,15 +2,26 @@ import logging
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from pydantic import BaseModel
|
||||
from database import session
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from sqlalchemy import func
|
||||
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
|
||||
from app.models.customer import Customer_Customer
|
||||
from app.models.delivery import Delivery
|
||||
from app.models.auth import Auth_User
|
||||
from app.auth import get_current_user
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
HOT_WATER_DAILY_USAGE = Decimal('1.0')
|
||||
HDD_FORECAST_DAYS = 7
|
||||
|
||||
|
||||
# Request body for the manual house-factor PUT endpoint.
class HouseFactorUpdate(BaseModel):
    # Replacement house factor (K-factor) value supplied by the caller.
    house_factor: float
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
@@ -20,6 +31,36 @@ router = APIRouter(
|
||||
)
|
||||
|
||||
|
||||
def _get_avg_hdd(days: int = HDD_FORECAST_DAYS) -> Decimal:
    """Return the mean heating degree days (HDD) over the last ``days`` days.

    The trailing average is used as a proxy for a forecast. Each day's HDD is
    computed as ``max(0, 65 - temp_avg)``.

    Args:
        days: Size of the look-back window, in days.

    Returns:
        The average daily HDD rounded to two decimal places, or
        ``Decimal('0')`` when the window holds no usable temperature reading.
    """
    cutoff = date.today() - timedelta(days=days)
    rows = session.query(Auto_Temp.temp_avg).filter(
        Auto_Temp.todays_date > cutoff
    ).all()

    # A row without a reading cannot contribute an HDD value. Skipping it
    # keeps float(None) from raising and failing every caller of this helper.
    readings = [float(r.temp_avg) for r in rows if r.temp_avg is not None]
    if not readings:
        return Decimal('0')

    total = sum(max(0, 65 - temp) for temp in readings)
    return Decimal(str(round(total / len(readings), 2)))
|
||||
|
||||
|
||||
def _enrich_auto(auto_obj, avg_hdd: Decimal) -> dict:
    """Serialize an auto delivery and attach burn-rate estimates.

    The daily burn is ``house_factor * avg_hdd`` plus the fixed hot-water
    allowance when ``hot_water_summer`` equals 1.

    Args:
        auto_obj: Auto delivery record to serialize.
        avg_hdd: Average heating degree days used for the burn estimate.

    Returns:
        The encoded record with ``gallons_per_day``, ``avg_hdd``,
        ``hot_water_summer`` and ``days_remaining`` added. ``days_remaining``
        is capped at 999, which is also the value used when no burn rate or
        no gallons estimate is available.
    """
    payload = jsonable_encoder(auto_obj)

    # A missing or zero house factor contributes no heating burn.
    if auto_obj.house_factor:
        k_factor = Decimal(str(auto_obj.house_factor))
    else:
        k_factor = Decimal('0')

    if auto_obj.hot_water_summer == 1:
        hot_water_burn = HOT_WATER_DAILY_USAGE
    else:
        hot_water_burn = Decimal('0')

    burn_per_day = k_factor * avg_hdd + hot_water_burn

    payload['gallons_per_day'] = float(round(burn_per_day, 2))
    payload['avg_hdd'] = float(avg_hdd)
    payload['hot_water_summer'] = auto_obj.hot_water_summer

    # Default to the cap; only a positive burn rate with a known gallons
    # estimate yields a real figure.
    days_remaining = 999
    if burn_per_day > 0 and auto_obj.estimated_gallons_left is not None:
        days_remaining = min(int(auto_obj.estimated_gallons_left / burn_per_day), 999)
    payload['days_remaining'] = days_remaining

    return payload
|
||||
|
||||
|
||||
@router.get("/all/customers", status_code=200)
|
||||
def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
|
||||
@@ -31,7 +72,10 @@ def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
|
||||
.all()
|
||||
)
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(automatics), status_code=200)
|
||||
avg_hdd = _get_avg_hdd()
|
||||
enriched = [_enrich_auto(a, avg_hdd) for a in automatics]
|
||||
|
||||
return JSONResponse(content=enriched, status_code=200)
|
||||
|
||||
|
||||
@router.get("/driver/{driver_employee_id}", status_code=200)
|
||||
@@ -112,7 +156,7 @@ def get_autos_customers_extended(customer_id, current_user: Auth_User = Depends(
|
||||
|
||||
|
||||
@router.get("/auto/customer/{customer_id}", status_code=200)
|
||||
def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends(get_current_user)):
|
||||
def get_auto_delivery_by_customer(customer_id: int, current_user: Auth_User = Depends(get_current_user)):
|
||||
logger.info(f"GET /delivery/auto/customer/{customer_id} - User: {current_user.username}")
|
||||
get_auto_delivery = (
|
||||
session.query(Auto_Delivery)
|
||||
@@ -120,7 +164,47 @@ def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends
|
||||
.first()
|
||||
)
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(get_auto_delivery), status_code=200)
|
||||
if not get_auto_delivery:
|
||||
# Auto-create record from customer data
|
||||
customer = session.query(Customer_Customer).filter(
|
||||
Customer_Customer.id == customer_id
|
||||
).first()
|
||||
if not customer:
|
||||
return JSONResponse(content={"error": "Customer not found"}, status_code=404)
|
||||
|
||||
# Use division average K-factor as default
|
||||
div_avg = session.query(func.avg(Auto_Delivery.house_factor)).filter(
|
||||
Auto_Delivery.house_factor.isnot(None),
|
||||
Auto_Delivery.house_factor > 0
|
||||
).scalar()
|
||||
default_k = float(div_avg) if div_avg else 0.12
|
||||
|
||||
get_auto_delivery = Auto_Delivery(
|
||||
customer_id=customer.id,
|
||||
account_number=customer.account_number,
|
||||
customer_town=customer.customer_town,
|
||||
customer_state=customer.customer_state,
|
||||
customer_address=customer.customer_address,
|
||||
customer_zip=customer.customer_zip,
|
||||
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
|
||||
estimated_gallons_left=Decimal('100'),
|
||||
estimated_gallons_left_prev_day=Decimal('100'),
|
||||
tank_size='275',
|
||||
house_factor=Decimal(str(round(default_k, 4))),
|
||||
auto_status=1,
|
||||
hot_water_summer=0,
|
||||
confidence_score=20,
|
||||
k_factor_source='default'
|
||||
)
|
||||
session.add(get_auto_delivery)
|
||||
session.commit()
|
||||
session.refresh(get_auto_delivery)
|
||||
logger.info(f"Auto-created Auto_Delivery record for customer {customer_id}")
|
||||
|
||||
avg_hdd = _get_avg_hdd()
|
||||
enriched = _enrich_auto(get_auto_delivery, avg_hdd)
|
||||
|
||||
return JSONResponse(content=enriched, status_code=200)
|
||||
|
||||
|
||||
@router.put("/update_status/{auto_id}", status_code=200)
|
||||
@@ -137,3 +221,26 @@ def update_auto_status(auto_id: int, current_user: Auth_User = Depends(get_curre
|
||||
session.commit()
|
||||
return {"message": "Auto status updated to 3"}
|
||||
return {"error": "Auto delivery not found"}
|
||||
|
||||
|
||||
@router.put("/auto/customer/{customer_id}/house_factor", status_code=200)
def update_house_factor(customer_id: int, body: HouseFactorUpdate, current_user: Auth_User = Depends(get_current_user)):
    """Manually set the house factor on a customer's auto delivery record.

    The value is rounded to four decimal places, stored, and the record's
    ``k_factor_source`` is marked ``'manual'``.

    Args:
        customer_id: Customer whose auto delivery record is updated.
        body: Payload carrying the new ``house_factor``.
        current_user: Authenticated user, resolved by dependency injection.

    Returns:
        200 with the enriched auto delivery record, 400 when the supplied
        value is not a finite non-negative number, or 404 when the customer
        has no auto delivery record.
    """
    import math

    logger.info(f"PUT /delivery/auto/customer/{customer_id}/house_factor - User: {current_user.username}")

    # NaN, infinity and negative values would otherwise be written straight
    # into the record and feed into every burn-rate calculation.
    if not math.isfinite(body.house_factor) or body.house_factor < 0:
        return JSONResponse(
            content={"error": "house_factor must be a finite, non-negative number"},
            status_code=400,
        )

    auto_delivery = (
        session.query(Auto_Delivery)
        .filter(Auto_Delivery.customer_id == customer_id)
        .first()
    )

    if not auto_delivery:
        return JSONResponse(content={"error": "Auto delivery record not found"}, status_code=404)

    auto_delivery.house_factor = Decimal(str(round(body.house_factor, 4)))
    auto_delivery.k_factor_source = 'manual'
    try:
        session.commit()
    except Exception:
        # The session is shared at module level; without a rollback a failed
        # commit leaves it unusable for subsequent requests.
        session.rollback()
        raise
    session.refresh(auto_delivery)

    avg_hdd = _get_avg_hdd()
    enriched = _enrich_auto(auto_delivery, avg_hdd)

    return JSONResponse(content=enriched, status_code=200)
|
||||
|
||||
@@ -7,9 +7,10 @@ from sqlalchemy import func
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp, KFactorHistory
|
||||
from app.models.delivery import Delivery
|
||||
from app.constants import DEFAULT_TANK_SIZE_GALLONS
|
||||
from app.script.fuel_estimator import FuelEstimator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -61,7 +62,7 @@ def fix_customer_last_delivered():
|
||||
"new_date": str(latest_ticket.fill_date)
|
||||
})
|
||||
session.add(ad)
|
||||
|
||||
|
||||
session.commit()
|
||||
result = {
|
||||
"total_customers": total_customers,
|
||||
@@ -213,3 +214,114 @@ def estimate_customer_gallons(update_db: int):
|
||||
session.commit()
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(estimates))
|
||||
|
||||
|
||||
@router.get("/backfill_kfactor_history", status_code=200)
def backfill_kfactor_history():
    """
    Backfill the auto_kfactor_history table from existing ticket data.

    For each auto customer with 2+ tickets, calculates K-factor for each
    consecutive ticket pair and inserts into history. Then runs the rolling
    K-factor calculation to set the customer's house_factor, confidence, and source.

    Customers with fewer than two dated tickets only receive a default
    confidence score and source when those fields are unset. Customers that
    already have history rows are left untouched. Ticket pairs with a
    non-positive day span, no recorded gallons, or zero heating degree days
    are skipped.

    Returns:
        JSON counters: customers_processed, customers_skipped,
        history_entries_created and customers_updated.

    Raises:
        Exception: Any failure is re-raised after the shared session has been
            rolled back, so no partial backfill is committed.
    """
    logger.info("GET /fixstuff/backfill_kfactor_history - Starting K-factor history backfill")

    estimator = FuelEstimator(session=session)

    stats = {
        "customers_processed": 0,
        "customers_skipped": 0,
        "history_entries_created": 0,
        "customers_updated": 0,
    }

    try:
        auto_deliveries = session.query(Auto_Delivery).all()

        for ad in auto_deliveries:
            tickets = session.query(Tickets_Auto_Delivery).filter(
                Tickets_Auto_Delivery.customer_id == ad.customer_id,
                Tickets_Auto_Delivery.fill_date.isnot(None)
            ).order_by(Tickets_Auto_Delivery.fill_date).all()

            if len(tickets) < 2:
                stats["customers_skipped"] += 1
                # Not enough data to observe a K-factor: only fill in the
                # default confidence/source markers when they are unset.
                if ad.confidence_score is None:
                    ad.confidence_score = 20
                if ad.k_factor_source is None:
                    ad.k_factor_source = 'default'
                continue

            stats["customers_processed"] += 1

            # Check if this customer already has history entries
            existing = session.query(KFactorHistory).filter(
                KFactorHistory.customer_id == ad.customer_id
            ).count()
            if existing > 0:
                continue

            # Walk consecutive (previous, next) ticket pairs.
            for prev_ticket, next_ticket in zip(tickets, tickets[1:]):
                start_date = prev_ticket.fill_date
                end_date = next_ticket.fill_date
                num_days = (end_date - start_date).days

                if num_days <= 0:
                    continue

                # Without a delivered amount no usage can be derived, and the
                # subtraction below would raise on None.
                if next_ticket.gallons_delivered is None:
                    continue

                # Calculate HDD for the interval
                interval_temps = session.query(Auto_Temp).filter(
                    Auto_Temp.todays_date > start_date,
                    Auto_Temp.todays_date <= end_date
                ).all()
                # Days with no temperature reading are ignored rather than
                # aborting the whole backfill on float(None).
                total_hdd = Decimal(sum(
                    max(0, 65 - float(temp.temp_avg))
                    for temp in interval_temps
                    if temp.temp_avg is not None
                ))

                if total_hdd == 0:
                    continue

                # Hot water adjustment
                total_hot_water = Decimal('0.0')
                if ad.hot_water_summer == 1:
                    total_hot_water = Decimal(num_days) * HOT_WATER_DAILY_USAGE

                gallons_for_heating = next_ticket.gallons_delivered - total_hot_water

                # total_hdd is strictly positive here (zero was skipped above),
                # so only the heating gallons need checking.
                k_factor_obs = None
                if gallons_for_heating > 0:
                    k_factor_obs = gallons_for_heating / total_hdd

                is_budget = estimator._is_budget_fill(next_ticket.gallons_delivered)

                # Flag the ticket too
                next_ticket.is_budget_fill = is_budget

                history_entry = KFactorHistory(
                    customer_id=ad.customer_id,
                    ticket_id=next_ticket.id,
                    fill_date=next_ticket.fill_date,
                    gallons_delivered=next_ticket.gallons_delivered,
                    total_hdd=total_hdd,
                    days_in_period=num_days,
                    k_factor=k_factor_obs,
                    is_budget_fill=is_budget,
                    is_outlier=False,
                    created_at=date.today()
                )
                session.add(history_entry)
                stats["history_entries_created"] += 1

            # Flush so rolling calc can see the new entries
            session.flush()

            # Run rolling K-factor calculation to set customer values
            new_k, confidence, source = estimator._calculate_rolling_k_factor(ad.customer_id)
            ad.house_factor = new_k
            ad.confidence_score = confidence
            ad.k_factor_source = source
            stats["customers_updated"] += 1

        session.commit()
    except Exception:
        # The session is shared at module level; roll back so a failure here
        # does not leave pending rows or a broken transaction behind.
        session.rollback()
        logger.exception("K-factor history backfill failed; transaction rolled back")
        raise

    logger.info(f"Backfill complete: {stats}")
    return JSONResponse(content=jsonable_encoder(stats))
|
||||
|
||||
@@ -2,6 +2,7 @@ import logging
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from pydantic import BaseModel
|
||||
from database import session
|
||||
from sqlalchemy import func
|
||||
from datetime import date
|
||||
@@ -27,6 +28,9 @@ TANK_MAX_FILLS = {
|
||||
}
|
||||
|
||||
|
||||
# Request body for the customer house-factor PUT endpoint.
class HouseFactorUpdate(BaseModel):
    # Replacement house factor value supplied by the caller.
    house_factor: float
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/fixstuff_customer",
|
||||
@@ -256,10 +260,32 @@ def estimate_customer_gallons_specific(customer_id: int):
|
||||
).first()
|
||||
|
||||
if not customer_estimate:
|
||||
return JSONResponse(content={
|
||||
"error": f"No fuel estimation data found for customer {customer_id}",
|
||||
"solution": "Run the populate_estimates endpoint first to initialize customer data."
|
||||
})
|
||||
# Auto-create record from customer data
|
||||
customer = session.query(Customer_Customer).filter(
|
||||
Customer_Customer.id == customer_id
|
||||
).first()
|
||||
if not customer:
|
||||
return JSONResponse(content={"error": f"Customer {customer_id} not found"}, status_code=404)
|
||||
|
||||
customer_estimate = Customer_estimate_gallons(
|
||||
customer_id=customer.id,
|
||||
account_number=customer.account_number,
|
||||
customer_town=customer.customer_town,
|
||||
customer_state=customer.customer_state,
|
||||
customer_address=customer.customer_address,
|
||||
customer_zip=customer.customer_zip,
|
||||
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
|
||||
estimated_gallons_left=Decimal('100'),
|
||||
estimated_gallons_left_prev_day=Decimal('100'),
|
||||
tank_size='275',
|
||||
house_factor=Decimal('0.12'),
|
||||
auto_status=1,
|
||||
hot_water_summer=0
|
||||
)
|
||||
session.add(customer_estimate)
|
||||
session.commit()
|
||||
session.refresh(customer_estimate)
|
||||
logger.info(f"Auto-created Customer_estimate_gallons record for customer {customer_id}")
|
||||
|
||||
deliveries = session.query(Delivery).filter(
|
||||
Delivery.customer_id == customer_estimate.customer_id,
|
||||
@@ -448,3 +474,25 @@ def populate_customer_estimates():
|
||||
}
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(result))
|
||||
|
||||
|
||||
@router.put("/house_factor/{customer_id}", status_code=200)
def update_customer_house_factor(customer_id: int, body: HouseFactorUpdate):
    """Manually set the house factor on a customer's estimate record.

    The value is rounded to four decimal places before being stored.

    Args:
        customer_id: Customer whose estimate record is updated.
        body: Payload carrying the new ``house_factor``.

    Returns:
        200 with the record id, customer id, stored house factor and a
        confirmation message; 400 when the supplied value is not a finite
        non-negative number; 404 when no estimate record exists.
    """
    import math

    logger.info(f"PUT /fixstuff_customer/house_factor/{customer_id}")

    # NaN, infinity and negative values would otherwise be written straight
    # into the record.
    if not math.isfinite(body.house_factor) or body.house_factor < 0:
        return JSONResponse(
            content={"error": "house_factor must be a finite, non-negative number"},
            status_code=400,
        )

    customer_estimate = session.query(Customer_estimate_gallons).filter(
        Customer_estimate_gallons.customer_id == customer_id
    ).first()

    if not customer_estimate:
        return JSONResponse(content={"error": "Customer estimate record not found"}, status_code=404)

    customer_estimate.house_factor = Decimal(str(round(body.house_factor, 4)))
    try:
        session.commit()
    except Exception:
        # The session is shared at module level; without a rollback a failed
        # commit leaves it unusable for subsequent requests.
        session.rollback()
        raise
    session.refresh(customer_estimate)

    return JSONResponse(content=jsonable_encoder({
        "id": customer_estimate.id,
        "customer_id": customer_estimate.customer_id,
        "house_factor": float(customer_estimate.house_factor),
        "message": "House factor updated"
    }), status_code=200)
|
||||
|
||||
Reference in New Issue
Block a user