feat: rewrite K-factor engine with history tracking and outlier detection

Replace simple exponential smoothing with a rolling-average K-factor
system backed by a new auto_kfactor_history table. Budget fills are
detected and excluded from calculations, outliers beyond 2-sigma are
flagged, and confidence scores track data quality per customer.
Adds backfill endpoint, auto-create for missing estimation records,
and manual house_factor PUT endpoints for both auto and regular customers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:54:27 -05:00
parent 764c094eed
commit c134c05947
5 changed files with 473 additions and 105 deletions

View File

@@ -7,9 +7,10 @@ from sqlalchemy import func
from datetime import date
from decimal import Decimal
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp, KFactorHistory
from app.models.delivery import Delivery
from app.constants import DEFAULT_TANK_SIZE_GALLONS
from app.script.fuel_estimator import FuelEstimator
logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ def fix_customer_last_delivered():
"new_date": str(latest_ticket.fill_date)
})
session.add(ad)
session.commit()
result = {
"total_customers": total_customers,
@@ -213,3 +214,114 @@ def estimate_customer_gallons(update_db: int):
session.commit()
return JSONResponse(content=jsonable_encoder(estimates))
@router.get("/backfill_kfactor_history", status_code=200)
def backfill_kfactor_history():
    """
    Backfill the auto_kfactor_history table from existing ticket data.

    For each auto customer with 2+ dated tickets and no existing history
    rows, computes an observed K-factor for every consecutive ticket pair
    and inserts one KFactorHistory row per usable pair. Afterwards the
    estimator's rolling K-factor calculation is run to set the customer's
    house_factor, confidence_score and k_factor_source.

    Customers with fewer than 2 tickets are counted as skipped and only get
    a confidence_score of 20 / k_factor_source of 'default' when those
    fields are currently unset.

    Returns:
        JSONResponse wrapping a stats dict with the keys
        ``customers_processed``, ``customers_skipped``,
        ``history_entries_created`` and ``customers_updated``.

    Raises:
        Exception: any error raised while querying or writing is re-raised
            after the session has been rolled back, so no partial backfill
            is committed and the session stays usable.
    """
    logger.info("GET /fixstuff/backfill_kfactor_history - Starting K-factor history backfill")

    estimator = FuelEstimator(session=session)

    stats = {
        "customers_processed": 0,
        "customers_skipped": 0,
        "history_entries_created": 0,
        "customers_updated": 0,
    }

    try:
        auto_deliveries = session.query(Auto_Delivery).all()

        for ad in auto_deliveries:
            tickets = session.query(Tickets_Auto_Delivery).filter(
                Tickets_Auto_Delivery.customer_id == ad.customer_id,
                Tickets_Auto_Delivery.fill_date.isnot(None)
            ).order_by(Tickets_Auto_Delivery.fill_date).all()

            if len(tickets) < 2:
                stats["customers_skipped"] += 1
                # Not enough data for an observation: only fill in the
                # default markers when nothing has been set yet.
                if ad.confidence_score is None:
                    ad.confidence_score = 20
                if ad.k_factor_source is None:
                    ad.k_factor_source = 'default'
                continue

            # NOTE(review): this counter is bumped before the "already has
            # history" check below, so it also counts customers that are
            # then left untouched. Kept as-is to preserve the response.
            stats["customers_processed"] += 1

            # Customers that already have history entries are left alone so
            # that re-running the endpoint does not insert duplicates.
            existing = session.query(KFactorHistory).filter(
                KFactorHistory.customer_id == ad.customer_id
            ).count()
            if existing > 0:
                continue

            # Walk consecutive (previous, next) ticket pairs in date order.
            for prev_ticket, next_ticket in zip(tickets, tickets[1:]):
                start_date = prev_ticket.fill_date
                end_date = next_ticket.fill_date
                num_days = (end_date - start_date).days
                if num_days <= 0:
                    # Same-day (or out-of-order) tickets give no interval.
                    continue

                # Heating degree days for the interval (start exclusive,
                # end inclusive), using a base of 65 on temp_avg.
                interval_temps = session.query(Auto_Temp).filter(
                    Auto_Temp.todays_date > start_date,
                    Auto_Temp.todays_date <= end_date
                ).all()
                total_hdd = Decimal(sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps))
                if total_hdd == 0:
                    # No heating demand (or no temperature rows): skip.
                    continue

                # Hot water adjustment: subtract the daily hot-water usage
                # for customers flagged with hot_water_summer.
                # NOTE(review): HOT_WATER_DAILY_USAGE is not among the
                # imports visible in this hunk - confirm it is in scope.
                total_hot_water = Decimal('0.0')
                if ad.hot_water_summer == 1:
                    total_hot_water = Decimal(num_days) * HOT_WATER_DAILY_USAGE

                gallons_for_heating = next_ticket.gallons_delivered - total_hot_water

                # total_hdd is already known to be non-zero here, so only
                # the gallons need checking; otherwise k_factor stays None.
                k_factor_obs = None
                if gallons_for_heating > 0:
                    k_factor_obs = gallons_for_heating / total_hdd

                is_budget = estimator._is_budget_fill(next_ticket.gallons_delivered)

                # Flag the ticket too
                next_ticket.is_budget_fill = is_budget

                history_entry = KFactorHistory(
                    customer_id=ad.customer_id,
                    ticket_id=next_ticket.id,
                    fill_date=next_ticket.fill_date,
                    gallons_delivered=next_ticket.gallons_delivered,
                    total_hdd=total_hdd,
                    days_in_period=num_days,
                    k_factor=k_factor_obs,
                    is_budget_fill=is_budget,
                    is_outlier=False,
                    created_at=date.today()
                )
                session.add(history_entry)
                stats["history_entries_created"] += 1

            # Flush so rolling calc can see the new entries
            session.flush()

            # Run rolling K-factor calculation to set customer values
            new_k, confidence, source = estimator._calculate_rolling_k_factor(ad.customer_id)
            ad.house_factor = new_k
            ad.confidence_score = confidence
            ad.k_factor_source = source
            stats["customers_updated"] += 1

        session.commit()
    except Exception:
        # Undo the partial backfill and reset the session's transaction so
        # later requests using the same session are not poisoned.
        session.rollback()
        logger.exception("K-factor history backfill failed; transaction rolled back")
        raise

    logger.info(f"Backfill complete: {stats}")
    return JSONResponse(content=jsonable_encoder(stats))