From c134c0594737a1bbb97b4052165f97bc6fb41625 Mon Sep 17 00:00:00 2001 From: Edwin Eames Date: Sun, 8 Feb 2026 17:54:27 -0500 Subject: [PATCH] feat: rewrite K-factor engine with history tracking and outlier detection Replace simple exponential smoothing with a rolling-average K-factor system backed by a new auto_kfactor_history table. Budget fills are detected and excluded from calculations, outliers beyond 2-sigma are flagged, and confidence scores track data quality per customer. Adds backfill endpoint, auto-create for missing estimation records, and manual house_factor PUT endpoints for both auto and regular customers. Co-Authored-By: Claude Opus 4.6 --- app/models/auto.py | 30 +++- app/routers/delivery.py | 115 +++++++++++++- app/routers/fixstuff_auto.py | 116 +++++++++++++- app/routers/fixstuff_customer.py | 56 ++++++- app/script/fuel_estimator.py | 261 ++++++++++++++++++++----------- 5 files changed, 473 insertions(+), 105 deletions(-) diff --git a/app/models/auto.py b/app/models/auto.py index 3b540f1..c3c6749 100644 --- a/app/models/auto.py +++ b/app/models/auto.py @@ -1,6 +1,6 @@ from sqlalchemy import (Column, Integer, - DECIMAL, TEXT, - VARCHAR, DATE, INTEGER) + DECIMAL, TEXT, Boolean, + VARCHAR, DATE, INTEGER, Index) from datetime import datetime from database import Base @@ -57,11 +57,12 @@ class Auto_Delivery(Base): estimated_gallons_left_prev_day = Column(DECIMAL(6, 2)) tank_height = Column(VARCHAR(25)) tank_size = Column(VARCHAR(25)) - house_factor = Column(DECIMAL(5, 2)) + house_factor = Column(DECIMAL(7, 4)) auto_status = Column(INTEGER()) open_ticket_id = Column(Integer, nullable=True) hot_water_summer = Column(INTEGER()) - + confidence_score = Column(INTEGER(), default=20) + k_factor_source = Column(VARCHAR(20), default='default') @@ -92,3 +93,24 @@ class Tickets_Auto_Delivery(Base): payment_type = Column(Integer, nullable=True) payment_card_id = Column(Integer, nullable=True) payment_status = Column(Integer, nullable=True) + is_budget_fill = Column(Boolean, default=False) + + +class KFactorHistory(Base): + __tablename__ = 'auto_kfactor_history' + + id = Column(Integer, primary_key=True, autoincrement=True) + customer_id = Column(INTEGER(), nullable=False, index=True) + ticket_id = Column(Integer, nullable=True) + fill_date = Column(DATE()) + gallons_delivered = Column(DECIMAL(6, 2)) + total_hdd = Column(DECIMAL(8, 2)) + days_in_period = Column(Integer) + k_factor = Column(DECIMAL(7, 4)) + is_budget_fill = Column(Boolean, default=False) + is_outlier = Column(Boolean, default=False) + created_at = Column(DATE()) + + __table_args__ = ( + Index('ix_auto_kfactor_history_customer_fill', 'customer_id', fill_date.desc()), + ) diff --git a/app/routers/delivery.py b/app/routers/delivery.py index 8a909b6..781b55c 100644 --- a/app/routers/delivery.py +++ b/app/routers/delivery.py @@ -2,15 +2,26 @@ import logging from fastapi import APIRouter, Depends from fastapi.responses import JSONResponse from fastapi.encoders import jsonable_encoder +from pydantic import BaseModel from database import session +from datetime import date, timedelta +from decimal import Decimal +from sqlalchemy import func -from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery +from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp +from app.models.customer import Customer_Customer from app.models.delivery import Delivery from app.models.auth import Auth_User from app.auth import get_current_user logger = logging.getLogger(__name__) +HOT_WATER_DAILY_USAGE = Decimal('1.0') +HDD_FORECAST_DAYS = 7 + + +class HouseFactorUpdate(BaseModel): + house_factor: float router = APIRouter( @@ -20,6 +31,36 @@ router = APIRouter( ) +def _get_avg_hdd(days: int = HDD_FORECAST_DAYS) -> Decimal: + """Get average HDD over the last N days as a forecast proxy.""" + cutoff = date.today() - timedelta(days=days) + rows = session.query(Auto_Temp.temp_avg).filter( + Auto_Temp.todays_date > cutoff + ).all() + if not rows: + return Decimal('0') + total = sum(max(0, 65 - float(r.temp_avg)) for r in rows) + return Decimal(str(round(total / len(rows), 2))) + + +def _enrich_auto(auto_obj, avg_hdd: Decimal) -> dict: + """Add computed gallons_per_day and days_remaining to a serialized auto delivery.""" + data = jsonable_encoder(auto_obj) + k = Decimal(str(auto_obj.house_factor)) if auto_obj.house_factor else Decimal('0') + hot_water = HOT_WATER_DAILY_USAGE if auto_obj.hot_water_summer == 1 else Decimal('0') + daily_burn = k * avg_hdd + hot_water + + data['gallons_per_day'] = float(round(daily_burn, 2)) + data['avg_hdd'] = float(avg_hdd) + data['hot_water_summer'] = auto_obj.hot_water_summer + if daily_burn > 0 and auto_obj.estimated_gallons_left is not None: + days_left = int(auto_obj.estimated_gallons_left / daily_burn) + data['days_remaining'] = min(days_left, 999) + else: + data['days_remaining'] = 999 + + return data + @router.get("/all/customers", status_code=200) def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)): @@ -31,7 +72,10 @@ def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)): .all() ) - return JSONResponse(content=jsonable_encoder(automatics), status_code=200) + avg_hdd = _get_avg_hdd() + enriched = [_enrich_auto(a, avg_hdd) for a in automatics] + + return JSONResponse(content=enriched, status_code=200) @router.get("/driver/{driver_employee_id}", status_code=200) @@ -112,7 +156,7 @@ def get_autos_customers_extended(customer_id, current_user: Auth_User = Depends( @router.get("/auto/customer/{customer_id}", status_code=200) -def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends(get_current_user)): +def get_auto_delivery_by_customer(customer_id: int, current_user: Auth_User = Depends(get_current_user)): logger.info(f"GET /delivery/auto/customer/{customer_id} - User: {current_user.username}") get_auto_delivery = ( session.query(Auto_Delivery) @@ -120,7 +164,47 @@ def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends .first() ) - return JSONResponse(content=jsonable_encoder(get_auto_delivery), status_code=200) + if not get_auto_delivery: + # Auto-create record from customer data + customer = session.query(Customer_Customer).filter( + Customer_Customer.id == customer_id + ).first() + if not customer: + return JSONResponse(content={"error": "Customer not found"}, status_code=404) + + # Use division average K-factor as default + div_avg = session.query(func.avg(Auto_Delivery.house_factor)).filter( + Auto_Delivery.house_factor.isnot(None), + Auto_Delivery.house_factor > 0 + ).scalar() + default_k = float(div_avg) if div_avg else 0.12 + + get_auto_delivery = Auto_Delivery( + customer_id=customer.id, + account_number=customer.account_number, + customer_town=customer.customer_town, + customer_state=customer.customer_state, + customer_address=customer.customer_address, + customer_zip=customer.customer_zip, + customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(), + estimated_gallons_left=Decimal('100'), + estimated_gallons_left_prev_day=Decimal('100'), + tank_size='275', + house_factor=Decimal(str(round(default_k, 4))), + auto_status=1, + hot_water_summer=0, + confidence_score=20, + k_factor_source='default' + ) + session.add(get_auto_delivery) + session.commit() + session.refresh(get_auto_delivery) + logger.info(f"Auto-created Auto_Delivery record for customer {customer_id}") + + avg_hdd = _get_avg_hdd() + enriched = _enrich_auto(get_auto_delivery, avg_hdd) + + return JSONResponse(content=enriched, status_code=200) @router.put("/update_status/{auto_id}", status_code=200) @@ -137,3 +221,26 @@ def update_auto_status(auto_id: int, current_user: Auth_User = Depends(get_curre session.commit() return {"message": "Auto status updated to 3"} return {"error": "Auto delivery not found"} + + +@router.put("/auto/customer/{customer_id}/house_factor", status_code=200) +def update_house_factor(customer_id: int, body: HouseFactorUpdate, current_user: Auth_User = Depends(get_current_user)): + logger.info(f"PUT /delivery/auto/customer/{customer_id}/house_factor - User: {current_user.username}") + auto_delivery = ( + session.query(Auto_Delivery) + .filter(Auto_Delivery.customer_id == customer_id) + .first() + ) + + if not auto_delivery: + return JSONResponse(content={"error": "Auto delivery record not found"}, status_code=404) + + auto_delivery.house_factor = Decimal(str(round(body.house_factor, 4))) + auto_delivery.k_factor_source = 'manual' + session.commit() + session.refresh(auto_delivery) + + avg_hdd = _get_avg_hdd() + enriched = _enrich_auto(auto_delivery, avg_hdd) + + return JSONResponse(content=enriched, status_code=200) diff --git a/app/routers/fixstuff_auto.py b/app/routers/fixstuff_auto.py index b8277f8..1908cb2 100644 --- a/app/routers/fixstuff_auto.py +++ b/app/routers/fixstuff_auto.py @@ -7,9 +7,10 @@ from sqlalchemy import func from datetime import date from decimal import Decimal -from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp +from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp, KFactorHistory from app.models.delivery import Delivery from app.constants import DEFAULT_TANK_SIZE_GALLONS +from app.script.fuel_estimator import FuelEstimator logger = logging.getLogger(__name__) @@ -61,7 +62,7 @@ def fix_customer_last_delivered(): "new_date": str(latest_ticket.fill_date) }) session.add(ad) - + session.commit() result = { "total_customers": total_customers, @@ -213,3 +214,114 @@ def estimate_customer_gallons(update_db: int): session.commit() return JSONResponse(content=jsonable_encoder(estimates)) + + +@router.get("/backfill_kfactor_history", status_code=200) +def backfill_kfactor_history(): + """ + Backfill the auto_kfactor_history table from existing ticket data. + For each auto customer with 2+ tickets, calculates K-factor for each + consecutive ticket pair and inserts into history. Then runs the rolling + K-factor calculation to set the customer's house_factor, confidence, and source. + """ + logger.info("GET /fixstuff/backfill_kfactor_history - Starting K-factor history backfill") + + estimator = FuelEstimator(session=session) + auto_deliveries = session.query(Auto_Delivery).all() + + stats = { + "customers_processed": 0, + "customers_skipped": 0, + "history_entries_created": 0, + "customers_updated": 0, + } + + for ad in auto_deliveries: + tickets = session.query(Tickets_Auto_Delivery).filter( + Tickets_Auto_Delivery.customer_id == ad.customer_id, + Tickets_Auto_Delivery.fill_date.isnot(None) + ).order_by(Tickets_Auto_Delivery.fill_date).all() + + if len(tickets) < 2: + stats["customers_skipped"] += 1 + # Set division avg for customers with <2 tickets + if ad.confidence_score is None: + ad.confidence_score = 20 + if ad.k_factor_source is None: + ad.k_factor_source = 'default' + continue + + stats["customers_processed"] += 1 + + # Check if this customer already has history entries + existing = session.query(KFactorHistory).filter( + KFactorHistory.customer_id == ad.customer_id + ).count() + if existing > 0: + continue + + for i in range(len(tickets) - 1): + prev_ticket = tickets[i] + next_ticket = tickets[i + 1] + + start_date = prev_ticket.fill_date + end_date = next_ticket.fill_date + num_days = (end_date - start_date).days + + if num_days <= 0: + continue + + # Calculate HDD for the interval + interval_temps = session.query(Auto_Temp).filter( + Auto_Temp.todays_date > start_date, + Auto_Temp.todays_date <= end_date + ).all() + total_hdd = Decimal(sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps)) + + if total_hdd == 0: + continue + + # Hot water adjustment + total_hot_water = Decimal('0.0') + if ad.hot_water_summer == 1: + total_hot_water = Decimal(num_days) * HOT_WATER_DAILY_USAGE + + gallons_for_heating = next_ticket.gallons_delivered - total_hot_water + + k_factor_obs = None + if gallons_for_heating > 0 and total_hdd > 0: + k_factor_obs = gallons_for_heating / total_hdd + + is_budget = estimator._is_budget_fill(next_ticket.gallons_delivered) + + # Flag the ticket too + next_ticket.is_budget_fill = is_budget + + history_entry = KFactorHistory( + customer_id=ad.customer_id, + ticket_id=next_ticket.id, + fill_date=next_ticket.fill_date, + gallons_delivered=next_ticket.gallons_delivered, + total_hdd=total_hdd, + days_in_period=num_days, + k_factor=k_factor_obs, + is_budget_fill=is_budget, + is_outlier=False, + created_at=date.today() + ) + session.add(history_entry) + stats["history_entries_created"] += 1 + + # Flush so rolling calc can see the new entries + session.flush() + + # Run rolling K-factor calculation to set customer values + new_k, confidence, source = estimator._calculate_rolling_k_factor(ad.customer_id) + ad.house_factor = new_k + ad.confidence_score = confidence + ad.k_factor_source = source + stats["customers_updated"] += 1 + + session.commit() + logger.info(f"Backfill complete: {stats}") + return JSONResponse(content=jsonable_encoder(stats)) diff --git a/app/routers/fixstuff_customer.py b/app/routers/fixstuff_customer.py index 8302e4f..a9315b8 100644 --- a/app/routers/fixstuff_customer.py +++ b/app/routers/fixstuff_customer.py @@ -2,6 +2,7 @@ import logging from fastapi import APIRouter from fastapi.responses import JSONResponse from fastapi.encoders import jsonable_encoder +from pydantic import BaseModel from database import session from sqlalchemy import func from datetime import date @@ -27,6 +28,9 @@ TANK_MAX_FILLS = { } +class HouseFactorUpdate(BaseModel): + house_factor: float + router = APIRouter( prefix="/fixstuff_customer", @@ -256,10 +260,32 @@ def estimate_customer_gallons_specific(customer_id: int): ).first() if not customer_estimate: - return JSONResponse(content={ - "error": f"No fuel estimation data found for customer {customer_id}", - "solution": "Run the populate_estimates endpoint first to initialize customer data." - }) + # Auto-create record from customer data + customer = session.query(Customer_Customer).filter( + Customer_Customer.id == customer_id + ).first() + if not customer: + return JSONResponse(content={"error": f"Customer {customer_id} not found"}, status_code=404) + + customer_estimate = Customer_estimate_gallons( + customer_id=customer.id, + account_number=customer.account_number, + customer_town=customer.customer_town, + customer_state=customer.customer_state, + customer_address=customer.customer_address, + customer_zip=customer.customer_zip, + customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(), + estimated_gallons_left=Decimal('100'), + estimated_gallons_left_prev_day=Decimal('100'), + tank_size='275', + house_factor=Decimal('0.12'), + auto_status=1, + hot_water_summer=0 + ) + session.add(customer_estimate) + session.commit() + session.refresh(customer_estimate) + logger.info(f"Auto-created Customer_estimate_gallons record for customer {customer_id}") deliveries = session.query(Delivery).filter( Delivery.customer_id == customer_estimate.customer_id, @@ -448,3 +474,25 @@ def populate_customer_estimates(): } return JSONResponse(content=jsonable_encoder(result)) + + +@router.put("/house_factor/{customer_id}", status_code=200) +def update_customer_house_factor(customer_id: int, body: HouseFactorUpdate): + logger.info(f"PUT /fixstuff_customer/house_factor/{customer_id}") + customer_estimate = session.query(Customer_estimate_gallons).filter( + Customer_estimate_gallons.customer_id == customer_id + ).first() + + if not customer_estimate: + return JSONResponse(content={"error": "Customer estimate record not found"}, status_code=404) + + customer_estimate.house_factor = Decimal(str(round(body.house_factor, 4))) + session.commit() + session.refresh(customer_estimate) + + return JSONResponse(content=jsonable_encoder({ + "id": customer_estimate.id, + "customer_id": customer_estimate.customer_id, + "house_factor": float(customer_estimate.house_factor), + "message": "House factor updated" + }), status_code=200) diff --git a/app/script/fuel_estimator.py b/app/script/fuel_estimator.py index 90dea37..90ef1dc 100644 --- a/app/script/fuel_estimator.py +++ b/app/script/fuel_estimator.py @@ -3,22 +3,15 @@ from sqlalchemy.orm import Session from sqlalchemy import func from datetime import date, timedelta from decimal import Decimal +import statistics logger = logging.getLogger(__name__) -# Import your existing database models -from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery +from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery, KFactorHistory -# --- Constants for the Model --- -# This is a baseline daily usage for homes that use oil for hot water. -# A typical value is 0.5 to 1.0 gallons per day. Adjust as needed. +# --- Constants --- HOT_WATER_DAILY_USAGE = Decimal('1.0') -# This determines how quickly the K-Factor adjusts. -# 0.7 means 70% weight is given to the historical factor and 30% to the new one. -# This prevents wild swings from a single unusual delivery period. -K_FACTOR_SMOOTHING_WEIGHT = Decimal('0.7') - TANK_MAX_FILLS = { 275: 240, 330: 280, @@ -26,7 +19,23 @@ TANK_MAX_FILLS = { 550: 500 } -PARTIAL_DELIVERIES = [100, 125, 150, 200] +# K-Factor rolling average settings +K_FACTOR_ROLLING_WINDOW = 5 +K_FACTOR_HISTORY_SIZE = 10 +OUTLIER_SIGMA_THRESHOLD = 2.0 + +# Budget fill detection +BUDGET_FILL_AMOUNTS = {100, 125, 150, 200} +BUDGET_FILL_TOLERANCE = 0.5 + +# Confidence scoring +CONFIDENCE_PER_DELIVERY = 8 +CONFIDENCE_MAX = 100 +CONFIDENCE_MIN = 20 +CONFIDENCE_VARIANCE_PENALTY = 10 + +# Default K-factor when no data available +DEFAULT_K_FACTOR = Decimal('0.12') class FuelEstimator: @@ -34,64 +43,117 @@ class FuelEstimator: self.session = session def _get_weather_for_date(self, target_date: date) -> Auto_Temp | None: - """Helper to fetch weather data for a specific date.""" return self.session.query(Auto_Temp).filter(Auto_Temp.todays_date == target_date).first() - def _estimate_initial_house_factor(self, customer: Auto_Delivery) -> Decimal: - """ - Generic function to estimate initial house factor for customers with only one delivery. - This can be improved with more sophisticated logic (e.g., averaging similar customers). - """ - # Default generic house factor: 0.12 gallons per degree day (average based on existing customer data) - # This represents typical heating usage and can be adjusted based on future data analysis - return Decimal('0.12') + @staticmethod + def _is_budget_fill(gallons) -> bool: + """Returns True if gallons is within +/-0.5 of a budget fill amount.""" + gal = float(gallons) + for amount in BUDGET_FILL_AMOUNTS: + if abs(gal - amount) <= BUDGET_FILL_TOLERANCE: + return True + return False - def _verify_house_factor_correctness(self, customer: Auto_Delivery) -> bool: - """ - Verify and correct house_factor based on delivery history. - Returns True if correction was made. - """ - # Count deliveries for this customer - delivery_count = self.session.query(func.count(Tickets_Auto_Delivery.id)).filter( - Tickets_Auto_Delivery.customer_id == customer.customer_id + def _get_division_average_k(self) -> Decimal: + """Average K from all valid (non-budget, non-outlier) history entries. + Fallback for new customers instead of hardcoded 0.12.""" + result = self.session.query(func.avg(KFactorHistory.k_factor)).filter( + KFactorHistory.is_budget_fill == False, + KFactorHistory.is_outlier == False, + KFactorHistory.k_factor.isnot(None), + KFactorHistory.k_factor > 0 ).scalar() + if result and result > 0: + return Decimal(str(round(float(result), 4))) + return DEFAULT_K_FACTOR - corrected = False + def _calculate_rolling_k_factor(self, customer_id: int): + """Returns (k_factor, confidence_score, source). - if delivery_count <= 1: - # Customers with 0 or 1 delivery should have house_factor = 0.12 (initial average) - if customer.house_factor != Decimal('0.12'): - logger.info(f"Correcting house_factor for customer {customer.customer_id} from {customer.house_factor} to 0.12 (1 or fewer deliveries)") - customer.house_factor = Decimal('0.12') - corrected = True - # For customers with 2+ deliveries, keep their calculated factor (no correction needed) + 1. Query last 10 non-budget history entries + 2. Take last 5 for rolling window + 3. Calculate mean + std dev + 4. Exclude entries >2 sigma from mean, mark as outliers + 5. Recalculate mean from filtered set + 6. Confidence = min(100, qualifying_deliveries * 8) - penalty for high variance + """ + # Get last HISTORY_SIZE non-budget entries ordered most recent first + history = self.session.query(KFactorHistory).filter( + KFactorHistory.customer_id == customer_id, + KFactorHistory.is_budget_fill == False, + KFactorHistory.k_factor.isnot(None), + KFactorHistory.k_factor > 0 + ).order_by(KFactorHistory.fill_date.desc()).limit(K_FACTOR_HISTORY_SIZE).all() - return corrected + if not history: + div_avg = self._get_division_average_k() + return (div_avg, CONFIDENCE_MIN, 'division_avg') + + # Take last ROLLING_WINDOW for calculation + window = history[:K_FACTOR_ROLLING_WINDOW] + k_values = [float(h.k_factor) for h in window] + + if len(k_values) < 2: + k = Decimal(str(round(k_values[0], 4))) + confidence = min(CONFIDENCE_MAX, CONFIDENCE_PER_DELIVERY) + return (k, max(CONFIDENCE_MIN, confidence), 'calculated') + + mean_k = statistics.mean(k_values) + stdev_k = statistics.stdev(k_values) + + # Mark outliers (>2 sigma from mean) + filtered = [] + for h in window: + kf = float(h.k_factor) + if stdev_k > 0 and abs(kf - mean_k) > OUTLIER_SIGMA_THRESHOLD * stdev_k: + if not h.is_outlier: + h.is_outlier = True + else: + filtered.append(kf) + if h.is_outlier: + h.is_outlier = False + + if not filtered: + # All were outliers - use full set + filtered = k_values + + final_k = Decimal(str(round(statistics.mean(filtered), 4))) + + # Confidence scoring + qualifying = len([h for h in history if not h.is_budget_fill and not h.is_outlier]) + confidence = min(CONFIDENCE_MAX, qualifying * CONFIDENCE_PER_DELIVERY) + + # Penalty for high variance (coefficient of variation) + if len(filtered) >= 2: + cv = statistics.stdev(filtered) / statistics.mean(filtered) if statistics.mean(filtered) > 0 else 0 + if cv > 0.3: + confidence -= CONFIDENCE_VARIANCE_PENALTY * 2 + elif cv > 0.15: + confidence -= CONFIDENCE_VARIANCE_PENALTY + + confidence = max(CONFIDENCE_MIN, confidence) + return (final_k, confidence, 'calculated') def run_daily_update(self): """ - Main function to run once per day. It updates the estimated fuel level - for all active automatic delivery customers. The calling function must commit the session. + Main function to run once per day. Updates estimated fuel level + for all active automatic delivery customers. """ today = date.today() - # 1. Check if the update has already run today if self.session.query(Auto_Update).filter(Auto_Update.last_updated == today).first(): logger.info(f"Daily update for {today} has already been completed.") return {"ok": True, "message": "Update already run today."} - # 2. Get today's weather data (specifically the Heating Degree Days) todays_weather = self._get_weather_for_date(today) if not todays_weather: logger.info(f"Error: Weather data for {today} not found. Cannot run update.") return {"ok": False, "message": f"Weather data for {today} not found."} - # Degree days can't be negative for this calculation. If it's warm, HDD = 0. degree_day = Decimal(max(0, 65 - float(todays_weather.temp_avg))) - # 3. Get all active automatic customers auto_customers = self.session.query(Auto_Delivery).filter( - Auto_Delivery.auto_status == 1 # Assuming 1 means active + Auto_Delivery.auto_status == 1 ).all() if not auto_customers: @@ -100,14 +162,7 @@ class FuelEstimator: logger.info(f"Staging daily fuel update for {len(auto_customers)} customers...") - corrections_made = 0 - - # 4. Loop through each customer and update their fuel level for customer in auto_customers: - # Verify and correct house_factor if needed - if self._verify_house_factor_correctness(customer): - corrections_made += 1 - heating_usage = customer.house_factor * degree_day hot_water_usage = Decimal('0.0') @@ -123,21 +178,16 @@ class FuelEstimator: if customer.days_since_last_fill is not None: customer.days_since_last_fill += 1 - # 5. Log that today's update is complete new_update_log = Auto_Update(last_updated=today) self.session.add(new_update_log) logger.info("Daily update staged. Awaiting commit.") - message = f"Successfully staged updates for {len(auto_customers)} customers." - if corrections_made > 0: - message += f" Corrected house factors for {corrections_made} customers." - - return {"ok": True, "message": message} + return {"ok": True, "message": f"Successfully staged updates for {len(auto_customers)} customers."} def refine_factor_after_delivery(self, ticket: Tickets_Auto_Delivery): """ - This is the self-correction logic. It recalculates and refines the customer's - K-Factor (house_factor) after a delivery. The calling function must commit the session. + Recalculates and refines the customer's K-Factor after a delivery. + Uses K-factor history with rolling averages and outlier detection. """ customer = self.session.query(Auto_Delivery).filter( Auto_Delivery.customer_id == ticket.customer_id @@ -147,74 +197,103 @@ class FuelEstimator: logger.info(f"Customer {ticket.customer_id} not found.") return + # 1. Detect and flag budget fill + is_budget = self._is_budget_fill(ticket.gallons_delivered) + ticket.is_budget_fill = is_budget + if is_budget: + logger.info(f"Budget fill detected for customer {ticket.customer_id}: {ticket.gallons_delivered} gal") + + # 2. First delivery - no previous fill to compare against if not customer.last_fill: - logger.info(f"Setting initial K-Factor for new customer {ticket.customer_id} with only one delivery.") - customer.house_factor = self._estimate_initial_house_factor(customer) - self._update_tank_after_fill(customer, ticket) + logger.info(f"First delivery for customer {ticket.customer_id}. Setting division average K-factor.") + div_avg = self._get_division_average_k() + customer.house_factor = div_avg + customer.confidence_score = CONFIDENCE_MIN + customer.k_factor_source = 'division_avg' + self._update_tank_after_fill(customer, ticket, is_budget) return start_date = customer.last_fill end_date = ticket.fill_date - + if start_date >= end_date: - logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: New fill date is not after the last one. Resetting tank only.") - self._update_tank_after_fill(customer, ticket) + logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: fill date not after last fill. Resetting tank only.") + self._update_tank_after_fill(customer, ticket, is_budget) return + # 3. Calculate HDD for interval interval_temps = self.session.query(Auto_Temp).filter( Auto_Temp.todays_date > start_date, Auto_Temp.todays_date <= end_date ).all() total_degree_days = sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps) total_hdd = Decimal(total_degree_days) - + + # Hot water adjustment + num_days = (end_date - start_date).days total_hot_water_usage = Decimal('0.0') if customer.hot_water_summer == 1: - num_days = (end_date - start_date).days total_hot_water_usage = Decimal(num_days) * HOT_WATER_DAILY_USAGE gallons_for_heating = ticket.gallons_delivered - total_hot_water_usage - if gallons_for_heating <= 0 or total_hdd == 0: - logger.info(f"Cannot calculate new K-Factor for customer {ticket.customer_id}. (HDD: {total_hdd}, Heating Gallons: {gallons_for_heating}). Resetting tank only.") - self._update_tank_after_fill(customer, ticket) - return - - new_k_factor = gallons_for_heating / total_hdd - - current_k_factor = customer.house_factor - smoothed_k_factor = (current_k_factor * K_FACTOR_SMOOTHING_WEIGHT) + (new_k_factor * (Decimal('1.0') - K_FACTOR_SMOOTHING_WEIGHT)) - logger.info(f"Refining K-Factor for Customer ID {customer.customer_id}:") - logger.info(f" - Old K-Factor: {current_k_factor:.4f}, New Smoothed K-Factor: {smoothed_k_factor:.4f}") + # Calculate K-factor for this observation + k_factor_obs = None + if gallons_for_heating > 0 and total_hdd > 0: + k_factor_obs = gallons_for_heating / total_hdd + + # 4. Store K-factor observation in history (even budget fills, flagged) + history_entry = KFactorHistory( + customer_id=ticket.customer_id, + ticket_id=ticket.id, + fill_date=ticket.fill_date, + gallons_delivered=ticket.gallons_delivered, + total_hdd=total_hdd, + days_in_period=num_days, + k_factor=k_factor_obs, + is_budget_fill=is_budget, + is_outlier=False, + created_at=date.today() + ) + self.session.add(history_entry) + + # 5. Run rolling K-factor calculation + # Flush so the new entry is visible to the query + self.session.flush() + new_k, confidence, source = self._calculate_rolling_k_factor(ticket.customer_id) + + logger.info(f"Refining K-Factor for Customer {ticket.customer_id}:") + logger.info(f" Old K: {customer.house_factor:.4f}, New K: {new_k:.4f}, Confidence: {confidence}, Source: {source}") + + # 6. Update customer + customer.house_factor = new_k + customer.confidence_score = confidence + customer.k_factor_source = source + + # 7. Update tank after fill + self._update_tank_after_fill(customer, ticket, is_budget) - customer.house_factor = smoothed_k_factor - self._update_tank_after_fill(customer, ticket) - logger.info(f"K-Factor and tank status for Customer {customer.customer_id} staged for update.") - def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery): - """Helper to update customer tank status after a fill-up or partial delivery.""" + def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery, is_budget: bool = False): + """Update customer tank status after a fill-up.""" customer.last_fill = ticket.fill_date customer.days_since_last_fill = 0 - # Determine max fill capacity if customer.tank_size and Decimal(customer.tank_size) > 0: tank_size = float(Decimal(customer.tank_size)) max_fill = TANK_MAX_FILLS.get(tank_size, tank_size) else: - # Default to legal max for common tank size (275 gallons = 240) max_fill = 240.0 - # Check if this is a partial delivery - if float(ticket.gallons_delivered) in PARTIAL_DELIVERIES: - # Partial delivery: add to current level, cap at max_fill + if is_budget: + # Budget fill: ADD gallons to current level, cap at max_fill customer.estimated_gallons_left += ticket.gallons_delivered customer.estimated_gallons_left = min(customer.estimated_gallons_left, Decimal(str(max_fill))) else: - # Full delivery: set to max_fill + # Full delivery: RESET to max_fill customer.estimated_gallons_left = Decimal(str(max_fill)) - # The previous day's value should match the new value on a fill day. customer.estimated_gallons_left_prev_day = customer.estimated_gallons_left customer.last_updated = date.today() - customer.auto_status = 1 # Reactivate the customer + customer.auto_status = 1