feat: rewrite K-factor engine with history tracking and outlier detection

Replace simple exponential smoothing with a rolling-average K-factor
system backed by a new auto_kfactor_history table. Budget fills are
detected and excluded from calculations, outliers beyond 2-sigma are
flagged, and confidence scores track data quality per customer.
Adds backfill endpoint, auto-create for missing estimation records,
and manual house_factor PUT endpoints for both auto and regular customers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:54:27 -05:00
parent 764c094eed
commit c134c05947
5 changed files with 473 additions and 105 deletions

View File

@@ -1,6 +1,6 @@
from sqlalchemy import (Column, Integer,
DECIMAL, TEXT,
VARCHAR, DATE, INTEGER)
DECIMAL, TEXT, Boolean,
VARCHAR, DATE, INTEGER, Index)
from datetime import datetime
from database import Base
@@ -57,11 +57,12 @@ class Auto_Delivery(Base):
estimated_gallons_left_prev_day = Column(DECIMAL(6, 2))
tank_height = Column(VARCHAR(25))
tank_size = Column(VARCHAR(25))
house_factor = Column(DECIMAL(5, 2))
house_factor = Column(DECIMAL(7, 4))
auto_status = Column(INTEGER())
open_ticket_id = Column(Integer, nullable=True)
hot_water_summer = Column(INTEGER())
confidence_score = Column(INTEGER(), default=20)
k_factor_source = Column(VARCHAR(20), default='default')
@@ -92,3 +93,24 @@ class Tickets_Auto_Delivery(Base):
payment_type = Column(Integer, nullable=True)
payment_card_id = Column(Integer, nullable=True)
payment_status = Column(Integer, nullable=True)
is_budget_fill = Column(Boolean, default=False)
class KFactorHistory(Base):
    # One row per K-factor observation: the interval between two consecutive
    # fills for a customer, the heating degree days accumulated over it and
    # the K-factor derived from the closing delivery. Rows are written by the
    # backfill endpoint and by FuelEstimator.refine_factor_after_delivery, and
    # read back by FuelEstimator._calculate_rolling_k_factor.
    __tablename__ = 'auto_kfactor_history'

    id = Column(Integer, primary_key=True, autoincrement=True)
    customer_id = Column(INTEGER(), nullable=False, index=True)
    # Ticket of the delivery that closed the interval.
    ticket_id = Column(Integer, nullable=True)
    # Fill date of that closing delivery.
    fill_date = Column(DATE())
    gallons_delivered = Column(DECIMAL(6, 2))
    # Sum of max(0, 65 - temp_avg) over the days in the interval.
    total_hdd = Column(DECIMAL(8, 2))
    # Calendar days between the previous fill and this one.
    days_in_period = Column(Integer)
    # Heating gallons (delivered minus hot-water usage) divided by total_hdd;
    # left NULL by the writers when it cannot be computed.
    k_factor = Column(DECIMAL(7, 4))
    # True when the delivered amount matched a budget fill amount; such rows
    # are excluded from the rolling K-factor query.
    is_budget_fill = Column(Boolean, default=False)
    # Maintained by the rolling K-factor calculation, not by the writers
    # (they always insert False).
    is_outlier = Column(Boolean, default=False)
    # Date the row was inserted (writers pass date.today()).
    created_at = Column(DATE())

    __table_args__ = (
        # Composite index matching the rolling-calculation query, which
        # filters by customer_id and orders by fill_date descending.
        Index('ix_auto_kfactor_history_customer_fill', 'customer_id', fill_date.desc()),
    )

View File

@@ -2,15 +2,26 @@ import logging
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from database import session
from datetime import date, timedelta
from decimal import Decimal
from sqlalchemy import func
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
from app.models.customer import Customer_Customer
from app.models.delivery import Delivery
from app.models.auth import Auth_User
from app.auth import get_current_user
logger = logging.getLogger(__name__)
HOT_WATER_DAILY_USAGE = Decimal('1.0')
HDD_FORECAST_DAYS = 7
class HouseFactorUpdate(BaseModel):
    # Request body for PUT /auto/customer/{customer_id}/house_factor.
    # The endpoint rounds the value to 4 decimal places before storing it.
    house_factor: float
router = APIRouter(
@@ -20,6 +31,36 @@ router = APIRouter(
)
def _get_avg_hdd(days: int = HDD_FORECAST_DAYS) -> Decimal:
    """Return the average heating degree days (HDD) over the last ``days`` days.

    The recent average is used as a stand-in for a forecast. Each day's HDD
    is ``max(0, 65 - temp_avg)``, so days at or above 65 contribute zero.

    Args:
        days: Size of the look-back window; rows dated strictly after
            ``today - days`` are included.

    Returns:
        The mean daily HDD rounded to 2 decimal places, or ``Decimal('0')``
        when no usable temperature rows exist in the window.
    """
    cutoff = date.today() - timedelta(days=days)
    rows = session.query(Auto_Temp.temp_avg).filter(
        Auto_Temp.todays_date > cutoff,
        # A row whose temperature was never recorded cannot contribute to the
        # average; without this filter float(None) below raises TypeError.
        Auto_Temp.temp_avg.isnot(None),
    ).all()
    if not rows:
        return Decimal('0')

    daily_hdd = [max(0, 65 - float(row.temp_avg)) for row in rows]
    # Go through str() so the Decimal carries the rounded value rather than
    # the float's full binary expansion.
    return Decimal(str(round(sum(daily_hdd) / len(daily_hdd), 2)))
def _enrich_auto(auto_obj, avg_hdd: Decimal) -> dict:
    """Serialize an auto-delivery record and add computed burn-rate fields.

    Adds ``gallons_per_day`` (house_factor * avg_hdd, plus the daily hot-water
    allowance when ``hot_water_summer == 1``), ``avg_hdd``,
    ``hot_water_summer`` and ``days_remaining`` to the encoded record.

    Args:
        auto_obj: The auto-delivery record to serialize.
        avg_hdd: Average daily heating degree days used as the forecast.

    Returns:
        The JSON-ready dict. ``days_remaining`` is clamped to 0..999 and is
        999 when the burn rate is not positive or the tank estimate is unknown.
    """
    data = jsonable_encoder(auto_obj)

    # A missing or zero house_factor means no heating burn can be projected.
    k = Decimal(str(auto_obj.house_factor)) if auto_obj.house_factor else Decimal('0')
    hot_water = HOT_WATER_DAILY_USAGE if auto_obj.hot_water_summer == 1 else Decimal('0')
    daily_burn = k * avg_hdd + hot_water

    data['gallons_per_day'] = float(round(daily_burn, 2))
    data['avg_hdd'] = float(avg_hdd)
    data['hot_water_summer'] = auto_obj.hot_water_summer

    gallons_left = auto_obj.estimated_gallons_left
    if daily_burn > 0 and gallons_left is not None:
        # Coerce through str() like house_factor above, so a non-Decimal
        # estimate cannot fail on "/ Decimal".
        days_left = int(Decimal(str(gallons_left)) / daily_burn)
        # A tank estimate below zero would otherwise give a negative number
        # of days; report it as 0 (already empty).
        data['days_remaining'] = max(0, min(days_left, 999))
    else:
        data['days_remaining'] = 999
    return data
@router.get("/all/customers", status_code=200)
def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
@@ -31,7 +72,10 @@ def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
.all()
)
return JSONResponse(content=jsonable_encoder(automatics), status_code=200)
avg_hdd = _get_avg_hdd()
enriched = [_enrich_auto(a, avg_hdd) for a in automatics]
return JSONResponse(content=enriched, status_code=200)
@router.get("/driver/{driver_employee_id}", status_code=200)
@@ -112,7 +156,7 @@ def get_autos_customers_extended(customer_id, current_user: Auth_User = Depends(
@router.get("/auto/customer/{customer_id}", status_code=200)
def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends(get_current_user)):
def get_auto_delivery_by_customer(customer_id: int, current_user: Auth_User = Depends(get_current_user)):
logger.info(f"GET /delivery/auto/customer/{customer_id} - User: {current_user.username}")
get_auto_delivery = (
session.query(Auto_Delivery)
@@ -120,7 +164,47 @@ def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends
.first()
)
return JSONResponse(content=jsonable_encoder(get_auto_delivery), status_code=200)
if not get_auto_delivery:
# Auto-create record from customer data
customer = session.query(Customer_Customer).filter(
Customer_Customer.id == customer_id
).first()
if not customer:
return JSONResponse(content={"error": "Customer not found"}, status_code=404)
# Use division average K-factor as default
div_avg = session.query(func.avg(Auto_Delivery.house_factor)).filter(
Auto_Delivery.house_factor.isnot(None),
Auto_Delivery.house_factor > 0
).scalar()
default_k = float(div_avg) if div_avg else 0.12
get_auto_delivery = Auto_Delivery(
customer_id=customer.id,
account_number=customer.account_number,
customer_town=customer.customer_town,
customer_state=customer.customer_state,
customer_address=customer.customer_address,
customer_zip=customer.customer_zip,
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
estimated_gallons_left=Decimal('100'),
estimated_gallons_left_prev_day=Decimal('100'),
tank_size='275',
house_factor=Decimal(str(round(default_k, 4))),
auto_status=1,
hot_water_summer=0,
confidence_score=20,
k_factor_source='default'
)
session.add(get_auto_delivery)
session.commit()
session.refresh(get_auto_delivery)
logger.info(f"Auto-created Auto_Delivery record for customer {customer_id}")
avg_hdd = _get_avg_hdd()
enriched = _enrich_auto(get_auto_delivery, avg_hdd)
return JSONResponse(content=enriched, status_code=200)
@router.put("/update_status/{auto_id}", status_code=200)
@@ -137,3 +221,26 @@ def update_auto_status(auto_id: int, current_user: Auth_User = Depends(get_curre
session.commit()
return {"message": "Auto status updated to 3"}
return {"error": "Auto delivery not found"}
@router.put("/auto/customer/{customer_id}/house_factor", status_code=200)
def update_house_factor(customer_id: int, body: HouseFactorUpdate, current_user: Auth_User = Depends(get_current_user)):
    """Manually override the K-factor (house_factor) of an auto-delivery customer.

    Stores the value rounded to 4 decimal places and marks the record's
    ``k_factor_source`` as ``'manual'``.

    Returns:
        200 with the enriched auto-delivery record, 400 when the value is not
        a finite number the DECIMAL(7, 4) column can hold, or 404 when the
        customer has no auto-delivery record.
    """
    logger.info(f"PUT /delivery/auto/customer/{customer_id}/house_factor - User: {current_user.username}")

    # house_factor is DECIMAL(7, 4): at most 999.9999. NaN fails both
    # comparisons and +/-inf fails one of them, so non-finite values are
    # rejected here too.
    if not (0 <= body.house_factor <= 999.9999):
        return JSONResponse(
            content={"error": "house_factor must be a number between 0 and 999.9999"},
            status_code=400,
        )

    auto_delivery = (
        session.query(Auto_Delivery)
        .filter(Auto_Delivery.customer_id == customer_id)
        .first()
    )
    if not auto_delivery:
        return JSONResponse(content={"error": "Auto delivery record not found"}, status_code=404)

    auto_delivery.house_factor = Decimal(str(round(body.house_factor, 4)))
    auto_delivery.k_factor_source = 'manual'
    try:
        session.commit()
    except Exception:
        # The session is shared at module level; a failed commit must be
        # rolled back or later requests on it will keep failing.
        session.rollback()
        raise
    session.refresh(auto_delivery)

    avg_hdd = _get_avg_hdd()
    enriched = _enrich_auto(auto_delivery, avg_hdd)
    return JSONResponse(content=enriched, status_code=200)

View File

@@ -7,9 +7,10 @@ from sqlalchemy import func
from datetime import date
from decimal import Decimal
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp, KFactorHistory
from app.models.delivery import Delivery
from app.constants import DEFAULT_TANK_SIZE_GALLONS
from app.script.fuel_estimator import FuelEstimator
logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ def fix_customer_last_delivered():
"new_date": str(latest_ticket.fill_date)
})
session.add(ad)
session.commit()
result = {
"total_customers": total_customers,
@@ -213,3 +214,114 @@ def estimate_customer_gallons(update_db: int):
session.commit()
return JSONResponse(content=jsonable_encoder(estimates))
@router.get("/backfill_kfactor_history", status_code=200)
def backfill_kfactor_history():
    """
    Backfill the auto_kfactor_history table from existing ticket data.
    For each auto customer with 2+ tickets, calculates K-factor for each
    consecutive ticket pair and inserts into history. Then runs the rolling
    K-factor calculation to set the customer's house_factor, confidence, and source.

    Customers that already have any history rows are left untouched, so the
    endpoint can be re-run. Returns a JSON object with the counters
    customers_processed, customers_skipped, history_entries_created and
    customers_updated.

    NOTE(review): the loop nesting below was reconstructed from a rendering
    that lost indentation; in particular the single session.commit() is
    assumed to run once after the customer loop - confirm against the file.
    """
    logger.info("GET /fixstuff/backfill_kfactor_history - Starting K-factor history backfill")
    estimator = FuelEstimator(session=session)
    auto_deliveries = session.query(Auto_Delivery).all()
    stats = {
        "customers_processed": 0,
        "customers_skipped": 0,
        "history_entries_created": 0,
        "customers_updated": 0,
    }
    for ad in auto_deliveries:
        # Tickets in chronological order so consecutive pairs bound one interval.
        tickets = session.query(Tickets_Auto_Delivery).filter(
            Tickets_Auto_Delivery.customer_id == ad.customer_id,
            Tickets_Auto_Delivery.fill_date.isnot(None)
        ).order_by(Tickets_Auto_Delivery.fill_date).all()
        if len(tickets) < 2:
            stats["customers_skipped"] += 1
            # Fewer than 2 tickets: no interval to measure. Only fill in the
            # confidence/source defaults when unset; house_factor is not changed.
            if ad.confidence_score is None:
                ad.confidence_score = 20
            if ad.k_factor_source is None:
                ad.k_factor_source = 'default'
            continue
        stats["customers_processed"] += 1
        # Check if this customer already has history entries
        # (such customers are skipped but were already counted as processed above).
        existing = session.query(KFactorHistory).filter(
            KFactorHistory.customer_id == ad.customer_id
        ).count()
        if existing > 0:
            continue
        for i in range(len(tickets) - 1):
            prev_ticket = tickets[i]
            next_ticket = tickets[i + 1]
            start_date = prev_ticket.fill_date
            end_date = next_ticket.fill_date
            num_days = (end_date - start_date).days
            # Same-day (or out-of-order) tickets give no measurable interval.
            if num_days <= 0:
                continue
            # Calculate HDD for the interval
            # (start day excluded, end day included).
            interval_temps = session.query(Auto_Temp).filter(
                Auto_Temp.todays_date > start_date,
                Auto_Temp.todays_date <= end_date
            ).all()
            total_hdd = Decimal(sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps))
            # No heating demand (or no temperature rows) in the interval:
            # no history row is written for this pair.
            if total_hdd == 0:
                continue
            # Hot water adjustment
            # NOTE(review): HOT_WATER_DAILY_USAGE is not among the imports
            # visible for this module - confirm it is defined or imported here.
            total_hot_water = Decimal('0.0')
            if ad.hot_water_summer == 1:
                total_hot_water = Decimal(num_days) * HOT_WATER_DAILY_USAGE
            gallons_for_heating = next_ticket.gallons_delivered - total_hot_water
            # k_factor stays None when hot water accounts for the whole
            # delivery; the row is still stored so the delivery is on record.
            k_factor_obs = None
            if gallons_for_heating > 0 and total_hdd > 0:
                k_factor_obs = gallons_for_heating / total_hdd
            is_budget = estimator._is_budget_fill(next_ticket.gallons_delivered)
            # Flag the ticket too
            next_ticket.is_budget_fill = is_budget
            history_entry = KFactorHistory(
                customer_id=ad.customer_id,
                ticket_id=next_ticket.id,
                fill_date=next_ticket.fill_date,
                gallons_delivered=next_ticket.gallons_delivered,
                total_hdd=total_hdd,
                days_in_period=num_days,
                k_factor=k_factor_obs,
                is_budget_fill=is_budget,
                is_outlier=False,
                created_at=date.today()
            )
            session.add(history_entry)
            stats["history_entries_created"] += 1
        # Flush so rolling calc can see the new entries
        session.flush()
        # Run rolling K-factor calculation to set customer values
        # NOTE(review): this also runs when no rows were created above, in
        # which case the estimator's fallback value replaces house_factor.
        new_k, confidence, source = estimator._calculate_rolling_k_factor(ad.customer_id)
        ad.house_factor = new_k
        ad.confidence_score = confidence
        ad.k_factor_source = source
        stats["customers_updated"] += 1
    session.commit()
    logger.info(f"Backfill complete: {stats}")
    return JSONResponse(content=jsonable_encoder(stats))

View File

@@ -2,6 +2,7 @@ import logging
from fastapi import APIRouter
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from database import session
from sqlalchemy import func
from datetime import date
@@ -27,6 +28,9 @@ TANK_MAX_FILLS = {
}
class HouseFactorUpdate(BaseModel):
    # Request body for PUT /fixstuff_customer/house_factor/{customer_id}.
    # The endpoint rounds the value to 4 decimal places before storing it.
    house_factor: float
router = APIRouter(
prefix="/fixstuff_customer",
@@ -256,10 +260,32 @@ def estimate_customer_gallons_specific(customer_id: int):
).first()
if not customer_estimate:
return JSONResponse(content={
"error": f"No fuel estimation data found for customer {customer_id}",
"solution": "Run the populate_estimates endpoint first to initialize customer data."
})
# Auto-create record from customer data
customer = session.query(Customer_Customer).filter(
Customer_Customer.id == customer_id
).first()
if not customer:
return JSONResponse(content={"error": f"Customer {customer_id} not found"}, status_code=404)
customer_estimate = Customer_estimate_gallons(
customer_id=customer.id,
account_number=customer.account_number,
customer_town=customer.customer_town,
customer_state=customer.customer_state,
customer_address=customer.customer_address,
customer_zip=customer.customer_zip,
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
estimated_gallons_left=Decimal('100'),
estimated_gallons_left_prev_day=Decimal('100'),
tank_size='275',
house_factor=Decimal('0.12'),
auto_status=1,
hot_water_summer=0
)
session.add(customer_estimate)
session.commit()
session.refresh(customer_estimate)
logger.info(f"Auto-created Customer_estimate_gallons record for customer {customer_id}")
deliveries = session.query(Delivery).filter(
Delivery.customer_id == customer_estimate.customer_id,
@@ -448,3 +474,25 @@ def populate_customer_estimates():
}
return JSONResponse(content=jsonable_encoder(result))
@router.put("/house_factor/{customer_id}", status_code=200)
def update_customer_house_factor(customer_id: int, body: HouseFactorUpdate):
    """Manually set the house_factor on a regular customer's estimate record.

    Returns:
        200 with the record id, customer id and stored house_factor, 400 when
        the value is negative or not finite, or 404 when the customer has no
        estimate record.
    """
    # NOTE(review): unlike the auto-delivery counterpart, this endpoint takes
    # no authenticated-user dependency - confirm that is intended.
    logger.info(f"PUT /fixstuff_customer/house_factor/{customer_id}")

    # NaN fails the first comparison and +/-inf fails one of the two, so only
    # finite, non-negative values get through.
    if not (0 <= body.house_factor < float('inf')):
        return JSONResponse(
            content={"error": "house_factor must be a finite, non-negative number"},
            status_code=400,
        )

    customer_estimate = session.query(Customer_estimate_gallons).filter(
        Customer_estimate_gallons.customer_id == customer_id
    ).first()
    if not customer_estimate:
        return JSONResponse(content={"error": "Customer estimate record not found"}, status_code=404)

    customer_estimate.house_factor = Decimal(str(round(body.house_factor, 4)))
    try:
        session.commit()
    except Exception:
        # The session is shared at module level; a failed commit must be
        # rolled back or later requests on it will keep failing.
        session.rollback()
        raise
    session.refresh(customer_estimate)

    return JSONResponse(content=jsonable_encoder({
        "id": customer_estimate.id,
        "customer_id": customer_estimate.customer_id,
        "house_factor": float(customer_estimate.house_factor),
        "message": "House factor updated"
    }), status_code=200)

View File

@@ -3,22 +3,15 @@ from sqlalchemy.orm import Session
from sqlalchemy import func
from datetime import date, timedelta
from decimal import Decimal
import statistics
logger = logging.getLogger(__name__)
# Import your existing database models
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery, KFactorHistory
# --- Constants for the Model ---
# This is a baseline daily usage for homes that use oil for hot water.
# A typical value is 0.5 to 1.0 gallons per day. Adjust as needed.
# --- Constants ---
HOT_WATER_DAILY_USAGE = Decimal('1.0')
# This determines how quickly the K-Factor adjusts.
# 0.7 means 70% weight is given to the historical factor and 30% to the new one.
# This prevents wild swings from a single unusual delivery period.
K_FACTOR_SMOOTHING_WEIGHT = Decimal('0.7')
TANK_MAX_FILLS = {
275: 240,
330: 280,
@@ -26,7 +19,23 @@ TANK_MAX_FILLS = {
550: 500
}
PARTIAL_DELIVERIES = [100, 125, 150, 200]
# K-Factor rolling average settings
# Number of most recent non-budget observations averaged into house_factor.
K_FACTOR_ROLLING_WINDOW = 5
# Maximum number of recent non-budget observations fetched per customer.
K_FACTOR_HISTORY_SIZE = 10
# Observations further than this many standard deviations from the mean
# are flagged as outliers and left out of the average.
OUTLIER_SIGMA_THRESHOLD = 2.0
# Budget fill detection
# Delivered amounts (gallons) treated as budget fills, matched within
# +/- BUDGET_FILL_TOLERANCE gallons.
BUDGET_FILL_AMOUNTS = {100, 125, 150, 200}
BUDGET_FILL_TOLERANCE = 0.5
# Confidence scoring
# Points earned per qualifying (non-budget, non-outlier) history entry.
CONFIDENCE_PER_DELIVERY = 8
# The final score is clamped to the range CONFIDENCE_MIN..CONFIDENCE_MAX.
CONFIDENCE_MAX = 100
CONFIDENCE_MIN = 20
# Subtracted once when the coefficient of variation exceeds 0.15 and twice
# when it exceeds 0.3.
CONFIDENCE_VARIANCE_PENALTY = 10
# Default K-factor when no data available
# (gallons per degree day; used when no valid history exists for any customer).
DEFAULT_K_FACTOR = Decimal('0.12')
class FuelEstimator:
@@ -34,64 +43,117 @@ class FuelEstimator:
self.session = session
def _get_weather_for_date(self, target_date: date) -> Auto_Temp | None:
"""Helper to fetch weather data for a specific date."""
return self.session.query(Auto_Temp).filter(Auto_Temp.todays_date == target_date).first()
def _estimate_initial_house_factor(self, customer: Auto_Delivery) -> Decimal:
"""
Generic function to estimate initial house factor for customers with only one delivery.
This can be improved with more sophisticated logic (e.g., averaging similar customers).
"""
# Default generic house factor: 0.12 gallons per degree day (average based on existing customer data)
# This represents typical heating usage and can be adjusted based on future data analysis
return Decimal('0.12')
@staticmethod
def _is_budget_fill(gallons) -> bool:
    """Tell whether a delivered amount matches one of the budget fill sizes.

    A match means the amount lies within BUDGET_FILL_TOLERANCE gallons
    (inclusive) of any value in BUDGET_FILL_AMOUNTS.
    """
    delivered = float(gallons)
    return any(
        abs(delivered - target) <= BUDGET_FILL_TOLERANCE
        for target in BUDGET_FILL_AMOUNTS
    )
def _verify_house_factor_correctness(self, customer: Auto_Delivery) -> bool:
"""
Verify and correct house_factor based on delivery history.
Returns True if correction was made.
"""
# Count deliveries for this customer
delivery_count = self.session.query(func.count(Tickets_Auto_Delivery.id)).filter(
Tickets_Auto_Delivery.customer_id == customer.customer_id
def _get_division_average_k(self) -> Decimal:
    """Return the division-wide mean K-factor, rounded to 4 decimal places.

    Only history rows that are neither budget fills nor outliers and that
    carry a positive K-factor are averaged. When there are none (or the
    average is not positive) DEFAULT_K_FACTOR is returned, so new customers
    start from real data whenever any exists.
    """
    valid_rows = (
        KFactorHistory.is_budget_fill == False,
        KFactorHistory.is_outlier == False,
        KFactorHistory.k_factor.isnot(None),
        KFactorHistory.k_factor > 0,
    )
    division_avg = (
        self.session.query(func.avg(KFactorHistory.k_factor))
        .filter(*valid_rows)
        .scalar()
    )
    if not division_avg or division_avg <= 0:
        return DEFAULT_K_FACTOR
    return Decimal(str(round(float(division_avg), 4)))
corrected = False
def _calculate_rolling_k_factor(self, customer_id: int) -> tuple[Decimal, int, str]:
    """Derive a customer's K-factor from their recent delivery history.

    Steps:
      1. Fetch the last K_FACTOR_HISTORY_SIZE non-budget history entries
         that carry a positive K-factor, most recent first.
      2. Compute mean and sample standard deviation over all fetched entries.
      3. Flag entries further than OUTLIER_SIGMA_THRESHOLD standard
         deviations from that mean as outliers (and clear stale flags).
      4. Average the non-outlier entries among the K_FACTOR_ROLLING_WINDOW
         most recent ones.
      5. Confidence = qualifying entries * CONFIDENCE_PER_DELIVERY, capped at
         CONFIDENCE_MAX, minus a penalty for high variance, floored at
         CONFIDENCE_MIN.

    The outlier statistics deliberately use the whole fetched history rather
    than the rolling window alone: with the sample standard deviation no
    value in a set of n can lie more than (n - 1) / sqrt(n) deviations from
    the mean, which is about 1.79 for n = 5, so a 2-sigma test restricted to
    a 5-entry window can never flag anything. For the same reason nothing is
    flagged until a customer has at least 6 observations.

    Args:
        customer_id: Customer whose history is evaluated.

    Returns:
        ``(k_factor, confidence_score, source)`` where source is
        ``'division_avg'`` when the customer has no usable history and
        ``'calculated'`` otherwise. ``is_outlier`` flags on the fetched
        history rows are updated in the session; the caller commits.
    """
    # Get last HISTORY_SIZE non-budget entries ordered most recent first
    history = self.session.query(KFactorHistory).filter(
        KFactorHistory.customer_id == customer_id,
        KFactorHistory.is_budget_fill == False,
        KFactorHistory.k_factor.isnot(None),
        KFactorHistory.k_factor > 0
    ).order_by(KFactorHistory.fill_date.desc()).limit(K_FACTOR_HISTORY_SIZE).all()

    if not history:
        div_avg = self._get_division_average_k()
        return (div_avg, CONFIDENCE_MIN, 'division_avg')

    history_values = [float(h.k_factor) for h in history]
    # The most recent entries form the rolling window used for the average.
    window_values = history_values[:K_FACTOR_ROLLING_WINDOW]

    if len(window_values) < 2:
        # A single observation: no spread to analyse, use it as-is.
        k = Decimal(str(round(window_values[0], 4)))
        confidence = min(CONFIDENCE_MAX, CONFIDENCE_PER_DELIVERY)
        return (k, max(CONFIDENCE_MIN, confidence), 'calculated')

    mean_k = statistics.mean(history_values)
    stdev_k = statistics.stdev(history_values)
    outlier_limit = OUTLIER_SIGMA_THRESHOLD * stdev_k

    # Re-evaluate the outlier flag of every fetched entry and collect the
    # non-outlier values that fall inside the rolling window.
    filtered = []
    for position, (entry, kf) in enumerate(zip(history, history_values)):
        is_outlier = stdev_k > 0 and abs(kf - mean_k) > outlier_limit
        if bool(entry.is_outlier) != is_outlier:
            entry.is_outlier = is_outlier
        if not is_outlier and position < K_FACTOR_ROLLING_WINDOW:
            filtered.append(kf)

    if not filtered:
        # Every window entry was flagged - fall back to the full window.
        filtered = window_values

    mean_filtered = statistics.mean(filtered)
    final_k = Decimal(str(round(mean_filtered, 4)))

    # Confidence scoring
    qualifying = sum(1 for h in history if not h.is_budget_fill and not h.is_outlier)
    confidence = min(CONFIDENCE_MAX, qualifying * CONFIDENCE_PER_DELIVERY)

    # Penalty for high variance (coefficient of variation)
    if len(filtered) >= 2:
        cv = statistics.stdev(filtered) / mean_filtered if mean_filtered > 0 else 0
        if cv > 0.3:
            confidence -= CONFIDENCE_VARIANCE_PENALTY * 2
        elif cv > 0.15:
            confidence -= CONFIDENCE_VARIANCE_PENALTY

    confidence = max(CONFIDENCE_MIN, confidence)
    return (final_k, confidence, 'calculated')
def run_daily_update(self):
"""
Main function to run once per day. It updates the estimated fuel level
for all active automatic delivery customers. The calling function must commit the session.
Main function to run once per day. Updates estimated fuel level
for all active automatic delivery customers.
"""
today = date.today()
# 1. Check if the update has already run today
if self.session.query(Auto_Update).filter(Auto_Update.last_updated == today).first():
logger.info(f"Daily update for {today} has already been completed.")
return {"ok": True, "message": "Update already run today."}
# 2. Get today's weather data (specifically the Heating Degree Days)
todays_weather = self._get_weather_for_date(today)
if not todays_weather:
logger.info(f"Error: Weather data for {today} not found. Cannot run update.")
return {"ok": False, "message": f"Weather data for {today} not found."}
# Degree days can't be negative for this calculation. If it's warm, HDD = 0.
degree_day = Decimal(max(0, 65 - float(todays_weather.temp_avg)))
# 3. Get all active automatic customers
auto_customers = self.session.query(Auto_Delivery).filter(
Auto_Delivery.auto_status == 1 # Assuming 1 means active
Auto_Delivery.auto_status == 1
).all()
if not auto_customers:
@@ -100,14 +162,7 @@ class FuelEstimator:
logger.info(f"Staging daily fuel update for {len(auto_customers)} customers...")
corrections_made = 0
# 4. Loop through each customer and update their fuel level
for customer in auto_customers:
# Verify and correct house_factor if needed
if self._verify_house_factor_correctness(customer):
corrections_made += 1
heating_usage = customer.house_factor * degree_day
hot_water_usage = Decimal('0.0')
@@ -123,21 +178,16 @@ class FuelEstimator:
if customer.days_since_last_fill is not None:
customer.days_since_last_fill += 1
# 5. Log that today's update is complete
new_update_log = Auto_Update(last_updated=today)
self.session.add(new_update_log)
logger.info("Daily update staged. Awaiting commit.")
message = f"Successfully staged updates for {len(auto_customers)} customers."
if corrections_made > 0:
message += f" Corrected house factors for {corrections_made} customers."
return {"ok": True, "message": message}
return {"ok": True, "message": f"Successfully staged updates for {len(auto_customers)} customers."}
def refine_factor_after_delivery(self, ticket: Tickets_Auto_Delivery):
"""
This is the self-correction logic. It recalculates and refines the customer's
K-Factor (house_factor) after a delivery. The calling function must commit the session.
Recalculates and refines the customer's K-Factor after a delivery.
Uses K-factor history with rolling averages and outlier detection.
"""
customer = self.session.query(Auto_Delivery).filter(
Auto_Delivery.customer_id == ticket.customer_id
@@ -147,74 +197,103 @@ class FuelEstimator:
logger.info(f"Customer {ticket.customer_id} not found.")
return
# 1. Detect and flag budget fill
is_budget = self._is_budget_fill(ticket.gallons_delivered)
ticket.is_budget_fill = is_budget
if is_budget:
logger.info(f"Budget fill detected for customer {ticket.customer_id}: {ticket.gallons_delivered} gal")
# 2. First delivery - no previous fill to compare against
if not customer.last_fill:
logger.info(f"Setting initial K-Factor for new customer {ticket.customer_id} with only one delivery.")
customer.house_factor = self._estimate_initial_house_factor(customer)
self._update_tank_after_fill(customer, ticket)
logger.info(f"First delivery for customer {ticket.customer_id}. Setting division average K-factor.")
div_avg = self._get_division_average_k()
customer.house_factor = div_avg
customer.confidence_score = CONFIDENCE_MIN
customer.k_factor_source = 'division_avg'
self._update_tank_after_fill(customer, ticket, is_budget)
return
start_date = customer.last_fill
end_date = ticket.fill_date
if start_date >= end_date:
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: New fill date is not after the last one. Resetting tank only.")
self._update_tank_after_fill(customer, ticket)
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: fill date not after last fill. Resetting tank only.")
self._update_tank_after_fill(customer, ticket, is_budget)
return
# 3. Calculate HDD for interval
interval_temps = self.session.query(Auto_Temp).filter(
Auto_Temp.todays_date > start_date,
Auto_Temp.todays_date <= end_date
).all()
total_degree_days = sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps)
total_hdd = Decimal(total_degree_days)
# Hot water adjustment
num_days = (end_date - start_date).days
total_hot_water_usage = Decimal('0.0')
if customer.hot_water_summer == 1:
num_days = (end_date - start_date).days
total_hot_water_usage = Decimal(num_days) * HOT_WATER_DAILY_USAGE
gallons_for_heating = ticket.gallons_delivered - total_hot_water_usage
if gallons_for_heating <= 0 or total_hdd == 0:
logger.info(f"Cannot calculate new K-Factor for customer {ticket.customer_id}. (HDD: {total_hdd}, Heating Gallons: {gallons_for_heating}). Resetting tank only.")
self._update_tank_after_fill(customer, ticket)
return
new_k_factor = gallons_for_heating / total_hdd
current_k_factor = customer.house_factor
smoothed_k_factor = (current_k_factor * K_FACTOR_SMOOTHING_WEIGHT) + (new_k_factor * (Decimal('1.0') - K_FACTOR_SMOOTHING_WEIGHT))
logger.info(f"Refining K-Factor for Customer ID {customer.customer_id}:")
logger.info(f" - Old K-Factor: {current_k_factor:.4f}, New Smoothed K-Factor: {smoothed_k_factor:.4f}")
# Calculate K-factor for this observation
k_factor_obs = None
if gallons_for_heating > 0 and total_hdd > 0:
k_factor_obs = gallons_for_heating / total_hdd
# 4. Store K-factor observation in history (even budget fills, flagged)
history_entry = KFactorHistory(
customer_id=ticket.customer_id,
ticket_id=ticket.id,
fill_date=ticket.fill_date,
gallons_delivered=ticket.gallons_delivered,
total_hdd=total_hdd,
days_in_period=num_days,
k_factor=k_factor_obs,
is_budget_fill=is_budget,
is_outlier=False,
created_at=date.today()
)
self.session.add(history_entry)
# 5. Run rolling K-factor calculation
# Flush so the new entry is visible to the query
self.session.flush()
new_k, confidence, source = self._calculate_rolling_k_factor(ticket.customer_id)
logger.info(f"Refining K-Factor for Customer {ticket.customer_id}:")
logger.info(f" Old K: {customer.house_factor:.4f}, New K: {new_k:.4f}, Confidence: {confidence}, Source: {source}")
# 6. Update customer
customer.house_factor = new_k
customer.confidence_score = confidence
customer.k_factor_source = source
# 7. Update tank after fill
self._update_tank_after_fill(customer, ticket, is_budget)
customer.house_factor = smoothed_k_factor
self._update_tank_after_fill(customer, ticket)
logger.info(f"K-Factor and tank status for Customer {customer.customer_id} staged for update.")
def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery, is_budget: bool = False):
    """Update the customer's tank estimate and fill bookkeeping after a delivery.

    A budget fill adds the delivered gallons to the current estimate, capped
    at the tank's maximum fill; any other delivery resets the estimate to
    that maximum. The customer is also marked active again (auto_status 1).

    Args:
        customer: Auto-delivery record to update in place.
        ticket: The delivery ticket just recorded.
        is_budget: True when the delivery was detected as a budget fill.
    """
    # Local import: the module only imports Decimal from decimal.
    from decimal import InvalidOperation

    customer.last_fill = ticket.fill_date
    customer.days_since_last_fill = 0

    # Determine max fill capacity. tank_size is free text, so anything
    # missing, non-numeric, non-finite or non-positive falls back to the
    # default below instead of raising.
    try:
        tank_size = float(Decimal(customer.tank_size)) if customer.tank_size else 0.0
    except (InvalidOperation, TypeError, ValueError):
        tank_size = 0.0

    if 0 < tank_size < float('inf'):
        # Known tank sizes map to their max fill; unknown sizes use the
        # size itself.
        max_fill = TANK_MAX_FILLS.get(tank_size, tank_size)
    else:
        # Default to the max fill of a 275-gallon tank.
        max_fill = 240.0
    max_fill_gallons = Decimal(str(max_fill))

    if is_budget:
        # Budget fill: ADD gallons to current level, cap at max_fill.
        # A missing estimate is treated as an empty tank.
        current_level = customer.estimated_gallons_left or Decimal('0')
        customer.estimated_gallons_left = min(
            current_level + ticket.gallons_delivered, max_fill_gallons
        )
    else:
        # Full delivery: RESET to max_fill
        customer.estimated_gallons_left = max_fill_gallons

    # On a fill day the previous-day value mirrors the new level.
    customer.estimated_gallons_left_prev_day = customer.estimated_gallons_left
    customer.last_updated = date.today()
    customer.auto_status = 1