feat: rewrite K-factor engine with history tracking and outlier detection
Replace simple exponential smoothing with a rolling-average K-factor system backed by a new auto_kfactor_history table. Budget fills are detected and excluded from calculations, outliers beyond 2-sigma are flagged, and confidence scores track data quality per customer. Adds backfill endpoint, auto-create for missing estimation records, and manual house_factor PUT endpoints for both auto and regular customers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from sqlalchemy import (Column, Integer,
|
||||
DECIMAL, TEXT,
|
||||
VARCHAR, DATE, INTEGER)
|
||||
DECIMAL, TEXT, Boolean,
|
||||
VARCHAR, DATE, INTEGER, Index)
|
||||
from datetime import datetime
|
||||
from database import Base
|
||||
|
||||
@@ -57,11 +57,12 @@ class Auto_Delivery(Base):
|
||||
estimated_gallons_left_prev_day = Column(DECIMAL(6, 2))
|
||||
tank_height = Column(VARCHAR(25))
|
||||
tank_size = Column(VARCHAR(25))
|
||||
house_factor = Column(DECIMAL(5, 2))
|
||||
house_factor = Column(DECIMAL(7, 4))
|
||||
auto_status = Column(INTEGER())
|
||||
open_ticket_id = Column(Integer, nullable=True)
|
||||
hot_water_summer = Column(INTEGER())
|
||||
|
||||
confidence_score = Column(INTEGER(), default=20)
|
||||
k_factor_source = Column(VARCHAR(20), default='default')
|
||||
|
||||
|
||||
|
||||
@@ -92,3 +93,24 @@ class Tickets_Auto_Delivery(Base):
|
||||
payment_type = Column(Integer, nullable=True)
|
||||
payment_card_id = Column(Integer, nullable=True)
|
||||
payment_status = Column(Integer, nullable=True)
|
||||
is_budget_fill = Column(Boolean, default=False)
|
||||
|
||||
|
||||
class KFactorHistory(Base):
    """One stored K-factor observation for an automatic-delivery customer.

    A row keeps the inputs of a single observation (fill date, gallons
    delivered, heating degree days and day count of the period) next to
    the resulting K-factor and two quality flags.
    """

    __tablename__ = 'auto_kfactor_history'

    # --- identity / links ---
    id = Column(Integer, primary_key=True, autoincrement=True)
    customer_id = Column(INTEGER(), nullable=False, index=True)
    ticket_id = Column(Integer, nullable=True)

    # --- observation inputs ---
    fill_date = Column(DATE())
    gallons_delivered = Column(DECIMAL(6, 2))
    total_hdd = Column(DECIMAL(8, 2))
    days_in_period = Column(Integer)

    # --- derived value and quality flags ---
    k_factor = Column(DECIMAL(7, 4))
    is_budget_fill = Column(Boolean, default=False)
    is_outlier = Column(Boolean, default=False)

    # Stored as a DATE (no time component).
    created_at = Column(DATE())

    # Composite index on (customer_id, fill_date DESC).
    __table_args__ = (
        Index(
            'ix_auto_kfactor_history_customer_fill',
            'customer_id',
            fill_date.desc(),
        ),
    )
|
||||
|
||||
@@ -2,15 +2,26 @@ import logging
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from pydantic import BaseModel
|
||||
from database import session
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from sqlalchemy import func
|
||||
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
|
||||
from app.models.customer import Customer_Customer
|
||||
from app.models.delivery import Delivery
|
||||
from app.models.auth import Auth_User
|
||||
from app.auth import get_current_user
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
HOT_WATER_DAILY_USAGE = Decimal('1.0')
|
||||
HDD_FORECAST_DAYS = 7
|
||||
|
||||
|
||||
class HouseFactorUpdate(BaseModel):
    """Request body for the manual house-factor PUT endpoint."""

    # New house factor (K-factor) value supplied by the caller.
    house_factor: float
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
@@ -20,6 +31,36 @@ router = APIRouter(
|
||||
)
|
||||
|
||||
|
||||
def _get_avg_hdd(days: int = HDD_FORECAST_DAYS) -> Decimal:
    """Return the average heating degree days (HDD) over the last ``days`` days.

    The value is used as a forecast proxy. Daily HDD is ``max(0, 65 - temp_avg)``.

    Args:
        days: Size of the look-back window; rows dated after
            ``today - days`` are included.

    Returns:
        The mean daily HDD rounded to two decimals, or ``Decimal('0')`` when
        no usable temperature rows exist in the window.
    """
    cutoff = date.today() - timedelta(days=days)
    rows = session.query(Auto_Temp.temp_avg).filter(
        Auto_Temp.todays_date > cutoff
    ).all()

    # A NULL temp_avg cannot be converted to float; skip such rows instead of
    # failing the whole request, and average over the valid readings only.
    temps = [float(r.temp_avg) for r in rows if r.temp_avg is not None]
    if not temps:
        return Decimal('0')

    total_hdd = sum(max(0.0, 65 - t) for t in temps)
    return Decimal(str(round(total_hdd / len(temps), 2)))
|
||||
|
||||
|
||||
def _enrich_auto(auto_obj, avg_hdd: Decimal) -> dict:
    """Serialize an auto delivery record and add computed burn-rate fields.

    Args:
        auto_obj: Record exposing ``house_factor``, ``hot_water_summer`` and
            ``estimated_gallons_left``.
        avg_hdd: Average daily heating degree days used for the projection.

    Returns:
        The ``jsonable_encoder`` output for ``auto_obj`` extended with
        ``gallons_per_day``, ``avg_hdd``, ``hot_water_summer`` and
        ``days_remaining`` (an int in the range 0..999).
    """
    max_days = 999  # cap, also used when no burn rate can be computed

    data = jsonable_encoder(auto_obj)
    k = Decimal(str(auto_obj.house_factor)) if auto_obj.house_factor else Decimal('0')
    hot_water = HOT_WATER_DAILY_USAGE if auto_obj.hot_water_summer == 1 else Decimal('0')
    daily_burn = k * avg_hdd + hot_water

    data['gallons_per_day'] = float(round(daily_burn, 2))
    data['avg_hdd'] = float(avg_hdd)
    data['hot_water_summer'] = auto_obj.hot_water_summer

    gallons_left = auto_obj.estimated_gallons_left
    if daily_burn > 0 and gallons_left is not None:
        # Go through str() so a float or int value divides cleanly by a Decimal.
        days_left = int(Decimal(str(gallons_left)) / daily_burn)
        # A negative estimate means the tank is already considered empty;
        # report 0 days instead of a negative count.
        data['days_remaining'] = max(0, min(days_left, max_days))
    else:
        data['days_remaining'] = max_days

    return data
|
||||
|
||||
|
||||
@router.get("/all/customers", status_code=200)
|
||||
def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
|
||||
@@ -31,7 +72,10 @@ def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
|
||||
.all()
|
||||
)
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(automatics), status_code=200)
|
||||
avg_hdd = _get_avg_hdd()
|
||||
enriched = [_enrich_auto(a, avg_hdd) for a in automatics]
|
||||
|
||||
return JSONResponse(content=enriched, status_code=200)
|
||||
|
||||
|
||||
@router.get("/driver/{driver_employee_id}", status_code=200)
|
||||
@@ -112,7 +156,7 @@ def get_autos_customers_extended(customer_id, current_user: Auth_User = Depends(
|
||||
|
||||
|
||||
@router.get("/auto/customer/{customer_id}", status_code=200)
|
||||
def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends(get_current_user)):
|
||||
def get_auto_delivery_by_customer(customer_id: int, current_user: Auth_User = Depends(get_current_user)):
|
||||
logger.info(f"GET /delivery/auto/customer/{customer_id} - User: {current_user.username}")
|
||||
get_auto_delivery = (
|
||||
session.query(Auto_Delivery)
|
||||
@@ -120,7 +164,47 @@ def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends
|
||||
.first()
|
||||
)
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(get_auto_delivery), status_code=200)
|
||||
if not get_auto_delivery:
|
||||
# Auto-create record from customer data
|
||||
customer = session.query(Customer_Customer).filter(
|
||||
Customer_Customer.id == customer_id
|
||||
).first()
|
||||
if not customer:
|
||||
return JSONResponse(content={"error": "Customer not found"}, status_code=404)
|
||||
|
||||
# Use division average K-factor as default
|
||||
div_avg = session.query(func.avg(Auto_Delivery.house_factor)).filter(
|
||||
Auto_Delivery.house_factor.isnot(None),
|
||||
Auto_Delivery.house_factor > 0
|
||||
).scalar()
|
||||
default_k = float(div_avg) if div_avg else 0.12
|
||||
|
||||
get_auto_delivery = Auto_Delivery(
|
||||
customer_id=customer.id,
|
||||
account_number=customer.account_number,
|
||||
customer_town=customer.customer_town,
|
||||
customer_state=customer.customer_state,
|
||||
customer_address=customer.customer_address,
|
||||
customer_zip=customer.customer_zip,
|
||||
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
|
||||
estimated_gallons_left=Decimal('100'),
|
||||
estimated_gallons_left_prev_day=Decimal('100'),
|
||||
tank_size='275',
|
||||
house_factor=Decimal(str(round(default_k, 4))),
|
||||
auto_status=1,
|
||||
hot_water_summer=0,
|
||||
confidence_score=20,
|
||||
k_factor_source='default'
|
||||
)
|
||||
session.add(get_auto_delivery)
|
||||
session.commit()
|
||||
session.refresh(get_auto_delivery)
|
||||
logger.info(f"Auto-created Auto_Delivery record for customer {customer_id}")
|
||||
|
||||
avg_hdd = _get_avg_hdd()
|
||||
enriched = _enrich_auto(get_auto_delivery, avg_hdd)
|
||||
|
||||
return JSONResponse(content=enriched, status_code=200)
|
||||
|
||||
|
||||
@router.put("/update_status/{auto_id}", status_code=200)
|
||||
@@ -137,3 +221,26 @@ def update_auto_status(auto_id: int, current_user: Auth_User = Depends(get_curre
|
||||
session.commit()
|
||||
return {"message": "Auto status updated to 3"}
|
||||
return {"error": "Auto delivery not found"}
|
||||
|
||||
|
||||
@router.put("/auto/customer/{customer_id}/house_factor", status_code=200)
def update_house_factor(customer_id: int, body: HouseFactorUpdate, current_user: Auth_User = Depends(get_current_user)):
    """Manually set the house factor of a customer's auto delivery record.

    The value is rounded to four decimals, the record's ``k_factor_source`` is
    set to ``'manual'`` and the enriched record is returned.

    Returns:
        200 with the enriched record, 400 when the value is not storable,
        404 when the customer has no auto delivery record.
    """
    import math  # local import: keeps this block self-contained

    logger.info(f"PUT /delivery/auto/customer/{customer_id}/house_factor - User: {current_user.username}")

    new_factor = body.house_factor
    # Reject NaN/inf, negatives, and anything that would overflow the
    # DECIMAL(7, 4) house_factor column (max 999.9999) before touching the DB.
    if not math.isfinite(new_factor) or new_factor < 0 or round(new_factor, 4) >= 1000:
        return JSONResponse(
            content={"error": "house_factor must be a finite number between 0 and 999.9999"},
            status_code=400,
        )

    auto_delivery = (
        session.query(Auto_Delivery)
        .filter(Auto_Delivery.customer_id == customer_id)
        .first()
    )

    if not auto_delivery:
        return JSONResponse(content={"error": "Auto delivery record not found"}, status_code=404)

    auto_delivery.house_factor = Decimal(str(round(new_factor, 4)))
    auto_delivery.k_factor_source = 'manual'
    try:
        session.commit()
    except Exception:
        # The session is shared at module level; roll back so a failed commit
        # does not leave it unusable for later requests.
        session.rollback()
        logger.exception(f"Failed to update house_factor for customer {customer_id}")
        raise
    session.refresh(auto_delivery)

    avg_hdd = _get_avg_hdd()
    enriched = _enrich_auto(auto_delivery, avg_hdd)

    return JSONResponse(content=enriched, status_code=200)
|
||||
|
||||
@@ -7,9 +7,10 @@ from sqlalchemy import func
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
|
||||
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp, KFactorHistory
|
||||
from app.models.delivery import Delivery
|
||||
from app.constants import DEFAULT_TANK_SIZE_GALLONS
|
||||
from app.script.fuel_estimator import FuelEstimator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -61,7 +62,7 @@ def fix_customer_last_delivered():
|
||||
"new_date": str(latest_ticket.fill_date)
|
||||
})
|
||||
session.add(ad)
|
||||
|
||||
|
||||
session.commit()
|
||||
result = {
|
||||
"total_customers": total_customers,
|
||||
@@ -213,3 +214,114 @@ def estimate_customer_gallons(update_db: int):
|
||||
session.commit()
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(estimates))
|
||||
|
||||
|
||||
@router.get("/backfill_kfactor_history", status_code=200)
def backfill_kfactor_history():
    """
    Backfill the auto_kfactor_history table from existing ticket data.

    For each auto customer with 2+ dated tickets, calculates a K-factor for
    each consecutive ticket pair and inserts it into history, then runs the
    rolling K-factor calculation to set the customer's house_factor,
    confidence and source. Customers that already have history rows are left
    untouched, so the endpoint can be re-run.

    Returns:
        JSON counters: customers_processed, customers_skipped,
        history_entries_created, customers_updated.

    Raises:
        Re-raises any error after rolling the session back, so a partial
        backfill is never committed.
    """
    logger.info("GET /fixstuff/backfill_kfactor_history - Starting K-factor history backfill")

    estimator = FuelEstimator(session=session)
    auto_deliveries = session.query(Auto_Delivery).all()

    stats = {
        "customers_processed": 0,
        "customers_skipped": 0,
        "history_entries_created": 0,
        "customers_updated": 0,
    }

    try:
        for ad in auto_deliveries:
            tickets = session.query(Tickets_Auto_Delivery).filter(
                Tickets_Auto_Delivery.customer_id == ad.customer_id,
                Tickets_Auto_Delivery.fill_date.isnot(None)
            ).order_by(Tickets_Auto_Delivery.fill_date).all()

            if len(tickets) < 2:
                stats["customers_skipped"] += 1
                # Not enough data for an observation: only fill in missing
                # confidence/source defaults, house_factor is left as is.
                if ad.confidence_score is None:
                    ad.confidence_score = 20
                if ad.k_factor_source is None:
                    ad.k_factor_source = 'default'
                continue

            stats["customers_processed"] += 1

            # Customers that already have history entries are not re-backfilled.
            existing = session.query(KFactorHistory).filter(
                KFactorHistory.customer_id == ad.customer_id
            ).count()
            if existing > 0:
                continue

            # Walk consecutive (previous, next) ticket pairs.
            for prev_ticket, next_ticket in zip(tickets, tickets[1:]):
                start_date = prev_ticket.fill_date
                end_date = next_ticket.fill_date
                num_days = (end_date - start_date).days

                if num_days <= 0:
                    continue

                gallons = next_ticket.gallons_delivered
                if gallons is None:
                    # No delivered amount recorded: nothing to derive.
                    continue

                # Calculate HDD for the interval (start exclusive, end inclusive).
                interval_temps = session.query(Auto_Temp).filter(
                    Auto_Temp.todays_date > start_date,
                    Auto_Temp.todays_date <= end_date
                ).all()
                hdd_sum = sum(
                    max(0.0, 65 - float(temp.temp_avg))
                    for temp in interval_temps
                    if temp.temp_avg is not None  # skip days with no reading
                )
                # Go through str() so the Decimal does not carry binary float
                # noise; two decimals matches the total_hdd column scale.
                total_hdd = Decimal(str(round(hdd_sum, 2)))

                if total_hdd == 0:
                    continue

                # Hot water adjustment
                total_hot_water = Decimal('0.0')
                if ad.hot_water_summer == 1:
                    total_hot_water = Decimal(num_days) * HOT_WATER_DAILY_USAGE

                gallons_for_heating = gallons - total_hot_water

                k_factor_obs = None
                if gallons_for_heating > 0 and total_hdd > 0:
                    k_factor_obs = gallons_for_heating / total_hdd

                is_budget = estimator._is_budget_fill(gallons)

                # Flag the ticket too
                next_ticket.is_budget_fill = is_budget

                history_entry = KFactorHistory(
                    customer_id=ad.customer_id,
                    ticket_id=next_ticket.id,
                    fill_date=next_ticket.fill_date,
                    gallons_delivered=gallons,
                    total_hdd=total_hdd,
                    days_in_period=num_days,
                    k_factor=k_factor_obs,
                    is_budget_fill=is_budget,
                    is_outlier=False,
                    created_at=date.today()
                )
                session.add(history_entry)
                stats["history_entries_created"] += 1

            # Flush so rolling calc can see the new entries
            session.flush()

            # Run rolling K-factor calculation to set customer values
            new_k, confidence, source = estimator._calculate_rolling_k_factor(ad.customer_id)
            ad.house_factor = new_k
            ad.confidence_score = confidence
            ad.k_factor_source = source
            stats["customers_updated"] += 1

        session.commit()
    except Exception:
        # Leave the shared session usable and never commit a partial backfill.
        session.rollback()
        logger.exception("K-factor history backfill failed; changes rolled back")
        raise

    logger.info(f"Backfill complete: {stats}")
    return JSONResponse(content=jsonable_encoder(stats))
|
||||
|
||||
@@ -2,6 +2,7 @@ import logging
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from pydantic import BaseModel
|
||||
from database import session
|
||||
from sqlalchemy import func
|
||||
from datetime import date
|
||||
@@ -27,6 +28,9 @@ TANK_MAX_FILLS = {
|
||||
}
|
||||
|
||||
|
||||
class HouseFactorUpdate(BaseModel):
    """Request body for the manual house-factor PUT endpoint."""

    # New house factor (K-factor) value supplied by the caller.
    house_factor: float
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/fixstuff_customer",
|
||||
@@ -256,10 +260,32 @@ def estimate_customer_gallons_specific(customer_id: int):
|
||||
).first()
|
||||
|
||||
if not customer_estimate:
|
||||
return JSONResponse(content={
|
||||
"error": f"No fuel estimation data found for customer {customer_id}",
|
||||
"solution": "Run the populate_estimates endpoint first to initialize customer data."
|
||||
})
|
||||
# Auto-create record from customer data
|
||||
customer = session.query(Customer_Customer).filter(
|
||||
Customer_Customer.id == customer_id
|
||||
).first()
|
||||
if not customer:
|
||||
return JSONResponse(content={"error": f"Customer {customer_id} not found"}, status_code=404)
|
||||
|
||||
customer_estimate = Customer_estimate_gallons(
|
||||
customer_id=customer.id,
|
||||
account_number=customer.account_number,
|
||||
customer_town=customer.customer_town,
|
||||
customer_state=customer.customer_state,
|
||||
customer_address=customer.customer_address,
|
||||
customer_zip=customer.customer_zip,
|
||||
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
|
||||
estimated_gallons_left=Decimal('100'),
|
||||
estimated_gallons_left_prev_day=Decimal('100'),
|
||||
tank_size='275',
|
||||
house_factor=Decimal('0.12'),
|
||||
auto_status=1,
|
||||
hot_water_summer=0
|
||||
)
|
||||
session.add(customer_estimate)
|
||||
session.commit()
|
||||
session.refresh(customer_estimate)
|
||||
logger.info(f"Auto-created Customer_estimate_gallons record for customer {customer_id}")
|
||||
|
||||
deliveries = session.query(Delivery).filter(
|
||||
Delivery.customer_id == customer_estimate.customer_id,
|
||||
@@ -448,3 +474,25 @@ def populate_customer_estimates():
|
||||
}
|
||||
|
||||
return JSONResponse(content=jsonable_encoder(result))
|
||||
|
||||
|
||||
@router.put("/house_factor/{customer_id}", status_code=200)
def update_customer_house_factor(customer_id: int, body: HouseFactorUpdate):
    """Manually set the house factor on a customer's estimate record.

    The value is rounded to four decimals before it is stored.

    Returns:
        200 with ``id``, ``customer_id``, ``house_factor`` and a message,
        400 when the value is not a finite non-negative number,
        404 when the customer has no estimate record.
    """
    import math  # local import: keeps this block self-contained

    logger.info(f"PUT /fixstuff_customer/house_factor/{customer_id}")

    new_factor = body.house_factor
    # NaN/inf would become Decimal('NaN')/'Infinity' and fail at the database;
    # a negative burn factor is meaningless. Reject both up front.
    # NOTE(review): the column precision of this model is not visible here,
    # so no upper bound is enforced - confirm against the model definition.
    if not math.isfinite(new_factor) or new_factor < 0:
        return JSONResponse(
            content={"error": "house_factor must be a finite, non-negative number"},
            status_code=400,
        )

    customer_estimate = session.query(Customer_estimate_gallons).filter(
        Customer_estimate_gallons.customer_id == customer_id
    ).first()

    if not customer_estimate:
        return JSONResponse(content={"error": "Customer estimate record not found"}, status_code=404)

    customer_estimate.house_factor = Decimal(str(round(new_factor, 4)))
    try:
        session.commit()
    except Exception:
        # The session is shared at module level; roll back so a failed commit
        # does not leave it unusable for later requests.
        session.rollback()
        logger.exception(f"Failed to update house_factor for customer {customer_id}")
        raise
    session.refresh(customer_estimate)

    return JSONResponse(content=jsonable_encoder({
        "id": customer_estimate.id,
        "customer_id": customer_estimate.customer_id,
        "house_factor": float(customer_estimate.house_factor),
        "message": "House factor updated"
    }), status_code=200)
|
||||
|
||||
@@ -3,22 +3,15 @@ from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
import statistics
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import your existing database models
|
||||
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery
|
||||
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery, KFactorHistory
|
||||
|
||||
# --- Constants for the Model ---
|
||||
# This is a baseline daily usage for homes that use oil for hot water.
|
||||
# A typical value is 0.5 to 1.0 gallons per day. Adjust as needed.
|
||||
# --- Constants ---
|
||||
HOT_WATER_DAILY_USAGE = Decimal('1.0')
|
||||
|
||||
# This determines how quickly the K-Factor adjusts.
|
||||
# 0.7 means 70% weight is given to the historical factor and 30% to the new one.
|
||||
# This prevents wild swings from a single unusual delivery period.
|
||||
K_FACTOR_SMOOTHING_WEIGHT = Decimal('0.7')
|
||||
|
||||
TANK_MAX_FILLS = {
|
||||
275: 240,
|
||||
330: 280,
|
||||
@@ -26,7 +19,23 @@ TANK_MAX_FILLS = {
|
||||
550: 500
|
||||
}
|
||||
|
||||
PARTIAL_DELIVERIES = [100, 125, 150, 200]
|
||||
# --- Rolling K-factor settings ---
# Number of most recent observations averaged into the house factor.
K_FACTOR_ROLLING_WINDOW = 5
# Number of most recent non-budget observations fetched per customer.
K_FACTOR_HISTORY_SIZE = 10
# An observation further than this many standard deviations from the mean
# is flagged as an outlier.
OUTLIER_SIGMA_THRESHOLD = 2.0

# --- Budget fill detection ---
# A delivery within BUDGET_FILL_TOLERANCE gallons of one of these amounts
# is treated as a budget fill.
BUDGET_FILL_AMOUNTS = {100, 125, 150, 200}
BUDGET_FILL_TOLERANCE = 0.5

# --- Confidence scoring ---
# Points per qualifying delivery, bounded to [CONFIDENCE_MIN, CONFIDENCE_MAX];
# CONFIDENCE_VARIANCE_PENALTY is the unit deducted for high variance.
CONFIDENCE_PER_DELIVERY = 8
CONFIDENCE_MAX = 100
CONFIDENCE_MIN = 20
CONFIDENCE_VARIANCE_PENALTY = 10

# --- Fallback ---
# K-factor used when no history data is available.
DEFAULT_K_FACTOR = Decimal('0.12')
|
||||
|
||||
|
||||
class FuelEstimator:
|
||||
@@ -34,64 +43,117 @@ class FuelEstimator:
|
||||
self.session = session
|
||||
|
||||
def _get_weather_for_date(self, target_date: date) -> Auto_Temp | None:
|
||||
"""Helper to fetch weather data for a specific date."""
|
||||
return self.session.query(Auto_Temp).filter(Auto_Temp.todays_date == target_date).first()
|
||||
|
||||
def _estimate_initial_house_factor(self, customer: Auto_Delivery) -> Decimal:
|
||||
"""
|
||||
Generic function to estimate initial house factor for customers with only one delivery.
|
||||
This can be improved with more sophisticated logic (e.g., averaging similar customers).
|
||||
"""
|
||||
# Default generic house factor: 0.12 gallons per degree day (average based on existing customer data)
|
||||
# This represents typical heating usage and can be adjusted based on future data analysis
|
||||
return Decimal('0.12')
|
||||
@staticmethod
def _is_budget_fill(gallons) -> bool:
    """Return True if ``gallons`` is within the tolerance of a budget fill amount.

    Args:
        gallons: Delivered gallons (any value ``float()`` accepts) or None.

    Returns:
        True when the amount is within ``BUDGET_FILL_TOLERANCE`` of any entry
        in ``BUDGET_FILL_AMOUNTS``; False otherwise, including when
        ``gallons`` is None.
    """
    if gallons is None:
        # A ticket without a recorded amount cannot be classified.
        return False
    gal = float(gallons)
    return any(
        abs(gal - amount) <= BUDGET_FILL_TOLERANCE
        for amount in BUDGET_FILL_AMOUNTS
    )
|
||||
|
||||
def _verify_house_factor_correctness(self, customer: Auto_Delivery) -> bool:
|
||||
"""
|
||||
Verify and correct house_factor based on delivery history.
|
||||
Returns True if correction was made.
|
||||
"""
|
||||
# Count deliveries for this customer
|
||||
delivery_count = self.session.query(func.count(Tickets_Auto_Delivery.id)).filter(
|
||||
Tickets_Auto_Delivery.customer_id == customer.customer_id
|
||||
def _get_division_average_k(self) -> Decimal:
    """Return the average K-factor over all usable history rows.

    Usable rows are those not flagged as budget fill or outlier and holding
    a positive, non-NULL K-factor. Falls back to ``DEFAULT_K_FACTOR`` when
    the query yields no positive average.
    """
    usable_rows = (
        KFactorHistory.is_budget_fill == False,  # noqa: E712 - SQL expression
        KFactorHistory.is_outlier == False,  # noqa: E712 - SQL expression
        KFactorHistory.k_factor.isnot(None),
        KFactorHistory.k_factor > 0,
    )
    avg_k = (
        self.session.query(func.avg(KFactorHistory.k_factor))
        .filter(*usable_rows)
        .scalar()
    )

    if not (avg_k and avg_k > 0):
        return DEFAULT_K_FACTOR

    return Decimal(str(round(float(avg_k), 4)))
|
||||
|
||||
corrected = False
|
||||
def _calculate_rolling_k_factor(self, customer_id: int):
    """Return ``(k_factor, confidence_score, source)`` for a customer.

    Steps:
      1. Fetch the last ``K_FACTOR_HISTORY_SIZE`` non-budget history entries
         with a positive K-factor, most recent first.
      2. Take the most recent ``K_FACTOR_ROLLING_WINDOW`` as the rolling window.
      3. Compute mean and sample standard deviation over the full fetched
         history (the baseline).
      4. Flag window entries further than ``OUTLIER_SIGMA_THRESHOLD`` sigma
         from the baseline mean as outliers; clear the flag on the others.
      5. Average the remaining window entries into the new K-factor.
      6. Confidence = qualifying entries * ``CONFIDENCE_PER_DELIVERY``
         (capped), minus a penalty for a high coefficient of variation,
         never below ``CONFIDENCE_MIN``.

    Why the baseline is the full history and not the window: within a sample
    of n values no value can lie more than (n - 1) / sqrt(n) sample standard
    deviations from the sample mean. For n = 5 that bound is about 1.79, so a
    2-sigma test against the window's own statistics can never fire. With up
    to 10 baseline values the bound is about 2.85 and outliers become
    detectable (at least 6 entries are needed). For 5 or fewer entries the
    baseline equals the window, as before.

    Side effects:
        Updates ``is_outlier`` on the window's history rows; the caller
        commits.
    """
    # Most recent first, budget fills and unusable K-factors excluded.
    history = self.session.query(KFactorHistory).filter(
        KFactorHistory.customer_id == customer_id,
        KFactorHistory.is_budget_fill == False,  # noqa: E712 - SQL expression
        KFactorHistory.k_factor.isnot(None),
        KFactorHistory.k_factor > 0
    ).order_by(KFactorHistory.fill_date.desc()).limit(K_FACTOR_HISTORY_SIZE).all()

    if not history:
        div_avg = self._get_division_average_k()
        return (div_avg, CONFIDENCE_MIN, 'division_avg')

    window = history[:K_FACTOR_ROLLING_WINDOW]
    k_values = [float(h.k_factor) for h in window]

    if len(k_values) < 2:
        # A single observation: no spread to evaluate.
        k = Decimal(str(round(k_values[0], 4)))
        confidence = min(CONFIDENCE_MAX, CONFIDENCE_PER_DELIVERY)
        return (k, max(CONFIDENCE_MIN, confidence), 'calculated')

    # Baseline statistics over everything fetched (see docstring).
    baseline = [float(h.k_factor) for h in history]
    mean_k = statistics.mean(baseline)
    stdev_k = statistics.stdev(baseline)
    limit = OUTLIER_SIGMA_THRESHOLD * stdev_k

    # Split the window into outliers (flagged) and usable values.
    filtered = []
    for h in window:
        kf = float(h.k_factor)
        if stdev_k > 0 and abs(kf - mean_k) > limit:
            if not h.is_outlier:
                h.is_outlier = True
        else:
            filtered.append(kf)
            if h.is_outlier:
                h.is_outlier = False

    if not filtered:
        # All were outliers - use full window
        filtered = k_values

    mean_filtered = statistics.mean(filtered)
    final_k = Decimal(str(round(mean_filtered, 4)))

    # Confidence scoring
    qualifying = sum(
        1 for h in history if not h.is_budget_fill and not h.is_outlier
    )
    confidence = min(CONFIDENCE_MAX, qualifying * CONFIDENCE_PER_DELIVERY)

    # Penalty for high variance (coefficient of variation)
    if len(filtered) >= 2:
        cv = statistics.stdev(filtered) / mean_filtered if mean_filtered > 0 else 0
        if cv > 0.3:
            confidence -= CONFIDENCE_VARIANCE_PENALTY * 2
        elif cv > 0.15:
            confidence -= CONFIDENCE_VARIANCE_PENALTY

    confidence = max(CONFIDENCE_MIN, confidence)
    return (final_k, confidence, 'calculated')
|
||||
|
||||
def run_daily_update(self):
|
||||
"""
|
||||
Main function to run once per day. It updates the estimated fuel level
|
||||
for all active automatic delivery customers. The calling function must commit the session.
|
||||
Main function to run once per day. Updates estimated fuel level
|
||||
for all active automatic delivery customers.
|
||||
"""
|
||||
today = date.today()
|
||||
|
||||
# 1. Check if the update has already run today
|
||||
if self.session.query(Auto_Update).filter(Auto_Update.last_updated == today).first():
|
||||
logger.info(f"Daily update for {today} has already been completed.")
|
||||
return {"ok": True, "message": "Update already run today."}
|
||||
|
||||
# 2. Get today's weather data (specifically the Heating Degree Days)
|
||||
todays_weather = self._get_weather_for_date(today)
|
||||
if not todays_weather:
|
||||
logger.info(f"Error: Weather data for {today} not found. Cannot run update.")
|
||||
return {"ok": False, "message": f"Weather data for {today} not found."}
|
||||
|
||||
# Degree days can't be negative for this calculation. If it's warm, HDD = 0.
|
||||
degree_day = Decimal(max(0, 65 - float(todays_weather.temp_avg)))
|
||||
|
||||
# 3. Get all active automatic customers
|
||||
auto_customers = self.session.query(Auto_Delivery).filter(
|
||||
Auto_Delivery.auto_status == 1 # Assuming 1 means active
|
||||
Auto_Delivery.auto_status == 1
|
||||
).all()
|
||||
|
||||
if not auto_customers:
|
||||
@@ -100,14 +162,7 @@ class FuelEstimator:
|
||||
|
||||
logger.info(f"Staging daily fuel update for {len(auto_customers)} customers...")
|
||||
|
||||
corrections_made = 0
|
||||
|
||||
# 4. Loop through each customer and update their fuel level
|
||||
for customer in auto_customers:
|
||||
# Verify and correct house_factor if needed
|
||||
if self._verify_house_factor_correctness(customer):
|
||||
corrections_made += 1
|
||||
|
||||
heating_usage = customer.house_factor * degree_day
|
||||
|
||||
hot_water_usage = Decimal('0.0')
|
||||
@@ -123,21 +178,16 @@ class FuelEstimator:
|
||||
if customer.days_since_last_fill is not None:
|
||||
customer.days_since_last_fill += 1
|
||||
|
||||
# 5. Log that today's update is complete
|
||||
new_update_log = Auto_Update(last_updated=today)
|
||||
self.session.add(new_update_log)
|
||||
|
||||
logger.info("Daily update staged. Awaiting commit.")
|
||||
message = f"Successfully staged updates for {len(auto_customers)} customers."
|
||||
if corrections_made > 0:
|
||||
message += f" Corrected house factors for {corrections_made} customers."
|
||||
|
||||
return {"ok": True, "message": message}
|
||||
return {"ok": True, "message": f"Successfully staged updates for {len(auto_customers)} customers."}
|
||||
|
||||
def refine_factor_after_delivery(self, ticket: Tickets_Auto_Delivery):
|
||||
"""
|
||||
This is the self-correction logic. It recalculates and refines the customer's
|
||||
K-Factor (house_factor) after a delivery. The calling function must commit the session.
|
||||
Recalculates and refines the customer's K-Factor after a delivery.
|
||||
Uses K-factor history with rolling averages and outlier detection.
|
||||
"""
|
||||
customer = self.session.query(Auto_Delivery).filter(
|
||||
Auto_Delivery.customer_id == ticket.customer_id
|
||||
@@ -147,74 +197,103 @@ class FuelEstimator:
|
||||
logger.info(f"Customer {ticket.customer_id} not found.")
|
||||
return
|
||||
|
||||
# 1. Detect and flag budget fill
|
||||
is_budget = self._is_budget_fill(ticket.gallons_delivered)
|
||||
ticket.is_budget_fill = is_budget
|
||||
if is_budget:
|
||||
logger.info(f"Budget fill detected for customer {ticket.customer_id}: {ticket.gallons_delivered} gal")
|
||||
|
||||
# 2. First delivery - no previous fill to compare against
|
||||
if not customer.last_fill:
|
||||
logger.info(f"Setting initial K-Factor for new customer {ticket.customer_id} with only one delivery.")
|
||||
customer.house_factor = self._estimate_initial_house_factor(customer)
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
logger.info(f"First delivery for customer {ticket.customer_id}. Setting division average K-factor.")
|
||||
div_avg = self._get_division_average_k()
|
||||
customer.house_factor = div_avg
|
||||
customer.confidence_score = CONFIDENCE_MIN
|
||||
customer.k_factor_source = 'division_avg'
|
||||
self._update_tank_after_fill(customer, ticket, is_budget)
|
||||
return
|
||||
|
||||
start_date = customer.last_fill
|
||||
end_date = ticket.fill_date
|
||||
|
||||
|
||||
if start_date >= end_date:
|
||||
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: New fill date is not after the last one. Resetting tank only.")
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: fill date not after last fill. Resetting tank only.")
|
||||
self._update_tank_after_fill(customer, ticket, is_budget)
|
||||
return
|
||||
|
||||
# 3. Calculate HDD for interval
|
||||
interval_temps = self.session.query(Auto_Temp).filter(
|
||||
Auto_Temp.todays_date > start_date,
|
||||
Auto_Temp.todays_date <= end_date
|
||||
).all()
|
||||
total_degree_days = sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps)
|
||||
total_hdd = Decimal(total_degree_days)
|
||||
|
||||
|
||||
# Hot water adjustment
|
||||
num_days = (end_date - start_date).days
|
||||
total_hot_water_usage = Decimal('0.0')
|
||||
if customer.hot_water_summer == 1:
|
||||
num_days = (end_date - start_date).days
|
||||
total_hot_water_usage = Decimal(num_days) * HOT_WATER_DAILY_USAGE
|
||||
|
||||
gallons_for_heating = ticket.gallons_delivered - total_hot_water_usage
|
||||
if gallons_for_heating <= 0 or total_hdd == 0:
|
||||
logger.info(f"Cannot calculate new K-Factor for customer {ticket.customer_id}. (HDD: {total_hdd}, Heating Gallons: {gallons_for_heating}). Resetting tank only.")
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
return
|
||||
|
||||
new_k_factor = gallons_for_heating / total_hdd
|
||||
|
||||
current_k_factor = customer.house_factor
|
||||
smoothed_k_factor = (current_k_factor * K_FACTOR_SMOOTHING_WEIGHT) + (new_k_factor * (Decimal('1.0') - K_FACTOR_SMOOTHING_WEIGHT))
|
||||
|
||||
logger.info(f"Refining K-Factor for Customer ID {customer.customer_id}:")
|
||||
logger.info(f" - Old K-Factor: {current_k_factor:.4f}, New Smoothed K-Factor: {smoothed_k_factor:.4f}")
|
||||
# Calculate K-factor for this observation
|
||||
k_factor_obs = None
|
||||
if gallons_for_heating > 0 and total_hdd > 0:
|
||||
k_factor_obs = gallons_for_heating / total_hdd
|
||||
|
||||
# 4. Store K-factor observation in history (even budget fills, flagged)
|
||||
history_entry = KFactorHistory(
|
||||
customer_id=ticket.customer_id,
|
||||
ticket_id=ticket.id,
|
||||
fill_date=ticket.fill_date,
|
||||
gallons_delivered=ticket.gallons_delivered,
|
||||
total_hdd=total_hdd,
|
||||
days_in_period=num_days,
|
||||
k_factor=k_factor_obs,
|
||||
is_budget_fill=is_budget,
|
||||
is_outlier=False,
|
||||
created_at=date.today()
|
||||
)
|
||||
self.session.add(history_entry)
|
||||
|
||||
# 5. Run rolling K-factor calculation
|
||||
# Flush so the new entry is visible to the query
|
||||
self.session.flush()
|
||||
new_k, confidence, source = self._calculate_rolling_k_factor(ticket.customer_id)
|
||||
|
||||
logger.info(f"Refining K-Factor for Customer {ticket.customer_id}:")
|
||||
logger.info(f" Old K: {customer.house_factor:.4f}, New K: {new_k:.4f}, Confidence: {confidence}, Source: {source}")
|
||||
|
||||
# 6. Update customer
|
||||
customer.house_factor = new_k
|
||||
customer.confidence_score = confidence
|
||||
customer.k_factor_source = source
|
||||
|
||||
# 7. Update tank after fill
|
||||
self._update_tank_after_fill(customer, ticket, is_budget)
|
||||
|
||||
customer.house_factor = smoothed_k_factor
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
|
||||
logger.info(f"K-Factor and tank status for Customer {customer.customer_id} staged for update.")
|
||||
|
||||
def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery):
|
||||
"""Helper to update customer tank status after a fill-up or partial delivery."""
|
||||
def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery, is_budget: bool = False):
|
||||
"""Update customer tank status after a fill-up."""
|
||||
customer.last_fill = ticket.fill_date
|
||||
customer.days_since_last_fill = 0
|
||||
|
||||
# Determine max fill capacity
|
||||
if customer.tank_size and Decimal(customer.tank_size) > 0:
|
||||
tank_size = float(Decimal(customer.tank_size))
|
||||
max_fill = TANK_MAX_FILLS.get(tank_size, tank_size)
|
||||
else:
|
||||
# Default to legal max for common tank size (275 gallons = 240)
|
||||
max_fill = 240.0
|
||||
|
||||
# Check if this is a partial delivery
|
||||
if float(ticket.gallons_delivered) in PARTIAL_DELIVERIES:
|
||||
# Partial delivery: add to current level, cap at max_fill
|
||||
if is_budget:
|
||||
# Budget fill: ADD gallons to current level, cap at max_fill
|
||||
customer.estimated_gallons_left += ticket.gallons_delivered
|
||||
customer.estimated_gallons_left = min(customer.estimated_gallons_left, Decimal(str(max_fill)))
|
||||
else:
|
||||
# Full delivery: set to max_fill
|
||||
# Full delivery: RESET to max_fill
|
||||
customer.estimated_gallons_left = Decimal(str(max_fill))
|
||||
|
||||
# The previous day's value should match the new value on a fill day.
|
||||
customer.estimated_gallons_left_prev_day = customer.estimated_gallons_left
|
||||
customer.last_updated = date.today()
|
||||
customer.auto_status = 1 # Reactivate the customer
|
||||
customer.auto_status = 1
|
||||
|
||||
Reference in New Issue
Block a user