feat: rewrite K-factor engine with history tracking and outlier detection

Replace simple exponential smoothing with a rolling-average K-factor
system backed by a new auto_kfactor_history table. Budget fills are
detected and excluded from calculations, outliers beyond 2-sigma are
flagged, and confidence scores track data quality per customer.
Adds backfill endpoint, auto-create for missing estimation records,
and manual house_factor PUT endpoints for both auto and regular customers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:54:27 -05:00
parent 764c094eed
commit c134c05947
5 changed files with 473 additions and 105 deletions

View File

@@ -1,6 +1,6 @@
from sqlalchemy import (Column, Integer, from sqlalchemy import (Column, Integer,
DECIMAL, TEXT, DECIMAL, TEXT, Boolean,
VARCHAR, DATE, INTEGER) VARCHAR, DATE, INTEGER, Index)
from datetime import datetime from datetime import datetime
from database import Base from database import Base
@@ -57,11 +57,12 @@ class Auto_Delivery(Base):
estimated_gallons_left_prev_day = Column(DECIMAL(6, 2)) estimated_gallons_left_prev_day = Column(DECIMAL(6, 2))
tank_height = Column(VARCHAR(25)) tank_height = Column(VARCHAR(25))
tank_size = Column(VARCHAR(25)) tank_size = Column(VARCHAR(25))
house_factor = Column(DECIMAL(5, 2)) house_factor = Column(DECIMAL(7, 4))
auto_status = Column(INTEGER()) auto_status = Column(INTEGER())
open_ticket_id = Column(Integer, nullable=True) open_ticket_id = Column(Integer, nullable=True)
hot_water_summer = Column(INTEGER()) hot_water_summer = Column(INTEGER())
confidence_score = Column(INTEGER(), default=20)
k_factor_source = Column(VARCHAR(20), default='default')
@@ -92,3 +93,24 @@ class Tickets_Auto_Delivery(Base):
payment_type = Column(Integer, nullable=True) payment_type = Column(Integer, nullable=True)
payment_card_id = Column(Integer, nullable=True) payment_card_id = Column(Integer, nullable=True)
payment_status = Column(Integer, nullable=True) payment_status = Column(Integer, nullable=True)
is_budget_fill = Column(Boolean, default=False)
class KFactorHistory(Base):
    """One K-factor observation per delivery interval for an auto customer.

    Rows are written by the estimator after a delivery and by the backfill
    endpoint; the rolling K-factor calculation reads them back ordered by
    ``fill_date`` descending.
    """

    __tablename__ = 'auto_kfactor_history'

    # --- identity ---------------------------------------------------------
    id = Column(Integer, primary_key=True, autoincrement=True)
    # NOTE(review): the composite index below already leads with customer_id,
    # so this single-column index may be redundant - confirm before removing.
    customer_id = Column(INTEGER(), nullable=False, index=True)
    ticket_id = Column(Integer, nullable=True)

    # --- the observed interval --------------------------------------------
    fill_date = Column(DATE())                # date of the delivery closing the interval
    gallons_delivered = Column(DECIMAL(6, 2))
    total_hdd = Column(DECIMAL(8, 2))         # heating degree days summed over the interval
    days_in_period = Column(Integer)
    k_factor = Column(DECIMAL(7, 4))          # gallons per degree day; NULL when not computable

    # --- quality flags ----------------------------------------------------
    is_budget_fill = Column(Boolean, default=False)
    is_outlier = Column(Boolean, default=False)

    created_at = Column(DATE())

    # Supports the "latest N entries for one customer" lookup.
    __table_args__ = (
        Index(
            'ix_auto_kfactor_history_customer_fill',
            'customer_id',
            fill_date.desc(),
        ),
    )

View File

@@ -2,15 +2,26 @@ import logging
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from database import session from database import session
from datetime import date, timedelta
from decimal import Decimal
from sqlalchemy import func
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp
from app.models.customer import Customer_Customer
from app.models.delivery import Delivery from app.models.delivery import Delivery
from app.models.auth import Auth_User from app.models.auth import Auth_User
from app.auth import get_current_user from app.auth import get_current_user
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Gallons per day added to the daily burn for customers whose
# hot_water_summer flag equals 1 (see _enrich_auto).
HOT_WATER_DAILY_USAGE = Decimal('1.0')
# Default number of trailing days of temperature rows averaged by _get_avg_hdd.
HDD_FORECAST_DAYS = 7
class HouseFactorUpdate(BaseModel):
    # Request body for the manual house-factor PUT endpoint.
    # The endpoint rounds the value to 4 decimal places before storing it.
    house_factor: float
router = APIRouter( router = APIRouter(
@@ -20,6 +31,36 @@ router = APIRouter(
) )
def _get_avg_hdd(days: int = HDD_FORECAST_DAYS) -> Decimal:
    """Return the average heating degree days (HDD) over the last ``days`` days.

    The trailing average is used as a stand-in for a forecast. HDD for one
    day is ``max(0, 65 - temp_avg)``.

    Args:
        days: Size of the trailing window; rows dated strictly after
            ``today - days`` are included.

    Returns:
        The mean daily HDD rounded to 2 decimal places, or ``Decimal('0')``
        when no usable temperature rows exist in the window.
    """
    cutoff = date.today() - timedelta(days=days)
    rows = (
        session.query(Auto_Temp.temp_avg)
        .filter(
            Auto_Temp.todays_date > cutoff,
            # A row without a temperature would make float(None) raise and
            # take down every endpoint that enriches its response.
            Auto_Temp.temp_avg.isnot(None),
        )
        .all()
    )
    if not rows:
        return Decimal('0')

    total = sum(max(0, 65 - float(r.temp_avg)) for r in rows)
    return Decimal(str(round(total / len(rows), 2)))
def _enrich_auto(auto_obj, avg_hdd: Decimal) -> dict:
    """Serialize an auto-delivery record and attach burn-rate estimates.

    Adds ``gallons_per_day``, ``avg_hdd``, ``hot_water_summer`` and
    ``days_remaining`` to the JSON-encoded record. ``days_remaining`` is
    capped at 999 and is also 999 when the burn rate is not positive or the
    remaining-gallons estimate is missing.
    """
    payload = jsonable_encoder(auto_obj)

    # A missing or zero house factor means no heating component.
    if auto_obj.house_factor:
        k_factor = Decimal(str(auto_obj.house_factor))
    else:
        k_factor = Decimal('0')

    summer_hot_water = Decimal('0')
    if auto_obj.hot_water_summer == 1:
        summer_hot_water = HOT_WATER_DAILY_USAGE

    burn_per_day = k_factor * avg_hdd + summer_hot_water

    payload['gallons_per_day'] = float(round(burn_per_day, 2))
    payload['avg_hdd'] = float(avg_hdd)
    payload['hot_water_summer'] = auto_obj.hot_water_summer

    can_project = burn_per_day > 0 and auto_obj.estimated_gallons_left is not None
    if not can_project:
        payload['days_remaining'] = 999
        return payload

    whole_days = int(auto_obj.estimated_gallons_left / burn_per_day)
    payload['days_remaining'] = min(whole_days, 999)
    return payload
@router.get("/all/customers", status_code=200) @router.get("/all/customers", status_code=200)
def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)): def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
@@ -31,7 +72,10 @@ def get_delivery_customers(current_user: Auth_User = Depends(get_current_user)):
.all() .all()
) )
return JSONResponse(content=jsonable_encoder(automatics), status_code=200) avg_hdd = _get_avg_hdd()
enriched = [_enrich_auto(a, avg_hdd) for a in automatics]
return JSONResponse(content=enriched, status_code=200)
@router.get("/driver/{driver_employee_id}", status_code=200) @router.get("/driver/{driver_employee_id}", status_code=200)
@@ -112,7 +156,7 @@ def get_autos_customers_extended(customer_id, current_user: Auth_User = Depends(
@router.get("/auto/customer/{customer_id}", status_code=200) @router.get("/auto/customer/{customer_id}", status_code=200)
def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends(get_current_user)): def get_auto_delivery_by_customer(customer_id: int, current_user: Auth_User = Depends(get_current_user)):
logger.info(f"GET /delivery/auto/customer/{customer_id} - User: {current_user.username}") logger.info(f"GET /delivery/auto/customer/{customer_id} - User: {current_user.username}")
get_auto_delivery = ( get_auto_delivery = (
session.query(Auto_Delivery) session.query(Auto_Delivery)
@@ -120,7 +164,47 @@ def get_auto_delivery_by_customer(customer_id, current_user: Auth_User = Depends
.first() .first()
) )
return JSONResponse(content=jsonable_encoder(get_auto_delivery), status_code=200) if not get_auto_delivery:
# Auto-create record from customer data
customer = session.query(Customer_Customer).filter(
Customer_Customer.id == customer_id
).first()
if not customer:
return JSONResponse(content={"error": "Customer not found"}, status_code=404)
# Use division average K-factor as default
div_avg = session.query(func.avg(Auto_Delivery.house_factor)).filter(
Auto_Delivery.house_factor.isnot(None),
Auto_Delivery.house_factor > 0
).scalar()
default_k = float(div_avg) if div_avg else 0.12
get_auto_delivery = Auto_Delivery(
customer_id=customer.id,
account_number=customer.account_number,
customer_town=customer.customer_town,
customer_state=customer.customer_state,
customer_address=customer.customer_address,
customer_zip=customer.customer_zip,
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
estimated_gallons_left=Decimal('100'),
estimated_gallons_left_prev_day=Decimal('100'),
tank_size='275',
house_factor=Decimal(str(round(default_k, 4))),
auto_status=1,
hot_water_summer=0,
confidence_score=20,
k_factor_source='default'
)
session.add(get_auto_delivery)
session.commit()
session.refresh(get_auto_delivery)
logger.info(f"Auto-created Auto_Delivery record for customer {customer_id}")
avg_hdd = _get_avg_hdd()
enriched = _enrich_auto(get_auto_delivery, avg_hdd)
return JSONResponse(content=enriched, status_code=200)
@router.put("/update_status/{auto_id}", status_code=200) @router.put("/update_status/{auto_id}", status_code=200)
@@ -137,3 +221,26 @@ def update_auto_status(auto_id: int, current_user: Auth_User = Depends(get_curre
session.commit() session.commit()
return {"message": "Auto status updated to 3"} return {"message": "Auto status updated to 3"}
return {"error": "Auto delivery not found"} return {"error": "Auto delivery not found"}
@router.put("/auto/customer/{customer_id}/house_factor", status_code=200)
def update_house_factor(customer_id: int, body: HouseFactorUpdate, current_user: Auth_User = Depends(get_current_user)):
    """Manually override the K-factor (house_factor) of an auto customer.

    The value is rounded to 4 decimal places and the record's
    ``k_factor_source`` is set to ``'manual'``.

    Returns:
        200 with the enriched auto-delivery record on success,
        400 when the value is not a finite, non-negative number that fits the
        ``DECIMAL(7, 4)`` column, or
        404 when the customer has no auto-delivery record.
    """
    import math  # local import: only needed for the finiteness check

    logger.info(f"PUT /delivery/auto/customer/{customer_id}/house_factor - User: {current_user.username}")

    # JSON parsers accept NaN/Infinity for floats; neither they nor negative
    # factors are meaningful, and they would only fail later inside the DB.
    if not math.isfinite(body.house_factor) or body.house_factor < 0:
        return JSONResponse(
            content={"error": "house_factor must be a finite, non-negative number"},
            status_code=400,
        )

    new_factor = Decimal(str(round(body.house_factor, 4)))
    # house_factor is stored as DECIMAL(7, 4): at most 999.9999.
    if new_factor > Decimal('999.9999'):
        return JSONResponse(
            content={"error": "house_factor must not exceed 999.9999"},
            status_code=400,
        )

    auto_delivery = (
        session.query(Auto_Delivery)
        .filter(Auto_Delivery.customer_id == customer_id)
        .first()
    )
    if not auto_delivery:
        return JSONResponse(content={"error": "Auto delivery record not found"}, status_code=404)

    auto_delivery.house_factor = new_factor
    auto_delivery.k_factor_source = 'manual'
    try:
        session.commit()
    except Exception:
        # The session is shared module-wide; without a rollback a failed
        # commit would leave it unusable for later requests.
        session.rollback()
        raise
    session.refresh(auto_delivery)

    avg_hdd = _get_avg_hdd()
    enriched = _enrich_auto(auto_delivery, avg_hdd)
    return JSONResponse(content=enriched, status_code=200)

View File

@@ -7,9 +7,10 @@ from sqlalchemy import func
from datetime import date from datetime import date
from decimal import Decimal from decimal import Decimal
from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp from app.models.auto import Auto_Delivery, Tickets_Auto_Delivery, Auto_Temp, KFactorHistory
from app.models.delivery import Delivery from app.models.delivery import Delivery
from app.constants import DEFAULT_TANK_SIZE_GALLONS from app.constants import DEFAULT_TANK_SIZE_GALLONS
from app.script.fuel_estimator import FuelEstimator
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -213,3 +214,114 @@ def estimate_customer_gallons(update_db: int):
session.commit() session.commit()
return JSONResponse(content=jsonable_encoder(estimates)) return JSONResponse(content=jsonable_encoder(estimates))
@router.get("/backfill_kfactor_history", status_code=200)
def backfill_kfactor_history():
    """
    Backfill the auto_kfactor_history table from existing ticket data.

    For each auto customer with 2+ dated tickets, calculates a K-factor for
    each consecutive ticket pair and inserts it into history, then runs the
    rolling K-factor calculation to set the customer's house_factor,
    confidence and source. Customers that already have history rows are left
    untouched, so the endpoint can be re-run safely.

    All changes are committed once at the end; on any error the transaction
    is rolled back and the exception re-raised.

    NOTE(review): this mutates data behind a GET route; kept as-is for
    backward compatibility.
    NOTE(review): HOT_WATER_DAILY_USAGE is not among the imports visible in
    this diff - confirm it is defined elsewhere in this module.
    NOTE(review): the rolling result overwrites house_factor even when
    k_factor_source is 'manual' - confirm that is intended.
    """
    logger.info("GET /fixstuff/backfill_kfactor_history - Starting K-factor history backfill")

    estimator = FuelEstimator(session=session)
    stats = {
        "customers_processed": 0,
        "customers_skipped": 0,
        "history_entries_created": 0,
        "customers_updated": 0,
    }

    try:
        for ad in session.query(Auto_Delivery).all():
            tickets = session.query(Tickets_Auto_Delivery).filter(
                Tickets_Auto_Delivery.customer_id == ad.customer_id,
                Tickets_Auto_Delivery.fill_date.isnot(None)
            ).order_by(Tickets_Auto_Delivery.fill_date).all()

            if len(tickets) < 2:
                stats["customers_skipped"] += 1
                # Not enough data to compute anything: only fill in missing
                # confidence/source defaults, house_factor is left as it is.
                if ad.confidence_score is None:
                    ad.confidence_score = 20
                if ad.k_factor_source is None:
                    ad.k_factor_source = 'default'
                continue

            stats["customers_processed"] += 1

            # Skip customers that already have history (idempotent re-runs).
            existing = session.query(KFactorHistory).filter(
                KFactorHistory.customer_id == ad.customer_id
            ).count()
            if existing > 0:
                continue

            # Walk consecutive (previous fill, next fill) pairs.
            for prev_ticket, next_ticket in zip(tickets, tickets[1:]):
                start_date = prev_ticket.fill_date
                end_date = next_ticket.fill_date
                num_days = (end_date - start_date).days
                if num_days <= 0:
                    continue

                # A ticket without a delivered amount cannot yield an
                # observation and would crash the arithmetic below.
                if next_ticket.gallons_delivered is None:
                    continue

                # Heating degree days accumulated over the interval.
                interval_temps = session.query(Auto_Temp).filter(
                    Auto_Temp.todays_date > start_date,
                    Auto_Temp.todays_date <= end_date,
                    Auto_Temp.temp_avg.isnot(None)
                ).all()
                total_hdd = Decimal(sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps))
                if total_hdd == 0:
                    continue

                # Remove the hot-water share so only heating gallons remain.
                total_hot_water = Decimal('0.0')
                if ad.hot_water_summer == 1:
                    total_hot_water = Decimal(num_days) * HOT_WATER_DAILY_USAGE
                gallons_for_heating = next_ticket.gallons_delivered - total_hot_water

                k_factor_obs = None
                if gallons_for_heating > 0 and total_hdd > 0:
                    k_factor_obs = gallons_for_heating / total_hdd

                is_budget = estimator._is_budget_fill(next_ticket.gallons_delivered)
                # Flag the ticket too
                next_ticket.is_budget_fill = is_budget

                session.add(KFactorHistory(
                    customer_id=ad.customer_id,
                    ticket_id=next_ticket.id,
                    fill_date=next_ticket.fill_date,
                    gallons_delivered=next_ticket.gallons_delivered,
                    total_hdd=total_hdd,
                    days_in_period=num_days,
                    k_factor=k_factor_obs,
                    is_budget_fill=is_budget,
                    is_outlier=False,
                    created_at=date.today()
                ))
                stats["history_entries_created"] += 1

            # Flush so the rolling calculation can see the new entries.
            session.flush()

            new_k, confidence, source = estimator._calculate_rolling_k_factor(ad.customer_id)
            ad.house_factor = new_k
            ad.confidence_score = confidence
            ad.k_factor_source = source
            stats["customers_updated"] += 1

        session.commit()
    except Exception:
        # The session is shared module-wide; roll back so a failed backfill
        # does not leave it in a broken transaction for later requests.
        session.rollback()
        logger.exception("K-factor history backfill failed; transaction rolled back")
        raise

    logger.info(f"Backfill complete: {stats}")
    return JSONResponse(content=jsonable_encoder(stats))

View File

@@ -2,6 +2,7 @@ import logging
from fastapi import APIRouter from fastapi import APIRouter
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from database import session from database import session
from sqlalchemy import func from sqlalchemy import func
from datetime import date from datetime import date
@@ -27,6 +28,9 @@ TANK_MAX_FILLS = {
} }
class HouseFactorUpdate(BaseModel):
    # Request body for PUT /fixstuff_customer/house_factor/{customer_id}.
    # The endpoint rounds the value to 4 decimal places before storing it.
    house_factor: float
router = APIRouter( router = APIRouter(
prefix="/fixstuff_customer", prefix="/fixstuff_customer",
@@ -256,10 +260,32 @@ def estimate_customer_gallons_specific(customer_id: int):
).first() ).first()
if not customer_estimate: if not customer_estimate:
return JSONResponse(content={ # Auto-create record from customer data
"error": f"No fuel estimation data found for customer {customer_id}", customer = session.query(Customer_Customer).filter(
"solution": "Run the populate_estimates endpoint first to initialize customer data." Customer_Customer.id == customer_id
}) ).first()
if not customer:
return JSONResponse(content={"error": f"Customer {customer_id} not found"}, status_code=404)
customer_estimate = Customer_estimate_gallons(
customer_id=customer.id,
account_number=customer.account_number,
customer_town=customer.customer_town,
customer_state=customer.customer_state,
customer_address=customer.customer_address,
customer_zip=customer.customer_zip,
customer_full_name=f"{customer.customer_first_name} {customer.customer_last_name}".strip(),
estimated_gallons_left=Decimal('100'),
estimated_gallons_left_prev_day=Decimal('100'),
tank_size='275',
house_factor=Decimal('0.12'),
auto_status=1,
hot_water_summer=0
)
session.add(customer_estimate)
session.commit()
session.refresh(customer_estimate)
logger.info(f"Auto-created Customer_estimate_gallons record for customer {customer_id}")
deliveries = session.query(Delivery).filter( deliveries = session.query(Delivery).filter(
Delivery.customer_id == customer_estimate.customer_id, Delivery.customer_id == customer_estimate.customer_id,
@@ -448,3 +474,25 @@ def populate_customer_estimates():
} }
return JSONResponse(content=jsonable_encoder(result)) return JSONResponse(content=jsonable_encoder(result))
@router.put("/house_factor/{customer_id}", status_code=200)
def update_customer_house_factor(customer_id: int, body: HouseFactorUpdate):
    """Manually set the house factor on a regular customer's estimate record.

    The value is rounded to 4 decimal places before it is stored.

    Returns:
        200 with the record id, customer id and stored factor on success,
        400 when the value is not a finite, non-negative number, or
        404 when the customer has no estimate record.
    """
    import math  # local import: only needed for the finiteness check

    logger.info(f"PUT /fixstuff_customer/house_factor/{customer_id}")

    # JSON parsers accept NaN/Infinity for floats; neither they nor negative
    # factors are meaningful, and they would only fail later inside the DB.
    if not math.isfinite(body.house_factor) or body.house_factor < 0:
        return JSONResponse(
            content={"error": "house_factor must be a finite, non-negative number"},
            status_code=400,
        )

    customer_estimate = session.query(Customer_estimate_gallons).filter(
        Customer_estimate_gallons.customer_id == customer_id
    ).first()
    if not customer_estimate:
        return JSONResponse(content={"error": "Customer estimate record not found"}, status_code=404)

    customer_estimate.house_factor = Decimal(str(round(body.house_factor, 4)))
    try:
        session.commit()
    except Exception:
        # The session is shared module-wide; without a rollback a failed
        # commit would leave it unusable for later requests.
        session.rollback()
        raise
    session.refresh(customer_estimate)

    return JSONResponse(content=jsonable_encoder({
        "id": customer_estimate.id,
        "customer_id": customer_estimate.customer_id,
        "house_factor": float(customer_estimate.house_factor),
        "message": "House factor updated"
    }), status_code=200)

View File

@@ -3,22 +3,15 @@ from sqlalchemy.orm import Session
from sqlalchemy import func from sqlalchemy import func
from datetime import date, timedelta from datetime import date, timedelta
from decimal import Decimal from decimal import Decimal
import statistics
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Import your existing database models from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery, KFactorHistory
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery
# --- Constants for the Model --- # --- Constants ---
# This is a baseline daily usage for homes that use oil for hot water.
# A typical value is 0.5 to 1.0 gallons per day. Adjust as needed.
HOT_WATER_DAILY_USAGE = Decimal('1.0') HOT_WATER_DAILY_USAGE = Decimal('1.0')
# This determines how quickly the K-Factor adjusts.
# 0.7 means 70% weight is given to the historical factor and 30% to the new one.
# This prevents wild swings from a single unusual delivery period.
K_FACTOR_SMOOTHING_WEIGHT = Decimal('0.7')
TANK_MAX_FILLS = { TANK_MAX_FILLS = {
275: 240, 275: 240,
330: 280, 330: 280,
@@ -26,7 +19,23 @@ TANK_MAX_FILLS = {
550: 500 550: 500
} }
PARTIAL_DELIVERIES = [100, 125, 150, 200] # K-Factor rolling average settings
# Number of most recent non-budget history entries averaged into the K-factor.
K_FACTOR_ROLLING_WINDOW = 5
# Maximum number of history entries fetched per customer for the calculation.
K_FACTOR_HISTORY_SIZE = 10
# An entry further than this many standard deviations from the mean is an outlier.
# NOTE(review): by Samuelson's inequality a sample of n values cannot contain a
# point more than (n-1)/sqrt(n) sample standard deviations from its mean
# (~1.79 for n=5, ~2.85 for n=10), so this threshold only has an effect when
# the statistics are computed over at least 6 values.
OUTLIER_SIGMA_THRESHOLD = 2.0
# Budget fill detection
# Delivery sizes (gallons) treated as budget fills rather than full fill-ups.
BUDGET_FILL_AMOUNTS = {100, 125, 150, 200}
# Allowed +/- deviation (gallons) from a budget amount.
BUDGET_FILL_TOLERANCE = 0.5
# Confidence scoring
# Points earned per qualifying (non-budget, non-outlier) delivery.
CONFIDENCE_PER_DELIVERY = 8
# Upper and lower clamp for the confidence score.
CONFIDENCE_MAX = 100
CONFIDENCE_MIN = 20
# Points deducted for a high coefficient of variation (applied once or twice).
CONFIDENCE_VARIANCE_PENALTY = 10
# Default K-factor when no data available
DEFAULT_K_FACTOR = Decimal('0.12')
class FuelEstimator: class FuelEstimator:
@@ -34,64 +43,117 @@ class FuelEstimator:
self.session = session self.session = session
def _get_weather_for_date(self, target_date: date) -> Auto_Temp | None: def _get_weather_for_date(self, target_date: date) -> Auto_Temp | None:
"""Helper to fetch weather data for a specific date."""
return self.session.query(Auto_Temp).filter(Auto_Temp.todays_date == target_date).first() return self.session.query(Auto_Temp).filter(Auto_Temp.todays_date == target_date).first()
@staticmethod
def _is_budget_fill(gallons) -> bool:
    """Tell whether a delivered amount matches one of the budget fill sizes.

    A delivery counts as a budget fill when it lies within
    ``BUDGET_FILL_TOLERANCE`` gallons (inclusive) of any value in
    ``BUDGET_FILL_AMOUNTS``.
    """
    delivered = float(gallons)
    return any(
        abs(delivered - budget_amount) <= BUDGET_FILL_TOLERANCE
        for budget_amount in BUDGET_FILL_AMOUNTS
    )
def _get_division_average_k(self) -> Decimal:
    """Return the mean K-factor across all usable history entries.

    Usable means: not a budget fill, not flagged as an outlier, and carrying
    a positive K-factor. Serves as the fallback for customers without their
    own data; ``DEFAULT_K_FACTOR`` is returned when no usable entry exists.
    """
    usable_entries = (
        KFactorHistory.is_budget_fill == False,  # noqa: E712 - SQL comparison
        KFactorHistory.is_outlier == False,  # noqa: E712 - SQL comparison
        KFactorHistory.k_factor.isnot(None),
        KFactorHistory.k_factor > 0,
    )
    division_avg = (
        self.session.query(func.avg(KFactorHistory.k_factor))
        .filter(*usable_entries)
        .scalar()
    )

    if not division_avg or division_avg <= 0:
        return DEFAULT_K_FACTOR
    return Decimal(str(round(float(division_avg), 4)))
def _calculate_rolling_k_factor(self, customer_id: int):
    """Compute a customer's K-factor from recent delivery history.

    Steps:
        1. Fetch the last ``K_FACTOR_HISTORY_SIZE`` non-budget entries with a
           positive K-factor, most recent first.
        2. Compute mean and sample standard deviation over ALL fetched
           entries and (un)flag each entry as an outlier when it lies more
           than ``OUTLIER_SIGMA_THRESHOLD`` deviations from that mean.
        3. Average the non-outlier entries among the most recent
           ``K_FACTOR_ROLLING_WINDOW``; if every window entry is an outlier,
           average the whole window instead.
        4. Confidence = qualifying deliveries * ``CONFIDENCE_PER_DELIVERY``,
           capped at ``CONFIDENCE_MAX``, minus a penalty for a high
           coefficient of variation, floored at ``CONFIDENCE_MIN``.

    Why the baseline uses the full history rather than the 5-entry window:
    a sample of n values cannot contain a point more than (n-1)/sqrt(n)
    sample standard deviations from its own mean (Samuelson's inequality).
    For n=5 that bound is ~1.79, so a 2-sigma test over the window alone
    could never flag anything. Outliers therefore become detectable once a
    customer has at least 6 usable entries.

    Args:
        customer_id: Customer whose history is evaluated.

    Returns:
        Tuple ``(k_factor, confidence_score, source)`` where ``source`` is
        ``'division_avg'`` when the customer has no usable history and
        ``'calculated'`` otherwise.
    """
    history = (
        self.session.query(KFactorHistory)
        .filter(
            KFactorHistory.customer_id == customer_id,
            KFactorHistory.is_budget_fill == False,  # noqa: E712 - SQL comparison
            KFactorHistory.k_factor.isnot(None),
            KFactorHistory.k_factor > 0,
        )
        .order_by(KFactorHistory.fill_date.desc())
        .limit(K_FACTOR_HISTORY_SIZE)
        .all()
    )

    if not history:
        div_avg = self._get_division_average_k()
        return (div_avg, CONFIDENCE_MIN, 'division_avg')

    if len(history) < 2:
        # A single observation: nothing to average or to test against.
        k = Decimal(str(round(float(history[0].k_factor), 4)))
        confidence = min(CONFIDENCE_MAX, CONFIDENCE_PER_DELIVERY)
        return (k, max(CONFIDENCE_MIN, confidence), 'calculated')

    # Baseline statistics over the full fetched history (see docstring).
    all_k = [float(h.k_factor) for h in history]
    mean_k = statistics.mean(all_k)
    stdev_k = statistics.stdev(all_k)
    max_deviation = OUTLIER_SIGMA_THRESHOLD * stdev_k

    for h in history:
        is_outlier = stdev_k > 0 and abs(float(h.k_factor) - mean_k) > max_deviation
        # Only touch the row when the flag actually changes.
        if bool(h.is_outlier) != is_outlier:
            h.is_outlier = is_outlier

    window = history[:K_FACTOR_ROLLING_WINDOW]
    filtered = [float(h.k_factor) for h in window if not h.is_outlier]
    if not filtered:
        # All were outliers - use full window
        filtered = [float(h.k_factor) for h in window]

    final_k = Decimal(str(round(statistics.mean(filtered), 4)))

    # Confidence scoring
    qualifying = len([h for h in history if not h.is_budget_fill and not h.is_outlier])
    confidence = min(CONFIDENCE_MAX, qualifying * CONFIDENCE_PER_DELIVERY)

    # Penalty for high variance (coefficient of variation)
    if len(filtered) >= 2:
        filtered_mean = statistics.mean(filtered)
        cv = statistics.stdev(filtered) / filtered_mean if filtered_mean > 0 else 0
        if cv > 0.3:
            confidence -= CONFIDENCE_VARIANCE_PENALTY * 2
        elif cv > 0.15:
            confidence -= CONFIDENCE_VARIANCE_PENALTY

    confidence = max(CONFIDENCE_MIN, confidence)

    return (final_k, confidence, 'calculated')
def run_daily_update(self): def run_daily_update(self):
""" """
Main function to run once per day. It updates the estimated fuel level Main function to run once per day. Updates estimated fuel level
for all active automatic delivery customers. The calling function must commit the session. for all active automatic delivery customers.
""" """
today = date.today() today = date.today()
# 1. Check if the update has already run today
if self.session.query(Auto_Update).filter(Auto_Update.last_updated == today).first(): if self.session.query(Auto_Update).filter(Auto_Update.last_updated == today).first():
logger.info(f"Daily update for {today} has already been completed.") logger.info(f"Daily update for {today} has already been completed.")
return {"ok": True, "message": "Update already run today."} return {"ok": True, "message": "Update already run today."}
# 2. Get today's weather data (specifically the Heating Degree Days)
todays_weather = self._get_weather_for_date(today) todays_weather = self._get_weather_for_date(today)
if not todays_weather: if not todays_weather:
logger.info(f"Error: Weather data for {today} not found. Cannot run update.") logger.info(f"Error: Weather data for {today} not found. Cannot run update.")
return {"ok": False, "message": f"Weather data for {today} not found."} return {"ok": False, "message": f"Weather data for {today} not found."}
# Degree days can't be negative for this calculation. If it's warm, HDD = 0.
degree_day = Decimal(max(0, 65 - float(todays_weather.temp_avg))) degree_day = Decimal(max(0, 65 - float(todays_weather.temp_avg)))
# 3. Get all active automatic customers
auto_customers = self.session.query(Auto_Delivery).filter( auto_customers = self.session.query(Auto_Delivery).filter(
Auto_Delivery.auto_status == 1 # Assuming 1 means active Auto_Delivery.auto_status == 1
).all() ).all()
if not auto_customers: if not auto_customers:
@@ -100,14 +162,7 @@ class FuelEstimator:
logger.info(f"Staging daily fuel update for {len(auto_customers)} customers...") logger.info(f"Staging daily fuel update for {len(auto_customers)} customers...")
corrections_made = 0
# 4. Loop through each customer and update their fuel level
for customer in auto_customers: for customer in auto_customers:
# Verify and correct house_factor if needed
if self._verify_house_factor_correctness(customer):
corrections_made += 1
heating_usage = customer.house_factor * degree_day heating_usage = customer.house_factor * degree_day
hot_water_usage = Decimal('0.0') hot_water_usage = Decimal('0.0')
@@ -123,21 +178,16 @@ class FuelEstimator:
if customer.days_since_last_fill is not None: if customer.days_since_last_fill is not None:
customer.days_since_last_fill += 1 customer.days_since_last_fill += 1
# 5. Log that today's update is complete
new_update_log = Auto_Update(last_updated=today) new_update_log = Auto_Update(last_updated=today)
self.session.add(new_update_log) self.session.add(new_update_log)
logger.info("Daily update staged. Awaiting commit.") logger.info("Daily update staged. Awaiting commit.")
message = f"Successfully staged updates for {len(auto_customers)} customers." return {"ok": True, "message": f"Successfully staged updates for {len(auto_customers)} customers."}
if corrections_made > 0:
message += f" Corrected house factors for {corrections_made} customers."
return {"ok": True, "message": message}
def refine_factor_after_delivery(self, ticket: Tickets_Auto_Delivery): def refine_factor_after_delivery(self, ticket: Tickets_Auto_Delivery):
""" """
This is the self-correction logic. It recalculates and refines the customer's Recalculates and refines the customer's K-Factor after a delivery.
K-Factor (house_factor) after a delivery. The calling function must commit the session. Uses K-factor history with rolling averages and outlier detection.
""" """
customer = self.session.query(Auto_Delivery).filter( customer = self.session.query(Auto_Delivery).filter(
Auto_Delivery.customer_id == ticket.customer_id Auto_Delivery.customer_id == ticket.customer_id
@@ -147,20 +197,31 @@ class FuelEstimator:
logger.info(f"Customer {ticket.customer_id} not found.") logger.info(f"Customer {ticket.customer_id} not found.")
return return
# 1. Detect and flag budget fill
is_budget = self._is_budget_fill(ticket.gallons_delivered)
ticket.is_budget_fill = is_budget
if is_budget:
logger.info(f"Budget fill detected for customer {ticket.customer_id}: {ticket.gallons_delivered} gal")
# 2. First delivery - no previous fill to compare against
if not customer.last_fill: if not customer.last_fill:
logger.info(f"Setting initial K-Factor for new customer {ticket.customer_id} with only one delivery.") logger.info(f"First delivery for customer {ticket.customer_id}. Setting division average K-factor.")
customer.house_factor = self._estimate_initial_house_factor(customer) div_avg = self._get_division_average_k()
self._update_tank_after_fill(customer, ticket) customer.house_factor = div_avg
customer.confidence_score = CONFIDENCE_MIN
customer.k_factor_source = 'division_avg'
self._update_tank_after_fill(customer, ticket, is_budget)
return return
start_date = customer.last_fill start_date = customer.last_fill
end_date = ticket.fill_date end_date = ticket.fill_date
if start_date >= end_date: if start_date >= end_date:
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: New fill date is not after the last one. Resetting tank only.") logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: fill date not after last fill. Resetting tank only.")
self._update_tank_after_fill(customer, ticket) self._update_tank_after_fill(customer, ticket, is_budget)
return return
# 3. Calculate HDD for interval
interval_temps = self.session.query(Auto_Temp).filter( interval_temps = self.session.query(Auto_Temp).filter(
Auto_Temp.todays_date > start_date, Auto_Temp.todays_date > start_date,
Auto_Temp.todays_date <= end_date Auto_Temp.todays_date <= end_date
@@ -168,53 +229,71 @@ class FuelEstimator:
total_degree_days = sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps) total_degree_days = sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps)
total_hdd = Decimal(total_degree_days) total_hdd = Decimal(total_degree_days)
# Hot water adjustment
num_days = (end_date - start_date).days
total_hot_water_usage = Decimal('0.0') total_hot_water_usage = Decimal('0.0')
if customer.hot_water_summer == 1: if customer.hot_water_summer == 1:
num_days = (end_date - start_date).days
total_hot_water_usage = Decimal(num_days) * HOT_WATER_DAILY_USAGE total_hot_water_usage = Decimal(num_days) * HOT_WATER_DAILY_USAGE
gallons_for_heating = ticket.gallons_delivered - total_hot_water_usage gallons_for_heating = ticket.gallons_delivered - total_hot_water_usage
if gallons_for_heating <= 0 or total_hdd == 0:
logger.info(f"Cannot calculate new K-Factor for customer {ticket.customer_id}. (HDD: {total_hdd}, Heating Gallons: {gallons_for_heating}). Resetting tank only.")
self._update_tank_after_fill(customer, ticket)
return
new_k_factor = gallons_for_heating / total_hdd # Calculate K-factor for this observation
k_factor_obs = None
if gallons_for_heating > 0 and total_hdd > 0:
k_factor_obs = gallons_for_heating / total_hdd
current_k_factor = customer.house_factor # 4. Store K-factor observation in history (even budget fills, flagged)
smoothed_k_factor = (current_k_factor * K_FACTOR_SMOOTHING_WEIGHT) + (new_k_factor * (Decimal('1.0') - K_FACTOR_SMOOTHING_WEIGHT)) history_entry = KFactorHistory(
customer_id=ticket.customer_id,
ticket_id=ticket.id,
fill_date=ticket.fill_date,
gallons_delivered=ticket.gallons_delivered,
total_hdd=total_hdd,
days_in_period=num_days,
k_factor=k_factor_obs,
is_budget_fill=is_budget,
is_outlier=False,
created_at=date.today()
)
self.session.add(history_entry)
logger.info(f"Refining K-Factor for Customer ID {customer.customer_id}:") # 5. Run rolling K-factor calculation
logger.info(f" - Old K-Factor: {current_k_factor:.4f}, New Smoothed K-Factor: {smoothed_k_factor:.4f}") # Flush so the new entry is visible to the query
self.session.flush()
new_k, confidence, source = self._calculate_rolling_k_factor(ticket.customer_id)
customer.house_factor = smoothed_k_factor logger.info(f"Refining K-Factor for Customer {ticket.customer_id}:")
self._update_tank_after_fill(customer, ticket) logger.info(f" Old K: {customer.house_factor:.4f}, New K: {new_k:.4f}, Confidence: {confidence}, Source: {source}")
# 6. Update customer
customer.house_factor = new_k
customer.confidence_score = confidence
customer.k_factor_source = source
# 7. Update tank after fill
self._update_tank_after_fill(customer, ticket, is_budget)
logger.info(f"K-Factor and tank status for Customer {customer.customer_id} staged for update.") logger.info(f"K-Factor and tank status for Customer {customer.customer_id} staged for update.")
def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery, is_budget: bool = False):
    """Update a customer's tank status after a delivery.

    Resets the fill date and day counter, sets the estimated level and
    marks the customer active (``auto_status = 1``).

    Args:
        customer: Auto-delivery record to update in place.
        ticket: The delivery ticket that was just filled.
        is_budget: When True the delivered gallons are ADDED to the current
            estimate (capped at the tank's max fill); otherwise the estimate
            is RESET to the max fill.
    """
    customer.last_fill = ticket.fill_date
    customer.days_since_last_fill = 0

    # Max fill comes from TANK_MAX_FILLS for known tank sizes, else the tank
    # size itself; without a usable tank size fall back to 240 gallons.
    if customer.tank_size and Decimal(customer.tank_size) > 0:
        tank_size = float(Decimal(customer.tank_size))
        max_fill = TANK_MAX_FILLS.get(tank_size, tank_size)
    else:
        max_fill = 240.0
    max_fill_gallons = Decimal(str(max_fill))

    if is_budget:
        # Budget fill: ADD gallons to current level, cap at max_fill.
        # A missing estimate is treated as an empty tank so the addition
        # cannot fail with None + Decimal.
        current_level = customer.estimated_gallons_left
        if current_level is None:
            current_level = Decimal('0')
        customer.estimated_gallons_left = min(
            current_level + ticket.gallons_delivered, max_fill_gallons
        )
    else:
        # Full delivery: RESET to max_fill
        customer.estimated_gallons_left = max_fill_gallons

    # On a fill day the previous-day value mirrors the new level.
    customer.estimated_gallons_left_prev_day = customer.estimated_gallons_left
    customer.last_updated = date.today()
    customer.auto_status = 1