feat: rewrite K-factor engine with history tracking and outlier detection
Replace simple exponential smoothing with a rolling-average K-factor system backed by a new auto_kfactor_history table. Budget fills are detected and excluded from calculations, outliers beyond 2-sigma are flagged, and confidence scores track data quality per customer. Adds backfill endpoint, auto-create for missing estimation records, and manual house_factor PUT endpoints for both auto and regular customers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,22 +3,15 @@ from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
import statistics
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import your existing database models
|
||||
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery
|
||||
from app.models.auto import Auto_Delivery, Auto_Temp, Auto_Update, Tickets_Auto_Delivery, KFactorHistory
|
||||
|
||||
# --- Constants for the Model ---
|
||||
# This is a baseline daily usage for homes that use oil for hot water.
|
||||
# A typical value is 0.5 to 1.0 gallons per day. Adjust as needed.
|
||||
# --- Constants ---
|
||||
HOT_WATER_DAILY_USAGE = Decimal('1.0')
|
||||
|
||||
# This determines how quickly the K-Factor adjusts.
|
||||
# 0.7 means 70% weight is given to the historical factor and 30% to the new one.
|
||||
# This prevents wild swings from a single unusual delivery period.
|
||||
K_FACTOR_SMOOTHING_WEIGHT = Decimal('0.7')
|
||||
|
||||
TANK_MAX_FILLS = {
|
||||
275: 240,
|
||||
330: 280,
|
||||
@@ -26,7 +19,23 @@ TANK_MAX_FILLS = {
|
||||
550: 500
|
||||
}
|
||||
|
||||
PARTIAL_DELIVERIES = [100, 125, 150, 200]
|
||||
# K-Factor rolling average settings
|
||||
K_FACTOR_ROLLING_WINDOW = 5
|
||||
K_FACTOR_HISTORY_SIZE = 10
|
||||
OUTLIER_SIGMA_THRESHOLD = 2.0
|
||||
|
||||
# Budget fill detection
|
||||
BUDGET_FILL_AMOUNTS = {100, 125, 150, 200}
|
||||
BUDGET_FILL_TOLERANCE = 0.5
|
||||
|
||||
# Confidence scoring
|
||||
CONFIDENCE_PER_DELIVERY = 8
|
||||
CONFIDENCE_MAX = 100
|
||||
CONFIDENCE_MIN = 20
|
||||
CONFIDENCE_VARIANCE_PENALTY = 10
|
||||
|
||||
# Default K-factor when no data available
|
||||
DEFAULT_K_FACTOR = Decimal('0.12')
|
||||
|
||||
|
||||
class FuelEstimator:
|
||||
@@ -34,64 +43,117 @@ class FuelEstimator:
|
||||
self.session = session
|
||||
|
||||
def _get_weather_for_date(self, target_date: date) -> Auto_Temp | None:
|
||||
"""Helper to fetch weather data for a specific date."""
|
||||
return self.session.query(Auto_Temp).filter(Auto_Temp.todays_date == target_date).first()
|
||||
|
||||
def _estimate_initial_house_factor(self, customer: Auto_Delivery) -> Decimal:
|
||||
"""
|
||||
Generic function to estimate initial house factor for customers with only one delivery.
|
||||
This can be improved with more sophisticated logic (e.g., averaging similar customers).
|
||||
"""
|
||||
# Default generic house factor: 0.12 gallons per degree day (average based on existing customer data)
|
||||
# This represents typical heating usage and can be adjusted based on future data analysis
|
||||
return Decimal('0.12')
|
||||
@staticmethod
|
||||
def _is_budget_fill(gallons) -> bool:
|
||||
"""Returns True if gallons is within +/-0.5 of a budget fill amount."""
|
||||
gal = float(gallons)
|
||||
for amount in BUDGET_FILL_AMOUNTS:
|
||||
if abs(gal - amount) <= BUDGET_FILL_TOLERANCE:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _verify_house_factor_correctness(self, customer: Auto_Delivery) -> bool:
|
||||
"""
|
||||
Verify and correct house_factor based on delivery history.
|
||||
Returns True if correction was made.
|
||||
"""
|
||||
# Count deliveries for this customer
|
||||
delivery_count = self.session.query(func.count(Tickets_Auto_Delivery.id)).filter(
|
||||
Tickets_Auto_Delivery.customer_id == customer.customer_id
|
||||
def _get_division_average_k(self) -> Decimal:
|
||||
"""Average K from all valid (non-budget, non-outlier) history entries.
|
||||
Fallback for new customers instead of hardcoded 0.12."""
|
||||
result = self.session.query(func.avg(KFactorHistory.k_factor)).filter(
|
||||
KFactorHistory.is_budget_fill == False,
|
||||
KFactorHistory.is_outlier == False,
|
||||
KFactorHistory.k_factor.isnot(None),
|
||||
KFactorHistory.k_factor > 0
|
||||
).scalar()
|
||||
if result and result > 0:
|
||||
return Decimal(str(round(float(result), 4)))
|
||||
return DEFAULT_K_FACTOR
|
||||
|
||||
corrected = False
|
||||
def _calculate_rolling_k_factor(self, customer_id: int):
|
||||
"""Returns (k_factor, confidence_score, source).
|
||||
|
||||
if delivery_count <= 1:
|
||||
# Customers with 0 or 1 delivery should have house_factor = 0.12 (initial average)
|
||||
if customer.house_factor != Decimal('0.12'):
|
||||
logger.info(f"Correcting house_factor for customer {customer.customer_id} from {customer.house_factor} to 0.12 (1 or fewer deliveries)")
|
||||
customer.house_factor = Decimal('0.12')
|
||||
corrected = True
|
||||
# For customers with 2+ deliveries, keep their calculated factor (no correction needed)
|
||||
1. Query last 10 non-budget history entries
|
||||
2. Take last 5 for rolling window
|
||||
3. Calculate mean + std dev
|
||||
4. Exclude entries >2 sigma from mean, mark as outliers
|
||||
5. Recalculate mean from filtered set
|
||||
6. Confidence = min(100, qualifying_deliveries * 8) - penalty for high variance
|
||||
"""
|
||||
# Get last HISTORY_SIZE non-budget entries ordered most recent first
|
||||
history = self.session.query(KFactorHistory).filter(
|
||||
KFactorHistory.customer_id == customer_id,
|
||||
KFactorHistory.is_budget_fill == False,
|
||||
KFactorHistory.k_factor.isnot(None),
|
||||
KFactorHistory.k_factor > 0
|
||||
).order_by(KFactorHistory.fill_date.desc()).limit(K_FACTOR_HISTORY_SIZE).all()
|
||||
|
||||
return corrected
|
||||
if not history:
|
||||
div_avg = self._get_division_average_k()
|
||||
return (div_avg, CONFIDENCE_MIN, 'division_avg')
|
||||
|
||||
# Take last ROLLING_WINDOW for calculation
|
||||
window = history[:K_FACTOR_ROLLING_WINDOW]
|
||||
k_values = [float(h.k_factor) for h in window]
|
||||
|
||||
if len(k_values) < 2:
|
||||
k = Decimal(str(round(k_values[0], 4)))
|
||||
confidence = min(CONFIDENCE_MAX, CONFIDENCE_PER_DELIVERY)
|
||||
return (k, max(CONFIDENCE_MIN, confidence), 'calculated')
|
||||
|
||||
mean_k = statistics.mean(k_values)
|
||||
stdev_k = statistics.stdev(k_values)
|
||||
|
||||
# Mark outliers (>2 sigma from mean)
|
||||
filtered = []
|
||||
for h in window:
|
||||
kf = float(h.k_factor)
|
||||
if stdev_k > 0 and abs(kf - mean_k) > OUTLIER_SIGMA_THRESHOLD * stdev_k:
|
||||
if not h.is_outlier:
|
||||
h.is_outlier = True
|
||||
else:
|
||||
filtered.append(kf)
|
||||
if h.is_outlier:
|
||||
h.is_outlier = False
|
||||
|
||||
if not filtered:
|
||||
# All were outliers - use full set
|
||||
filtered = k_values
|
||||
|
||||
final_k = Decimal(str(round(statistics.mean(filtered), 4)))
|
||||
|
||||
# Confidence scoring
|
||||
qualifying = len([h for h in history if not h.is_budget_fill and not h.is_outlier])
|
||||
confidence = min(CONFIDENCE_MAX, qualifying * CONFIDENCE_PER_DELIVERY)
|
||||
|
||||
# Penalty for high variance (coefficient of variation)
|
||||
if len(filtered) >= 2:
|
||||
cv = statistics.stdev(filtered) / statistics.mean(filtered) if statistics.mean(filtered) > 0 else 0
|
||||
if cv > 0.3:
|
||||
confidence -= CONFIDENCE_VARIANCE_PENALTY * 2
|
||||
elif cv > 0.15:
|
||||
confidence -= CONFIDENCE_VARIANCE_PENALTY
|
||||
|
||||
confidence = max(CONFIDENCE_MIN, confidence)
|
||||
return (final_k, confidence, 'calculated')
|
||||
|
||||
def run_daily_update(self):
|
||||
"""
|
||||
Main function to run once per day. It updates the estimated fuel level
|
||||
for all active automatic delivery customers. The calling function must commit the session.
|
||||
Main function to run once per day. Updates estimated fuel level
|
||||
for all active automatic delivery customers.
|
||||
"""
|
||||
today = date.today()
|
||||
|
||||
# 1. Check if the update has already run today
|
||||
if self.session.query(Auto_Update).filter(Auto_Update.last_updated == today).first():
|
||||
logger.info(f"Daily update for {today} has already been completed.")
|
||||
return {"ok": True, "message": "Update already run today."}
|
||||
|
||||
# 2. Get today's weather data (specifically the Heating Degree Days)
|
||||
todays_weather = self._get_weather_for_date(today)
|
||||
if not todays_weather:
|
||||
logger.info(f"Error: Weather data for {today} not found. Cannot run update.")
|
||||
return {"ok": False, "message": f"Weather data for {today} not found."}
|
||||
|
||||
# Degree days can't be negative for this calculation. If it's warm, HDD = 0.
|
||||
degree_day = Decimal(max(0, 65 - float(todays_weather.temp_avg)))
|
||||
|
||||
# 3. Get all active automatic customers
|
||||
auto_customers = self.session.query(Auto_Delivery).filter(
|
||||
Auto_Delivery.auto_status == 1 # Assuming 1 means active
|
||||
Auto_Delivery.auto_status == 1
|
||||
).all()
|
||||
|
||||
if not auto_customers:
|
||||
@@ -100,14 +162,7 @@ class FuelEstimator:
|
||||
|
||||
logger.info(f"Staging daily fuel update for {len(auto_customers)} customers...")
|
||||
|
||||
corrections_made = 0
|
||||
|
||||
# 4. Loop through each customer and update their fuel level
|
||||
for customer in auto_customers:
|
||||
# Verify and correct house_factor if needed
|
||||
if self._verify_house_factor_correctness(customer):
|
||||
corrections_made += 1
|
||||
|
||||
heating_usage = customer.house_factor * degree_day
|
||||
|
||||
hot_water_usage = Decimal('0.0')
|
||||
@@ -123,21 +178,16 @@ class FuelEstimator:
|
||||
if customer.days_since_last_fill is not None:
|
||||
customer.days_since_last_fill += 1
|
||||
|
||||
# 5. Log that today's update is complete
|
||||
new_update_log = Auto_Update(last_updated=today)
|
||||
self.session.add(new_update_log)
|
||||
|
||||
logger.info("Daily update staged. Awaiting commit.")
|
||||
message = f"Successfully staged updates for {len(auto_customers)} customers."
|
||||
if corrections_made > 0:
|
||||
message += f" Corrected house factors for {corrections_made} customers."
|
||||
|
||||
return {"ok": True, "message": message}
|
||||
return {"ok": True, "message": f"Successfully staged updates for {len(auto_customers)} customers."}
|
||||
|
||||
def refine_factor_after_delivery(self, ticket: Tickets_Auto_Delivery):
|
||||
"""
|
||||
This is the self-correction logic. It recalculates and refines the customer's
|
||||
K-Factor (house_factor) after a delivery. The calling function must commit the session.
|
||||
Recalculates and refines the customer's K-Factor after a delivery.
|
||||
Uses K-factor history with rolling averages and outlier detection.
|
||||
"""
|
||||
customer = self.session.query(Auto_Delivery).filter(
|
||||
Auto_Delivery.customer_id == ticket.customer_id
|
||||
@@ -147,74 +197,103 @@ class FuelEstimator:
|
||||
logger.info(f"Customer {ticket.customer_id} not found.")
|
||||
return
|
||||
|
||||
# 1. Detect and flag budget fill
|
||||
is_budget = self._is_budget_fill(ticket.gallons_delivered)
|
||||
ticket.is_budget_fill = is_budget
|
||||
if is_budget:
|
||||
logger.info(f"Budget fill detected for customer {ticket.customer_id}: {ticket.gallons_delivered} gal")
|
||||
|
||||
# 2. First delivery - no previous fill to compare against
|
||||
if not customer.last_fill:
|
||||
logger.info(f"Setting initial K-Factor for new customer {ticket.customer_id} with only one delivery.")
|
||||
customer.house_factor = self._estimate_initial_house_factor(customer)
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
logger.info(f"First delivery for customer {ticket.customer_id}. Setting division average K-factor.")
|
||||
div_avg = self._get_division_average_k()
|
||||
customer.house_factor = div_avg
|
||||
customer.confidence_score = CONFIDENCE_MIN
|
||||
customer.k_factor_source = 'division_avg'
|
||||
self._update_tank_after_fill(customer, ticket, is_budget)
|
||||
return
|
||||
|
||||
start_date = customer.last_fill
|
||||
end_date = ticket.fill_date
|
||||
|
||||
|
||||
if start_date >= end_date:
|
||||
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: New fill date is not after the last one. Resetting tank only.")
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
logger.info(f"Cannot refine K-Factor for customer {ticket.customer_id}: fill date not after last fill. Resetting tank only.")
|
||||
self._update_tank_after_fill(customer, ticket, is_budget)
|
||||
return
|
||||
|
||||
# 3. Calculate HDD for interval
|
||||
interval_temps = self.session.query(Auto_Temp).filter(
|
||||
Auto_Temp.todays_date > start_date,
|
||||
Auto_Temp.todays_date <= end_date
|
||||
).all()
|
||||
total_degree_days = sum(max(0, 65 - float(temp.temp_avg)) for temp in interval_temps)
|
||||
total_hdd = Decimal(total_degree_days)
|
||||
|
||||
|
||||
# Hot water adjustment
|
||||
num_days = (end_date - start_date).days
|
||||
total_hot_water_usage = Decimal('0.0')
|
||||
if customer.hot_water_summer == 1:
|
||||
num_days = (end_date - start_date).days
|
||||
total_hot_water_usage = Decimal(num_days) * HOT_WATER_DAILY_USAGE
|
||||
|
||||
gallons_for_heating = ticket.gallons_delivered - total_hot_water_usage
|
||||
if gallons_for_heating <= 0 or total_hdd == 0:
|
||||
logger.info(f"Cannot calculate new K-Factor for customer {ticket.customer_id}. (HDD: {total_hdd}, Heating Gallons: {gallons_for_heating}). Resetting tank only.")
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
return
|
||||
|
||||
new_k_factor = gallons_for_heating / total_hdd
|
||||
|
||||
current_k_factor = customer.house_factor
|
||||
smoothed_k_factor = (current_k_factor * K_FACTOR_SMOOTHING_WEIGHT) + (new_k_factor * (Decimal('1.0') - K_FACTOR_SMOOTHING_WEIGHT))
|
||||
|
||||
logger.info(f"Refining K-Factor for Customer ID {customer.customer_id}:")
|
||||
logger.info(f" - Old K-Factor: {current_k_factor:.4f}, New Smoothed K-Factor: {smoothed_k_factor:.4f}")
|
||||
# Calculate K-factor for this observation
|
||||
k_factor_obs = None
|
||||
if gallons_for_heating > 0 and total_hdd > 0:
|
||||
k_factor_obs = gallons_for_heating / total_hdd
|
||||
|
||||
# 4. Store K-factor observation in history (even budget fills, flagged)
|
||||
history_entry = KFactorHistory(
|
||||
customer_id=ticket.customer_id,
|
||||
ticket_id=ticket.id,
|
||||
fill_date=ticket.fill_date,
|
||||
gallons_delivered=ticket.gallons_delivered,
|
||||
total_hdd=total_hdd,
|
||||
days_in_period=num_days,
|
||||
k_factor=k_factor_obs,
|
||||
is_budget_fill=is_budget,
|
||||
is_outlier=False,
|
||||
created_at=date.today()
|
||||
)
|
||||
self.session.add(history_entry)
|
||||
|
||||
# 5. Run rolling K-factor calculation
|
||||
# Flush so the new entry is visible to the query
|
||||
self.session.flush()
|
||||
new_k, confidence, source = self._calculate_rolling_k_factor(ticket.customer_id)
|
||||
|
||||
logger.info(f"Refining K-Factor for Customer {ticket.customer_id}:")
|
||||
logger.info(f" Old K: {customer.house_factor:.4f}, New K: {new_k:.4f}, Confidence: {confidence}, Source: {source}")
|
||||
|
||||
# 6. Update customer
|
||||
customer.house_factor = new_k
|
||||
customer.confidence_score = confidence
|
||||
customer.k_factor_source = source
|
||||
|
||||
# 7. Update tank after fill
|
||||
self._update_tank_after_fill(customer, ticket, is_budget)
|
||||
|
||||
customer.house_factor = smoothed_k_factor
|
||||
self._update_tank_after_fill(customer, ticket)
|
||||
|
||||
logger.info(f"K-Factor and tank status for Customer {customer.customer_id} staged for update.")
|
||||
|
||||
def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery):
|
||||
"""Helper to update customer tank status after a fill-up or partial delivery."""
|
||||
def _update_tank_after_fill(self, customer: Auto_Delivery, ticket: Tickets_Auto_Delivery, is_budget: bool = False):
|
||||
"""Update customer tank status after a fill-up."""
|
||||
customer.last_fill = ticket.fill_date
|
||||
customer.days_since_last_fill = 0
|
||||
|
||||
# Determine max fill capacity
|
||||
if customer.tank_size and Decimal(customer.tank_size) > 0:
|
||||
tank_size = float(Decimal(customer.tank_size))
|
||||
max_fill = TANK_MAX_FILLS.get(tank_size, tank_size)
|
||||
else:
|
||||
# Default to legal max for common tank size (275 gallons = 240)
|
||||
max_fill = 240.0
|
||||
|
||||
# Check if this is a partial delivery
|
||||
if float(ticket.gallons_delivered) in PARTIAL_DELIVERIES:
|
||||
# Partial delivery: add to current level, cap at max_fill
|
||||
if is_budget:
|
||||
# Budget fill: ADD gallons to current level, cap at max_fill
|
||||
customer.estimated_gallons_left += ticket.gallons_delivered
|
||||
customer.estimated_gallons_left = min(customer.estimated_gallons_left, Decimal(str(max_fill)))
|
||||
else:
|
||||
# Full delivery: set to max_fill
|
||||
# Full delivery: RESET to max_fill
|
||||
customer.estimated_gallons_left = Decimal(str(max_fill))
|
||||
|
||||
# The previous day's value should match the new value on a fill day.
|
||||
customer.estimated_gallons_left_prev_day = customer.estimated_gallons_left
|
||||
customer.last_updated = date.today()
|
||||
customer.auto_status = 1 # Reactivate the customer
|
||||
customer.auto_status = 1
|
||||
|
||||
Reference in New Issue
Block a user