Skip to main content

Model Selection

Compare models using AIC, BIC, and cross-validation.

Comparing Models

-- Fit an OLS and a ridge regression of revenue on spend and team_size over the
-- same table, label each fit, and list them by AIC in ascending order so the
-- lowest-AIC model comes first.
-- NOTE(review): a bare `APPLY` after a table reference is not standard SQL;
-- confirm this form against the target engine before running.
WITH models AS (
    SELECT
        'OLS' AS model_name,
        result.aic AS aic,
        result.bic AS bic,
        result.r_squared AS r_squared
    FROM sales_data
    APPLY anofox_statistics_ols_agg(revenue, ARRAY[spend, team_size])

    UNION ALL

    SELECT
        'Ridge' AS model_name,
        result.aic AS aic,
        result.bic AS bic,
        result.r_squared AS r_squared
    FROM sales_data
    APPLY anofox_statistics_ridge_agg(revenue, ARRAY[spend, team_size])
)
SELECT
    model_name,
    aic,
    bic,
    r_squared
FROM models
ORDER BY aic;

Information Criteria

  • Lower AIC = Better trade-off between fit and complexity
  • Lower BIC = Better model under a stronger complexity penalty than AIC (BIC's penalty grows with sample size, so it favors simpler models)

Cross-Validation

-- K-fold cross-validation (5 folds), summarised as the mean per-fold RMSE.
-- NOTE(review): illustrative sketch, not runnable as written — the predict
-- call's arguments are elided (`...`), and `pred` / `overall_fold` are not
-- defined anywhere in this snippet.
WITH folds AS (
-- Number rows in date order and deal them round-robin into folds 1..5.
SELECT
(ROW_NUMBER() OVER (ORDER BY date) - 1) % 5 + 1 as fold,
date, revenue, spend
FROM sales_data
),
cv_results AS (
-- One row per fold: root-mean-squared difference between revenue and pred.
SELECT
fold,
-- `^` is used as exponentiation here — assumes an engine where that holds (TODO confirm).
SQRT(AVG((revenue - pred)^2)) as fold_rmse
FROM folds f
-- Arguments are left as a `...` placeholder; presumably this call supplies `pred` — verify.
CROSS JOIN LATERAL anofox_statistics_ols_predict_interval(...) p
-- `overall_fold` is not defined in this snippet; presumably the fold being held out — confirm.
WHERE f.fold != overall_fold
GROUP BY fold
)
-- Average the per-fold RMSE values into a single cross-validation score.
SELECT AVG(fold_rmse) as cv_rmse FROM cv_results;

Next Steps

🍪 Cookie Settings