Model Selection
Compare models using AIC, BIC, and cross-validation.
Comparing Models
-- Fit each candidate model as an aggregate over the full table and
-- collect its information criteria; the model with the lowest AIC
-- sorts first in the final result.
WITH candidate_models AS (
    SELECT
        'OLS' AS model_name,
        result.aic,
        result.bic,
        result.r_squared
    FROM sales_data
    APPLY anofox_statistics_ols_agg(revenue, ARRAY[spend, team_size])
    UNION ALL
    SELECT
        'Ridge' AS model_name,
        result.aic,
        result.bic,
        result.r_squared
    FROM sales_data
    APPLY anofox_statistics_ridge_agg(revenue, ARRAY[spend, team_size])
)
-- Best trade-off between fit and complexity appears first.
SELECT * FROM candidate_models ORDER BY aic;
Information Criteria
- Lower AIC = Better trade-off between fit and complexity
- Lower BIC = Better model under BIC, which penalizes complexity more strongly than AIC
Cross-Validation
-- K-fold cross-validation (illustrative skeleton, not runnable as-is):
-- assign each row to one of 5 folds, compute per-fold prediction RMSE,
-- then average the fold RMSEs into a single CV estimate.
WITH folds AS (
SELECT
-- Round-robin fold assignment by date order; folds are numbered 1..5.
(ROW_NUMBER() OVER (ORDER BY date) - 1) % 5 + 1 as fold,
date, revenue, spend
FROM sales_data
),
cv_results AS (
SELECT
fold,
-- NOTE(review): `^` as a power operator is PostgreSQL-specific;
-- use POWER(revenue - pred, 2) for portability.
SQRT(AVG((revenue - pred)^2)) as fold_rmse
FROM folds f
-- `...` is a placeholder: the prediction function's actual arguments
-- (training data from the other folds, features for this row) must be
-- filled in — see the function's documentation.
CROSS JOIN LATERAL anofox_statistics_ols_predict_interval(...) p
-- NOTE(review): `overall_fold` is not defined in this query; the intent
-- is presumably "train on all folds except the one being scored" —
-- confirm how the held-out fold is threaded into the predict call.
WHERE f.fold != overall_fold
GROUP BY fold
)
-- Average the per-fold RMSEs into the cross-validated RMSE.
SELECT AVG(fold_rmse) as cv_rmse FROM cv_results;
Next Steps
- Handling Multicollinearity — Ridge regression
- Production Deployment — Deploy best model