Basic Regression Workflow
Complete end-to-end example: data preparation → modeling → validation → deployment.
The 10-Step Workflow
Step 1: Prepare Data
CREATE TABLE sales_with_features AS
SELECT
month_id,
revenue,
marketing_spend,
team_size,
CASE WHEN month_id IN (11, 12) THEN 1 ELSE 0 END as holiday_season
FROM raw_sales
WHERE revenue IS NOT NULL
AND marketing_spend IS NOT NULL;
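Before splitting, a quick sanity check of the prepared table helps catch obvious data problems early; the query below is a plain-SQL sketch against the sales_with_features table created above.
-- Quick sanity check: row count, remaining nulls, and holiday-season months
SELECT
COUNT(*) as n_rows,
SUM(CASE WHEN team_size IS NULL THEN 1 ELSE 0 END) as null_team_size,
SUM(holiday_season) as holiday_months
FROM sales_with_features;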
Step 2: Split Train-Test
CREATE TABLE train_data AS
SELECT * FROM sales_with_features WHERE month_id <= 36;
CREATE TABLE test_data AS
SELECT * FROM sales_with_features WHERE month_id > 36;
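A row count per split confirms the month_id cutoff leaves enough observations on both sides; the 36-month cutoff assumes roughly four years of history, so adjust it to your data.
-- Confirm split sizes
SELECT 'train' as split, COUNT(*) as n FROM train_data
UNION ALL
SELECT 'test', COUNT(*) FROM test_data;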
Step 3: Check Multicollinearity
SELECT variable, vif
FROM anofox_statistics_vif(
ARRAY['marketing_spend', 'team_size', 'holiday_season']
);
-- All VIF < 10? Continue with OLS. Otherwise, drop a collinear predictor or switch to Ridge.
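As a quick pre-screen alongside the VIF test, pairwise correlations between the predictors flag the most obvious collinearity; this sketch uses the standard corr() aggregate on the training data.
-- Pairwise correlations between predictors (values near ±1 suggest collinearity)
SELECT
corr(marketing_spend, team_size) as spend_vs_team,
corr(marketing_spend, holiday_season) as spend_vs_holiday,
corr(team_size, holiday_season) as team_vs_holiday
FROM train_data;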
Step 4: Fit OLS Model
CREATE TABLE model_results AS
SELECT
coefficient,
std_error,
t_statistic,
p_value,
r_squared,
adjusted_r_squared,
aic,
bic,
rmse
FROM anofox_statistics_ols(
'train_data',
'revenue',
ARRAY['marketing_spend', 'team_size', 'holiday_season']
);
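With the model table in place, the overall fit statistics can be read back directly before moving on; a minimal query over the columns returned above.
-- Overall goodness of fit on the training data
SELECT r_squared, adjusted_r_squared, aic, bic, rmse
FROM model_results
LIMIT 1;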
Step 5: Validate Assumptions
-- Check that the residuals are approximately normal
-- (residuals = actual - predicted values from the training fit; see the sketch below)
SELECT * FROM anofox_statistics_normality_test(residuals);
-- Check residual diagnostics for influential points (Cook's distance)
SELECT
y_actual,
y_predicted,
cooks_distance
FROM anofox_statistics_residual_diagnostics(actual, predicted);
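The calls above take the model's residuals and the actual/predicted pairs as input. One way to produce them is to score the training data with the same interval function used in Step 7; this is a sketch that assumes the function accepts the training table as its scoring input, mirroring the call shown there.
-- Score the training data to obtain actual, predicted, and residual values
CREATE TABLE train_residuals AS
SELECT
train_data.revenue as actual,
pred.point_estimate as predicted,
(train_data.revenue - pred.point_estimate) as residual
FROM train_data
CROSS JOIN LATERAL anofox_statistics_ols_predict_interval(
'train_data',
'revenue',
ARRAY['marketing_spend', 'team_size', 'holiday_season'],
train_data,
0.95
) as pred;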
Step 6: Test Significance
SELECT
'marketing_spend' as variable,
coefficient[1] as coeff,
p_value[1] as p_val,
CASE WHEN p_value[1] < 0.05 THEN 'Significant' ELSE 'Not significant' END as result
FROM model_results
UNION ALL
SELECT 'team_size', coefficient[2], p_value[2],
CASE WHEN p_value[2] < 0.05 THEN 'Significant' ELSE 'Not significant' END
FROM model_results
UNION ALL
SELECT 'holiday_season', coefficient[3], p_value[3],
CASE WHEN p_value[3] < 0.05 THEN 'Significant' ELSE 'Not significant' END
FROM model_results;
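Since coefficient and p_value appear to be stored as arrays (note the [1], [2], [3] indexing), the repeated UNION ALL can also be written as a single cross join against a small list of variable names and positions; the bracket indexing with a computed position below is an assumption and may need list_extract instead, depending on the engine.
-- Same significance table without repeating the query per variable
SELECT
v.variable,
coefficient[v.idx] as coeff,
p_value[v.idx] as p_val,
CASE WHEN p_value[v.idx] < 0.05 THEN 'Significant' ELSE 'Not significant' END as result
FROM model_results,
(VALUES ('marketing_spend', 1), ('team_size', 2), ('holiday_season', 3)) as v(variable, idx);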
Step 7: Generate Test Predictions
CREATE TABLE test_predictions AS
SELECT
test_data.*,
pred.point_estimate as predicted_revenue,
pred.lower_95,
pred.upper_95,
(test_data.revenue - pred.point_estimate) as residual
FROM test_data
CROSS JOIN LATERAL anofox_statistics_ols_predict_interval(
'train_data',
'revenue',
ARRAY['marketing_spend', 'team_size', 'holiday_season'],
test_data,
0.95
) as pred;
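Before scoring the intervals formally in Step 8, it is worth checking how wide they are relative to actual revenue, since very wide intervals are technically valid but not useful; a sketch over the test_predictions table just created.
-- Average 95% interval width, absolute and relative to actual revenue
SELECT
AVG(upper_95 - lower_95) as avg_interval_width,
AVG((upper_95 - lower_95) / revenue) as avg_relative_width
FROM test_predictions;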
Step 8: Calculate Test Metrics
SELECT
SQRT(AVG(residual^2))::DECIMAL(10,2) as test_rmse,
AVG(ABS(residual)/revenue)::DECIMAL(5,4) as test_mape,
SUM(CASE WHEN revenue BETWEEN lower_95 AND upper_95 THEN 1 ELSE 0 END)
::FLOAT / COUNT(*)::FLOAT as coverage
FROM test_predictions;
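Comparing the test RMSE against the training RMSE gives a quick overfitting check; this sketch assumes the rmse column in model_results is the training-set RMSE.
-- Test RMSE vs. training RMSE; a large gap suggests overfitting
SELECT
(SELECT rmse FROM model_results LIMIT 1) as train_rmse,
SQRT(AVG(residual^2)) as test_rmse
FROM test_predictions;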
Step 9: Compare to Baselines
SELECT
'Naive (mean)' as model,
SQRT(AVG((revenue - (SELECT AVG(revenue) FROM train_data))^2)) as rmse
FROM test_data
UNION ALL
SELECT
'OLS',
SQRT(AVG(residual^2))
FROM test_predictions;
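A seasonal baseline (same month one year earlier) is often harder to beat than the overall mean; this sketch assumes month_id is a consecutive month index, so month_id - 12 refers to the same calendar month of the previous year.
-- Seasonal naive: predict this month's revenue with the value 12 months earlier
SELECT
'Seasonal naive (t-12)' as model,
SQRT(AVG((t.revenue - s.revenue)^2)) as rmse
FROM test_data t
JOIN sales_with_features s ON s.month_id = t.month_id - 12;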
Step 10: Deploy & Monitor
-- Save model coefficients for production
CREATE TABLE model_production AS
SELECT
coefficient,
std_error,
CURRENT_TIMESTAMP as created_at,
'v1.0' as model_version
FROM model_results;
-- Monitor performance weekly
CREATE VIEW model_performance_weekly AS
SELECT
DATE_TRUNC('week', date) as week,
SQRT(AVG((revenue - prediction)^2)) as weekly_rmse,
COUNT(*) as n_observations
FROM production_predictions
GROUP BY week
ORDER BY week DESC;
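On top of the weekly view, a simple alert query can flag weeks whose error drifts well above the level measured at deployment. The model_baseline_metrics table name and the 1.5x threshold below are illustrative choices, not part of the extension.
-- Store the deployment-time test RMSE (illustrative table name)
CREATE TABLE model_baseline_metrics AS
SELECT
'v1.0' as model_version,
SQRT(AVG(residual^2)) as test_rmse
FROM test_predictions;
-- Flag weeks whose RMSE exceeds 1.5x the deployment baseline
SELECT w.week, w.weekly_rmse, b.test_rmse
FROM model_performance_weekly w, model_baseline_metrics b
WHERE w.weekly_rmse > 1.5 * b.test_rmse
ORDER BY w.week DESC;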
Complete Copy-Paste Script
LOAD anofox_statistics;
-- Prepare
CREATE TABLE train_data AS SELECT * FROM sales WHERE month_id <= 36;
CREATE TABLE test_data AS SELECT * FROM sales WHERE month_id > 36;
-- Fit
CREATE TABLE model AS SELECT * FROM anofox_statistics_ols(
'train_data', 'revenue', ARRAY['marketing_spend', 'team_size']
);
-- Predict
CREATE TABLE predictions AS
SELECT test_data.*,
pred.point_estimate as predicted
FROM test_data
CROSS JOIN LATERAL anofox_statistics_ols_predict_interval(
'train_data', 'revenue', ARRAY['marketing_spend', 'team_size'],
test_data, 0.95
) as pred;
-- Evaluate
SELECT SQRT(AVG((revenue - predicted)^2))::DECIMAL(10,2) as rmse
FROM predictions;
Next Steps
- Grouped Analysis — Per-segment models
- Model Selection — Comparing models
- Production Deployment — Scaling