-- Fix for the overdue percentage calculation -- Original line 32 had syntax error -- CORRECTED Query 1: WITH customer_stats AS ( SELECT cust_number, COUNT(*) as total_invoices, COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) as cleared_count, AVG(days_to_clear) as avg_days, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days, STDDEV(days_to_clear) as std_days, MIN(days_to_clear) as min_days, MAX(days_to_clear) as max_days, AVG(total_open_amount) as avg_amount, SUM(total_open_amount) as total_amount, -- FIXED: Overdue percentage calculation CASE WHEN COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) > 0 THEN (CAST(COUNT(CASE WHEN is_overdue = TRUE THEN 1 END) AS NUMERIC) / CAST(COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) AS NUMERIC) * 100) ELSE 0.0 END as pct_overdue, (SELECT cust_payment_terms FROM invoices_history WHERE cust_number = $1 GROUP BY cust_payment_terms ORDER BY COUNT(*) DESC LIMIT 1) as most_common_payment_term, (SELECT business_code FROM invoices_history WHERE cust_number = $1 GROUP BY business_code ORDER BY COUNT(*) DESC LIMIT 1) as most_common_business_code, (SELECT invoice_currency FROM invoices_history WHERE cust_number = $1 GROUP BY invoice_currency ORDER BY COUNT(*) DESC LIMIT 1) as most_common_currency FROM invoices_history WHERE cust_number = $1 GROUP BY cust_number ) SELECT cust_number, total_invoices as cust_invoice_count, cleared_count as cust_cleared_count, ROUND(avg_days, 2) as cust_avg_days, ROUND(median_days, 2) as cust_median_days, ROUND(COALESCE(std_days, 0), 2) as cust_std_days, min_days as cust_min_days, max_days as cust_max_days, ROUND(avg_amount, 2) as cust_avg_amount, ROUND(total_amount, 2) as cust_total_amount, ROUND(pct_overdue, 2) as cust_pct_overdue, most_common_payment_term, most_common_business_code, most_common_currency FROM customer_stats; -- ============================================ -- QUERY 2: Batch Compute All Customer Aggregates -- Usage: Nightly ETL job -- ============================================ -- Name: compute_all_customer_aggregates -- Description: Computes aggregates for ALL customers with cleared invoices WITH customer_stats AS ( SELECT cust_number, COUNT(*) as total_invoices, COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) as cleared_count, AVG(days_to_clear) as avg_days, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days, STDDEV(days_to_clear) as std_days, MIN(days_to_clear) as min_days, MAX(days_to_clear) as max_days, AVG(total_open_amount) as avg_amount, SUM(total_open_amount) as total_amount, COUNT(CASE WHEN is_overdue = TRUE THEN 1 END)::NUMERIC / NULLIF(COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END), 0) * 100 as pct_overdue, MODE() WITHIN GROUP (ORDER BY cust_payment_terms) as most_common_payment_term, MODE() WITHIN GROUP (ORDER BY business_code) as most_common_business_code, MODE() WITHIN GROUP (ORDER BY invoice_currency) as most_common_currency FROM invoices_history WHERE clear_date IS NOT NULL -- Only customers with history GROUP BY cust_number HAVING COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) > 0 ) SELECT cust_number, total_invoices as cust_invoice_count, cleared_count as cust_cleared_count, ROUND(avg_days, 2) as cust_avg_days, ROUND(median_days, 2) as cust_median_days, ROUND(COALESCE(std_days, 0), 2) as cust_std_days, min_days as cust_min_days, max_days as cust_max_days, ROUND(avg_amount, 2) as cust_avg_amount, ROUND(total_amount, 2) as cust_total_amount, ROUND(COALESCE(pct_overdue, 0), 2) as cust_pct_overdue, most_common_payment_term, most_common_business_code, most_common_currency, NOW() as last_computed_at FROM customer_stats; -- ============================================ -- QUERY 3: Upsert Customer Aggregates -- Usage: Insert or update customer_aggregates table -- ============================================ -- Name: upsert_customer_aggregates -- Description: Insert/update aggregates with conflict handling -- Parameters: All customer aggregate fields INSERT INTO customer_aggregates ( cust_number, cust_invoice_count, cust_cleared_count, cust_avg_days, cust_median_days, cust_std_days, cust_min_days, cust_max_days, cust_avg_amount, cust_total_amount, cust_pct_overdue, most_common_payment_term, most_common_business_code, most_common_currency, last_computed_at ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, NOW() ) ON CONFLICT (cust_number) DO UPDATE SET cust_invoice_count = EXCLUDED.cust_invoice_count, cust_cleared_count = EXCLUDED.cust_cleared_count, cust_avg_days = EXCLUDED.cust_avg_days, cust_median_days = EXCLUDED.cust_median_days, cust_std_days = EXCLUDED.cust_std_days, cust_min_days = EXCLUDED.cust_min_days, cust_max_days = EXCLUDED.cust_max_days, cust_avg_amount = EXCLUDED.cust_avg_amount, cust_total_amount = EXCLUDED.cust_total_amount, cust_pct_overdue = EXCLUDED.cust_pct_overdue, most_common_payment_term = EXCLUDED.most_common_payment_term, most_common_business_code = EXCLUDED.most_common_business_code, most_common_currency = EXCLUDED.most_common_currency, last_computed_at = NOW(); -- ============================================ -- QUERY 4: Compute Payment Terms Aggregates -- Usage: Pre-compute payment term statistics -- ============================================ -- Name: compute_payment_terms_aggregates WITH payment_stats AS ( SELECT cust_payment_terms, AVG(days_to_clear) as avg_days, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days, COUNT(*) as invoice_count FROM invoices_history WHERE clear_date IS NOT NULL AND cust_payment_terms IS NOT NULL GROUP BY cust_payment_terms ) SELECT cust_payment_terms, ROUND(avg_days, 2) as payment_terms_avg_days, ROUND(median_days, 2) as payment_terms_median_days, invoice_count as payment_terms_count, NOW() as last_computed_at FROM payment_stats; -- ============================================ -- QUERY 5: Compute Business Code Aggregates -- Usage: Pre-compute business code statistics -- ============================================ -- Name: compute_business_code_aggregates WITH business_stats AS ( SELECT business_code, AVG(days_to_clear) as avg_days, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days, COUNT(*) as invoice_count FROM invoices_history WHERE clear_date IS NOT NULL AND business_code IS NOT NULL GROUP BY business_code ) SELECT business_code, ROUND(avg_days, 2) as business_avg_days, ROUND(median_days, 2) as business_median_days, invoice_count as business_count, NOW() as last_computed_at FROM business_stats; -- ============================================ -- QUERY 6: Get Customer Features (for inference) -- Usage: Retrieve all features for a customer -- ============================================ -- Name: get_customer_features -- Description: Get customer aggregates for prediction -- Parameters: $1 = cust_number SELECT cust_number, cust_invoice_count, cust_cleared_count, cust_avg_days, cust_median_days, cust_std_days, cust_min_days, cust_max_days, cust_avg_amount, cust_total_amount, cust_pct_overdue, most_common_payment_term, most_common_business_code, most_common_currency, last_computed_at FROM customer_aggregates WHERE cust_number = $1; -- ============================================ -- QUERY 7: Get Payment Terms Features -- Usage: Retrieve payment term stats -- ============================================ -- Name: get_payment_terms_features -- Parameters: $1 = cust_payment_terms SELECT cust_payment_terms, payment_terms_avg_days, payment_terms_median_days, payment_terms_count FROM payment_terms_aggregates WHERE cust_payment_terms = $1; -- ============================================ -- QUERY 8: Get Business Code Features -- Usage: Retrieve business code stats -- ============================================ -- Name: get_business_code_features -- Parameters: $1 = business_code SELECT business_code, business_avg_days, business_median_days, business_count FROM business_code_aggregates WHERE business_code = $1; -- ============================================ -- QUERY 9: Insert Invoice (with upsert) -- Usage: Ingest new invoice data -- ============================================ -- Name: upsert_invoice -- Parameters: All invoice fields INSERT INTO invoices_history ( invoice_id, business_code, cust_number, name_customer, posting_date, document_create_date, document_create_date_alt, due_in_date, baseline_create_date, clear_date, total_open_amount, invoice_currency, document_type, cust_payment_terms, posting_id, business_year ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16 ) ON CONFLICT (invoice_id) DO UPDATE SET clear_date = EXCLUDED.clear_date, is_open = EXCLUDED.is_open, updated_at = NOW(); -- ============================================ -- QUERY 10: Insert Prediction Log -- Usage: Record prediction for monitoring -- ============================================ -- Name: insert_prediction_log -- Parameters: prediction fields INSERT INTO predictions_log ( invoice_id, cust_number, posting_date, total_open_amount, business_code, cust_payment_terms, features_json, predicted_days_to_clear, predicted_clear_date, model_version, model_path ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11 ) RETURNING prediction_id; -- ============================================ -- QUERY 11: Update Prediction with Actual Outcome -- Usage: Record actual outcome for model monitoring -- ============================================ -- Name: update_prediction_outcome -- Parameters: $1 = prediction_id, $2 = actual_clear_date UPDATE predictions_log SET actual_clear_date = $2, actual_days_to_clear = EXTRACT(DAY FROM ($2 - posting_date))::INTEGER, prediction_error = EXTRACT(DAY FROM ($2 - posting_date))::INTEGER - predicted_days_to_clear, absolute_error = ABS(EXTRACT(DAY FROM ($2 - posting_date))::INTEGER - predicted_days_to_clear), outcome_recorded_at = NOW() WHERE prediction_id = $1; -- ============================================ -- QUERY 12: Get Recent Predictions Performance -- Usage: Monitor model accuracy -- ============================================ -- Name: get_prediction_metrics -- Description: Calculate model performance over last N days -- Parameters: $1 = days_back (e.g., 30) SELECT COUNT(*) as total_predictions, COUNT(actual_days_to_clear) as predictions_with_outcome, ROUND(AVG(ABS(prediction_error)), 2) as mae, ROUND(SQRT(AVG(prediction_error * prediction_error)), 2) as rmse, ROUND(AVG(CASE WHEN ABS(prediction_error) <= 3 THEN 1.0 ELSE 0.0 END) * 100, 2) as pct_within_3_days, ROUND(AVG(CASE WHEN ABS(prediction_error) <= 7 THEN 1.0 ELSE 0.0 END) * 100, 2) as pct_within_7_days FROM predictions_log WHERE predicted_at >= NOW() - INTERVAL '$1 days' AND actual_days_to_clear IS NOT NULL; -- ============================================ -- End of Query Templates -- ============================================