Dipan04's picture
Deploy Invoice Digitization Agent
8a859a8
-- Fix for the overdue percentage calculation
-- Original line 32 had syntax error
-- CORRECTED Query 1:
WITH customer_stats AS (
SELECT
cust_number,
COUNT(*) as total_invoices,
COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) as cleared_count,
AVG(days_to_clear) as avg_days,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days,
STDDEV(days_to_clear) as std_days,
MIN(days_to_clear) as min_days,
MAX(days_to_clear) as max_days,
AVG(total_open_amount) as avg_amount,
SUM(total_open_amount) as total_amount,
-- FIXED: Overdue percentage calculation
CASE
WHEN COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) > 0
THEN (CAST(COUNT(CASE WHEN is_overdue = TRUE THEN 1 END) AS NUMERIC) /
CAST(COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) AS NUMERIC) * 100)
ELSE 0.0
END as pct_overdue,
(SELECT cust_payment_terms FROM invoices_history WHERE cust_number = $1 GROUP BY cust_payment_terms ORDER BY COUNT(*) DESC LIMIT 1) as most_common_payment_term,
(SELECT business_code FROM invoices_history WHERE cust_number = $1 GROUP BY business_code ORDER BY COUNT(*) DESC LIMIT 1) as most_common_business_code,
(SELECT invoice_currency FROM invoices_history WHERE cust_number = $1 GROUP BY invoice_currency ORDER BY COUNT(*) DESC LIMIT 1) as most_common_currency
FROM invoices_history
WHERE cust_number = $1
GROUP BY cust_number
)
SELECT
cust_number,
total_invoices as cust_invoice_count,
cleared_count as cust_cleared_count,
ROUND(avg_days, 2) as cust_avg_days,
ROUND(median_days, 2) as cust_median_days,
ROUND(COALESCE(std_days, 0), 2) as cust_std_days,
min_days as cust_min_days,
max_days as cust_max_days,
ROUND(avg_amount, 2) as cust_avg_amount,
ROUND(total_amount, 2) as cust_total_amount,
ROUND(pct_overdue, 2) as cust_pct_overdue,
most_common_payment_term,
most_common_business_code,
most_common_currency
FROM customer_stats;
-- ============================================
-- QUERY 2: Batch Compute All Customer Aggregates
-- Usage: Nightly ETL job
-- ============================================
-- Name: compute_all_customer_aggregates
-- Description: Computes aggregates for ALL customers with cleared invoices
WITH customer_stats AS (
SELECT
cust_number,
COUNT(*) as total_invoices,
COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) as cleared_count,
AVG(days_to_clear) as avg_days,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days,
STDDEV(days_to_clear) as std_days,
MIN(days_to_clear) as min_days,
MAX(days_to_clear) as max_days,
AVG(total_open_amount) as avg_amount,
SUM(total_open_amount) as total_amount,
COUNT(CASE WHEN is_overdue = TRUE THEN 1 END)::NUMERIC /
NULLIF(COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END), 0) * 100 as pct_overdue,
MODE() WITHIN GROUP (ORDER BY cust_payment_terms) as most_common_payment_term,
MODE() WITHIN GROUP (ORDER BY business_code) as most_common_business_code,
MODE() WITHIN GROUP (ORDER BY invoice_currency) as most_common_currency
FROM invoices_history
WHERE clear_date IS NOT NULL -- Only customers with history
GROUP BY cust_number
HAVING COUNT(CASE WHEN clear_date IS NOT NULL THEN 1 END) > 0
)
SELECT
cust_number,
total_invoices as cust_invoice_count,
cleared_count as cust_cleared_count,
ROUND(avg_days, 2) as cust_avg_days,
ROUND(median_days, 2) as cust_median_days,
ROUND(COALESCE(std_days, 0), 2) as cust_std_days,
min_days as cust_min_days,
max_days as cust_max_days,
ROUND(avg_amount, 2) as cust_avg_amount,
ROUND(total_amount, 2) as cust_total_amount,
ROUND(COALESCE(pct_overdue, 0), 2) as cust_pct_overdue,
most_common_payment_term,
most_common_business_code,
most_common_currency,
NOW() as last_computed_at
FROM customer_stats;
-- ============================================
-- QUERY 3: Upsert Customer Aggregates
-- Usage: Insert or update customer_aggregates table
-- ============================================
-- Name: upsert_customer_aggregates
-- Description: Insert/update aggregates with conflict handling
-- Parameters: All customer aggregate fields
INSERT INTO customer_aggregates (
cust_number,
cust_invoice_count,
cust_cleared_count,
cust_avg_days,
cust_median_days,
cust_std_days,
cust_min_days,
cust_max_days,
cust_avg_amount,
cust_total_amount,
cust_pct_overdue,
most_common_payment_term,
most_common_business_code,
most_common_currency,
last_computed_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, NOW()
)
ON CONFLICT (cust_number)
DO UPDATE SET
cust_invoice_count = EXCLUDED.cust_invoice_count,
cust_cleared_count = EXCLUDED.cust_cleared_count,
cust_avg_days = EXCLUDED.cust_avg_days,
cust_median_days = EXCLUDED.cust_median_days,
cust_std_days = EXCLUDED.cust_std_days,
cust_min_days = EXCLUDED.cust_min_days,
cust_max_days = EXCLUDED.cust_max_days,
cust_avg_amount = EXCLUDED.cust_avg_amount,
cust_total_amount = EXCLUDED.cust_total_amount,
cust_pct_overdue = EXCLUDED.cust_pct_overdue,
most_common_payment_term = EXCLUDED.most_common_payment_term,
most_common_business_code = EXCLUDED.most_common_business_code,
most_common_currency = EXCLUDED.most_common_currency,
last_computed_at = NOW();
-- ============================================
-- QUERY 4: Compute Payment Terms Aggregates
-- Usage: Pre-compute payment term statistics
-- ============================================
-- Name: compute_payment_terms_aggregates
WITH payment_stats AS (
SELECT
cust_payment_terms,
AVG(days_to_clear) as avg_days,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days,
COUNT(*) as invoice_count
FROM invoices_history
WHERE clear_date IS NOT NULL
AND cust_payment_terms IS NOT NULL
GROUP BY cust_payment_terms
)
SELECT
cust_payment_terms,
ROUND(avg_days, 2) as payment_terms_avg_days,
ROUND(median_days, 2) as payment_terms_median_days,
invoice_count as payment_terms_count,
NOW() as last_computed_at
FROM payment_stats;
-- ============================================
-- QUERY 5: Compute Business Code Aggregates
-- Usage: Pre-compute business code statistics
-- ============================================
-- Name: compute_business_code_aggregates
WITH business_stats AS (
SELECT
business_code,
AVG(days_to_clear) as avg_days,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY days_to_clear) as median_days,
COUNT(*) as invoice_count
FROM invoices_history
WHERE clear_date IS NOT NULL
AND business_code IS NOT NULL
GROUP BY business_code
)
SELECT
business_code,
ROUND(avg_days, 2) as business_avg_days,
ROUND(median_days, 2) as business_median_days,
invoice_count as business_count,
NOW() as last_computed_at
FROM business_stats;
-- ============================================
-- QUERY 6: Get Customer Features (for inference)
-- Usage: Retrieve all features for a customer
-- ============================================
-- Name: get_customer_features
-- Description: Get customer aggregates for prediction
-- Parameters: $1 = cust_number
SELECT
cust_number,
cust_invoice_count,
cust_cleared_count,
cust_avg_days,
cust_median_days,
cust_std_days,
cust_min_days,
cust_max_days,
cust_avg_amount,
cust_total_amount,
cust_pct_overdue,
most_common_payment_term,
most_common_business_code,
most_common_currency,
last_computed_at
FROM customer_aggregates
WHERE cust_number = $1;
-- ============================================
-- QUERY 7: Get Payment Terms Features
-- Usage: Retrieve payment term stats
-- ============================================
-- Name: get_payment_terms_features
-- Parameters: $1 = cust_payment_terms
SELECT
cust_payment_terms,
payment_terms_avg_days,
payment_terms_median_days,
payment_terms_count
FROM payment_terms_aggregates
WHERE cust_payment_terms = $1;
-- ============================================
-- QUERY 8: Get Business Code Features
-- Usage: Retrieve business code stats
-- ============================================
-- Name: get_business_code_features
-- Parameters: $1 = business_code
SELECT
business_code,
business_avg_days,
business_median_days,
business_count
FROM business_code_aggregates
WHERE business_code = $1;
-- ============================================
-- QUERY 9: Insert Invoice (with upsert)
-- Usage: Ingest new invoice data
-- ============================================
-- Name: upsert_invoice
-- Parameters: All invoice fields
INSERT INTO invoices_history (
invoice_id,
business_code,
cust_number,
name_customer,
posting_date,
document_create_date,
document_create_date_alt,
due_in_date,
baseline_create_date,
clear_date,
total_open_amount,
invoice_currency,
document_type,
cust_payment_terms,
posting_id,
business_year
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16
)
ON CONFLICT (invoice_id)
DO UPDATE SET
clear_date = EXCLUDED.clear_date,
is_open = EXCLUDED.is_open,
updated_at = NOW();
-- ============================================
-- QUERY 10: Insert Prediction Log
-- Usage: Record prediction for monitoring
-- ============================================
-- Name: insert_prediction_log
-- Parameters: prediction fields
INSERT INTO predictions_log (
invoice_id,
cust_number,
posting_date,
total_open_amount,
business_code,
cust_payment_terms,
features_json,
predicted_days_to_clear,
predicted_clear_date,
model_version,
model_path
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11
) RETURNING prediction_id;
-- ============================================
-- QUERY 11: Update Prediction with Actual Outcome
-- Usage: Record actual outcome for model monitoring
-- ============================================
-- Name: update_prediction_outcome
-- Parameters: $1 = prediction_id, $2 = actual_clear_date
UPDATE predictions_log
SET
actual_clear_date = $2,
actual_days_to_clear = EXTRACT(DAY FROM ($2 - posting_date))::INTEGER,
prediction_error = EXTRACT(DAY FROM ($2 - posting_date))::INTEGER - predicted_days_to_clear,
absolute_error = ABS(EXTRACT(DAY FROM ($2 - posting_date))::INTEGER - predicted_days_to_clear),
outcome_recorded_at = NOW()
WHERE prediction_id = $1;
-- ============================================
-- QUERY 12: Get Recent Predictions Performance
-- Usage: Monitor model accuracy
-- ============================================
-- Name: get_prediction_metrics
-- Description: Calculate model performance over last N days
-- Parameters: $1 = days_back (e.g., 30)
SELECT
COUNT(*) as total_predictions,
COUNT(actual_days_to_clear) as predictions_with_outcome,
ROUND(AVG(ABS(prediction_error)), 2) as mae,
ROUND(SQRT(AVG(prediction_error * prediction_error)), 2) as rmse,
ROUND(AVG(CASE
WHEN ABS(prediction_error) <= 3 THEN 1.0
ELSE 0.0
END) * 100, 2) as pct_within_3_days,
ROUND(AVG(CASE
WHEN ABS(prediction_error) <= 7 THEN 1.0
ELSE 0.0
END) * 100, 2) as pct_within_7_days
FROM predictions_log
WHERE predicted_at >= NOW() - INTERVAL '$1 days'
AND actual_days_to_clear IS NOT NULL;
-- ============================================
-- End of Query Templates
-- ============================================