Spaces:
Sleeping
Sleeping
| import sqlite3 | |
| from datetime import datetime, timedelta | |
| import random | |
| # Create database connection | |
| conn = sqlite3.connect('drift_detection.db') | |
| cursor = conn.cursor() | |
| # Create tables | |
| cursor.executescript(''' | |
| CREATE TABLE IF NOT EXISTS model_info ( | |
| model_id INTEGER PRIMARY KEY, | |
| model_name VARCHAR(255) NOT NULL, | |
| release_date DATE NOT NULL, | |
| prediction_period INTEGER | |
| ); | |
| CREATE TABLE IF NOT EXISTS total_result ( | |
| customer_id INTEGER NOT NULL, | |
| model_id INTEGER NOT NULL, | |
| score REAL NOT NULL CHECK(score > 0), | |
| prediction_date DATE NOT NULL, | |
| load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, | |
| PRIMARY KEY (customer_id, model_id), | |
| FOREIGN KEY (model_id) REFERENCES model_info(model_id) | |
| ); | |
| CREATE TABLE IF NOT EXISTS delivered ( | |
| customer_id INTEGER NOT NULL, | |
| model_id INTEGER NOT NULL, | |
| rank INTEGER NOT NULL, | |
| prediction_date DATE NOT NULL, | |
| load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, | |
| PRIMARY KEY (customer_id, model_id), | |
| FOREIGN KEY (model_id) REFERENCES model_info(model_id) | |
| ); | |
| CREATE TABLE IF NOT EXISTS drift_record ( | |
| model_id INTEGER NOT NULL, | |
| precision REAL CHECK(precision >= 0 AND precision <= 1), | |
| recall REAL CHECK(recall >= 0 AND recall <= 1), | |
| sample_numbers INTEGER NOT NULL CHECK(sample_numbers > 0), | |
| js_value REAL CHECK(js_value >= 0 AND js_value <= 1), | |
| wd_value REAL CHECK(wd_value >= 0), | |
| prediction_date TIMESTAMP NOT NULL, | |
| PRIMARY KEY (model_id, prediction_date), | |
| FOREIGN KEY (model_id) REFERENCES model_info(model_id) | |
| ); | |
| ''') | |
| # Insert sample data into model_info | |
| model_data = [ | |
| (1, 'Churn Prediction Model v1', '2024-11-15', 30), | |
| (2, 'Churn Prediction Model v2', '2024-11-20', 30), | |
| (3, 'Churn Prediction Model v3', '2024-12-10', 30), | |
| (4, 'Customer Lifetime Value Model', '2024-12-01', 60), | |
| (5, 'Product Recommendation Model', '2024-12-15', 15) | |
| ] | |
| cursor.executemany( | |
| 'INSERT OR IGNORE INTO model_info (model_id, model_name, release_date, prediction_period) VALUES (?, ?, ?, ?)', | |
| model_data | |
| ) | |
| # Generate sample data for total_result | |
| base_date = datetime(2025, 1, 1) | |
| for customer_id in range(1, 101): | |
| for model_id in range(1, 6): | |
| score = random.uniform(0.1, 0.95) | |
| pred_date = base_date + timedelta(days=random.randint(0, 240)) | |
| cursor.execute( | |
| 'INSERT OR IGNORE INTO total_result (customer_id, model_id, score, prediction_date) VALUES (?, ?, ?, ?)', | |
| (customer_id, model_id, score, pred_date.strftime('%Y-%m-%d')) | |
| ) | |
| # Generate sample data for delivered | |
| for customer_id in range(1, 101): | |
| for model_id in range(1, 6): | |
| if random.random() > 0.3: # 70% of customers get delivery | |
| rank = random.randint(1, 100) | |
| pred_date = base_date + timedelta(days=random.randint(0, 240)) | |
| cursor.execute( | |
| 'INSERT OR IGNORE INTO delivered (customer_id, model_id, rank, prediction_date) VALUES (?, ?, ?, ?)', | |
| (customer_id, model_id, rank, pred_date.strftime('%Y-%m-%d')) | |
| ) | |
| # Generate sample data for drift_record (time series data) | |
| # Create records every 3-4 days for each model (Jan 2025 - Aug 2025) | |
| for model_id in range(1, 6): | |
| # Generate data every 3-4 days for 8 months (about 60-70 records per model) | |
| day_offset = 0 | |
| record_count = 0 | |
| while day_offset < 240: # 8 months = ~240 days | |
| # Add random time component (hours, minutes, seconds) | |
| pred_date = base_date + timedelta( | |
| days=day_offset, | |
| hours=random.randint(0, 23), | |
| minutes=random.randint(0, 59), | |
| seconds=random.randint(0, 59) | |
| ) | |
| # Add some trend and noise to make it realistic | |
| days_elapsed = day_offset / 30.0 # Convert to months for trend calculation | |
| base_precision = 0.85 - (days_elapsed * 0.01) # Slight degradation over time | |
| base_recall = 0.80 - (days_elapsed * 0.008) | |
| precision = max(0.6, min(0.95, base_precision + random.uniform(-0.05, 0.05))) | |
| recall = max(0.55, min(0.92, base_recall + random.uniform(-0.05, 0.05))) | |
| sample_numbers = random.randint(1000, 5000) | |
| js_value = min(0.3, 0.05 + (days_elapsed * 0.015) + random.uniform(-0.02, 0.02)) # Drift increases over time | |
| wd_value = min(2.5, 0.2 + (days_elapsed * 0.1) + random.uniform(-0.1, 0.1)) | |
| cursor.execute( | |
| '''INSERT OR IGNORE INTO drift_record | |
| (model_id, precision, recall, sample_numbers, js_value, wd_value, prediction_date) | |
| VALUES (?, ?, ?, ?, ?, ?, ?)''', | |
| (model_id, precision, recall, sample_numbers, js_value, wd_value, pred_date.strftime('%Y-%m-%d %H:%M:%S')) | |
| ) | |
| # Next record in 3-4 days | |
| day_offset += random.randint(3, 4) | |
| record_count += 1 | |
| conn.commit() | |
| conn.close() | |
| print("Database created successfully!") | |
| print("Database file: drift_detection.db") | |