Spaces:
Sleeping
Sleeping
File size: 4,911 Bytes
a413511 403ebf2 a413511 403ebf2 a413511 403ebf2 a413511 403ebf2 a413511 403ebf2 a413511 403ebf2 a413511 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import sqlite3
from datetime import datetime, timedelta
import random
# Create database connection
conn = sqlite3.connect('drift_detection.db')
cursor = conn.cursor()
# Create tables
cursor.executescript('''
CREATE TABLE IF NOT EXISTS model_info (
model_id INTEGER PRIMARY KEY,
model_name VARCHAR(255) NOT NULL,
release_date DATE NOT NULL,
prediction_period INTEGER
);
CREATE TABLE IF NOT EXISTS total_result (
customer_id INTEGER NOT NULL,
model_id INTEGER NOT NULL,
score REAL NOT NULL CHECK(score > 0),
prediction_date DATE NOT NULL,
load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (customer_id, model_id),
FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);
CREATE TABLE IF NOT EXISTS delivered (
customer_id INTEGER NOT NULL,
model_id INTEGER NOT NULL,
rank INTEGER NOT NULL,
prediction_date DATE NOT NULL,
load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (customer_id, model_id),
FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);
CREATE TABLE IF NOT EXISTS drift_record (
model_id INTEGER NOT NULL,
precision REAL CHECK(precision >= 0 AND precision <= 1),
recall REAL CHECK(recall >= 0 AND recall <= 1),
sample_numbers INTEGER NOT NULL CHECK(sample_numbers > 0),
js_value REAL CHECK(js_value >= 0 AND js_value <= 1),
wd_value REAL CHECK(wd_value >= 0),
prediction_date TIMESTAMP NOT NULL,
PRIMARY KEY (model_id, prediction_date),
FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);
''')
# Insert sample data into model_info
model_data = [
(1, 'Churn Prediction Model v1', '2024-11-15', 30),
(2, 'Churn Prediction Model v2', '2024-11-20', 30),
(3, 'Churn Prediction Model v3', '2024-12-10', 30),
(4, 'Customer Lifetime Value Model', '2024-12-01', 60),
(5, 'Product Recommendation Model', '2024-12-15', 15)
]
cursor.executemany(
'INSERT OR IGNORE INTO model_info (model_id, model_name, release_date, prediction_period) VALUES (?, ?, ?, ?)',
model_data
)
# Generate sample data for total_result
base_date = datetime(2025, 1, 1)
for customer_id in range(1, 101):
for model_id in range(1, 6):
score = random.uniform(0.1, 0.95)
pred_date = base_date + timedelta(days=random.randint(0, 240))
cursor.execute(
'INSERT OR IGNORE INTO total_result (customer_id, model_id, score, prediction_date) VALUES (?, ?, ?, ?)',
(customer_id, model_id, score, pred_date.strftime('%Y-%m-%d'))
)
# Generate sample data for delivered
for customer_id in range(1, 101):
for model_id in range(1, 6):
if random.random() > 0.3: # 70% of customers get delivery
rank = random.randint(1, 100)
pred_date = base_date + timedelta(days=random.randint(0, 240))
cursor.execute(
'INSERT OR IGNORE INTO delivered (customer_id, model_id, rank, prediction_date) VALUES (?, ?, ?, ?)',
(customer_id, model_id, rank, pred_date.strftime('%Y-%m-%d'))
)
# Generate sample data for drift_record (time series data)
# Create records every 3-4 days for each model (Jan 2025 - Aug 2025)
for model_id in range(1, 6):
# Generate data every 3-4 days for 8 months (about 60-70 records per model)
day_offset = 0
record_count = 0
while day_offset < 240: # 8 months = ~240 days
# Add random time component (hours, minutes, seconds)
pred_date = base_date + timedelta(
days=day_offset,
hours=random.randint(0, 23),
minutes=random.randint(0, 59),
seconds=random.randint(0, 59)
)
# Add some trend and noise to make it realistic
days_elapsed = day_offset / 30.0 # Convert to months for trend calculation
base_precision = 0.85 - (days_elapsed * 0.01) # Slight degradation over time
base_recall = 0.80 - (days_elapsed * 0.008)
precision = max(0.6, min(0.95, base_precision + random.uniform(-0.05, 0.05)))
recall = max(0.55, min(0.92, base_recall + random.uniform(-0.05, 0.05)))
sample_numbers = random.randint(1000, 5000)
js_value = min(0.3, 0.05 + (days_elapsed * 0.015) + random.uniform(-0.02, 0.02)) # Drift increases over time
wd_value = min(2.5, 0.2 + (days_elapsed * 0.1) + random.uniform(-0.1, 0.1))
cursor.execute(
'''INSERT OR IGNORE INTO drift_record
(model_id, precision, recall, sample_numbers, js_value, wd_value, prediction_date)
VALUES (?, ?, ?, ?, ?, ?, ?)''',
(model_id, precision, recall, sample_numbers, js_value, wd_value, pred_date.strftime('%Y-%m-%d %H:%M:%S'))
)
# Next record in 3-4 days
day_offset += random.randint(3, 4)
record_count += 1
conn.commit()
conn.close()
print("Database created successfully!")
print("Database file: drift_detection.db")
|