Spaces:

yoon-gu
/

drift-detection-ui

Sleeping

File size: 4,911 Bytes

import sqlite3
from datetime import datetime, timedelta
import random

# Create database connection
conn = sqlite3.connect('drift_detection.db')
cursor = conn.cursor()

# Create tables
cursor.executescript('''
CREATE TABLE IF NOT EXISTS model_info (
    model_id INTEGER PRIMARY KEY,
    model_name VARCHAR(255) NOT NULL,
    release_date DATE NOT NULL,
    prediction_period INTEGER
);

CREATE TABLE IF NOT EXISTS total_result (
    customer_id INTEGER NOT NULL,
    model_id INTEGER NOT NULL,
    score REAL NOT NULL CHECK(score > 0),
    prediction_date DATE NOT NULL,
    load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (customer_id, model_id),
    FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);

CREATE TABLE IF NOT EXISTS delivered (
    customer_id INTEGER NOT NULL,
    model_id INTEGER NOT NULL,
    rank INTEGER NOT NULL,
    prediction_date DATE NOT NULL,
    load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (customer_id, model_id),
    FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);

CREATE TABLE IF NOT EXISTS drift_record (
    model_id INTEGER NOT NULL,
    precision REAL CHECK(precision >= 0 AND precision <= 1),
    recall REAL CHECK(recall >= 0 AND recall <= 1),
    sample_numbers INTEGER NOT NULL CHECK(sample_numbers > 0),
    js_value REAL CHECK(js_value >= 0 AND js_value <= 1),
    wd_value REAL CHECK(wd_value >= 0),
    prediction_date TIMESTAMP NOT NULL,
    PRIMARY KEY (model_id, prediction_date),
    FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);
''')

# Insert sample data into model_info
model_data = [
    (1, 'Churn Prediction Model v1', '2024-11-15', 30),
    (2, 'Churn Prediction Model v2', '2024-11-20', 30),
    (3, 'Churn Prediction Model v3', '2024-12-10', 30),
    (4, 'Customer Lifetime Value Model', '2024-12-01', 60),
    (5, 'Product Recommendation Model', '2024-12-15', 15)
]

cursor.executemany(
    'INSERT OR IGNORE INTO model_info (model_id, model_name, release_date, prediction_period) VALUES (?, ?, ?, ?)',
    model_data
)

# Generate sample data for total_result
base_date = datetime(2025, 1, 1)
for customer_id in range(1, 101):
    for model_id in range(1, 6):
        score = random.uniform(0.1, 0.95)
        pred_date = base_date + timedelta(days=random.randint(0, 240))
        cursor.execute(
            'INSERT OR IGNORE INTO total_result (customer_id, model_id, score, prediction_date) VALUES (?, ?, ?, ?)',
            (customer_id, model_id, score, pred_date.strftime('%Y-%m-%d'))
        )

# Generate sample data for delivered
for customer_id in range(1, 101):
    for model_id in range(1, 6):
        if random.random() > 0.3:  # 70% of customers get delivery
            rank = random.randint(1, 100)
            pred_date = base_date + timedelta(days=random.randint(0, 240))
            cursor.execute(
                'INSERT OR IGNORE INTO delivered (customer_id, model_id, rank, prediction_date) VALUES (?, ?, ?, ?)',
                (customer_id, model_id, rank, pred_date.strftime('%Y-%m-%d'))
            )

# Generate sample data for drift_record (time series data)
# Create records every 3-4 days for each model (Jan 2025 - Aug 2025)
for model_id in range(1, 6):
    # Generate data every 3-4 days for 8 months (about 60-70 records per model)
    day_offset = 0
    record_count = 0
    while day_offset < 240:  # 8 months = ~240 days
        # Add random time component (hours, minutes, seconds)
        pred_date = base_date + timedelta(
            days=day_offset,
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
            seconds=random.randint(0, 59)
        )

        # Add some trend and noise to make it realistic
        days_elapsed = day_offset / 30.0  # Convert to months for trend calculation
        base_precision = 0.85 - (days_elapsed * 0.01)  # Slight degradation over time
        base_recall = 0.80 - (days_elapsed * 0.008)

        precision = max(0.6, min(0.95, base_precision + random.uniform(-0.05, 0.05)))
        recall = max(0.55, min(0.92, base_recall + random.uniform(-0.05, 0.05)))
        sample_numbers = random.randint(1000, 5000)
        js_value = min(0.3, 0.05 + (days_elapsed * 0.015) + random.uniform(-0.02, 0.02))  # Drift increases over time
        wd_value = min(2.5, 0.2 + (days_elapsed * 0.1) + random.uniform(-0.1, 0.1))

        cursor.execute(
            '''INSERT OR IGNORE INTO drift_record
            (model_id, precision, recall, sample_numbers, js_value, wd_value, prediction_date)
            VALUES (?, ?, ?, ?, ?, ?, ?)''',
            (model_id, precision, recall, sample_numbers, js_value, wd_value, pred_date.strftime('%Y-%m-%d %H:%M:%S'))
        )

        # Next record in 3-4 days
        day_offset += random.randint(3, 4)
        record_count += 1

conn.commit()
conn.close()

print("Database created successfully!")
print("Database file: drift_detection.db")