File size: 4,911 Bytes
a413511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403ebf2
a413511
403ebf2
 
 
 
a413511
 
403ebf2
a413511
 
 
 
 
 
403ebf2
 
 
a413511
 
 
 
403ebf2
 
a413511
 
 
 
 
 
 
 
403ebf2
 
 
 
a413511
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import sqlite3
from datetime import datetime, timedelta
import random

# Create database connection
conn = sqlite3.connect('drift_detection.db')
cursor = conn.cursor()

# Create tables
cursor.executescript('''
CREATE TABLE IF NOT EXISTS model_info (
    model_id INTEGER PRIMARY KEY,
    model_name VARCHAR(255) NOT NULL,
    release_date DATE NOT NULL,
    prediction_period INTEGER
);

CREATE TABLE IF NOT EXISTS total_result (
    customer_id INTEGER NOT NULL,
    model_id INTEGER NOT NULL,
    score REAL NOT NULL CHECK(score > 0),
    prediction_date DATE NOT NULL,
    load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (customer_id, model_id),
    FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);

CREATE TABLE IF NOT EXISTS delivered (
    customer_id INTEGER NOT NULL,
    model_id INTEGER NOT NULL,
    rank INTEGER NOT NULL,
    prediction_date DATE NOT NULL,
    load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (customer_id, model_id),
    FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);

CREATE TABLE IF NOT EXISTS drift_record (
    model_id INTEGER NOT NULL,
    precision REAL CHECK(precision >= 0 AND precision <= 1),
    recall REAL CHECK(recall >= 0 AND recall <= 1),
    sample_numbers INTEGER NOT NULL CHECK(sample_numbers > 0),
    js_value REAL CHECK(js_value >= 0 AND js_value <= 1),
    wd_value REAL CHECK(wd_value >= 0),
    prediction_date TIMESTAMP NOT NULL,
    PRIMARY KEY (model_id, prediction_date),
    FOREIGN KEY (model_id) REFERENCES model_info(model_id)
);
''')

# Insert sample data into model_info
model_data = [
    (1, 'Churn Prediction Model v1', '2024-11-15', 30),
    (2, 'Churn Prediction Model v2', '2024-11-20', 30),
    (3, 'Churn Prediction Model v3', '2024-12-10', 30),
    (4, 'Customer Lifetime Value Model', '2024-12-01', 60),
    (5, 'Product Recommendation Model', '2024-12-15', 15)
]

cursor.executemany(
    'INSERT OR IGNORE INTO model_info (model_id, model_name, release_date, prediction_period) VALUES (?, ?, ?, ?)',
    model_data
)

# Generate sample data for total_result
base_date = datetime(2025, 1, 1)
for customer_id in range(1, 101):
    for model_id in range(1, 6):
        score = random.uniform(0.1, 0.95)
        pred_date = base_date + timedelta(days=random.randint(0, 240))
        cursor.execute(
            'INSERT OR IGNORE INTO total_result (customer_id, model_id, score, prediction_date) VALUES (?, ?, ?, ?)',
            (customer_id, model_id, score, pred_date.strftime('%Y-%m-%d'))
        )

# Generate sample data for delivered
for customer_id in range(1, 101):
    for model_id in range(1, 6):
        if random.random() > 0.3:  # 70% of customers get delivery
            rank = random.randint(1, 100)
            pred_date = base_date + timedelta(days=random.randint(0, 240))
            cursor.execute(
                'INSERT OR IGNORE INTO delivered (customer_id, model_id, rank, prediction_date) VALUES (?, ?, ?, ?)',
                (customer_id, model_id, rank, pred_date.strftime('%Y-%m-%d'))
            )

# Generate sample data for drift_record (time series data)
# Create records every 3-4 days for each model (Jan 2025 - Aug 2025)
for model_id in range(1, 6):
    # Generate data every 3-4 days for 8 months (about 60-70 records per model)
    day_offset = 0
    record_count = 0
    while day_offset < 240:  # 8 months = ~240 days
        # Add random time component (hours, minutes, seconds)
        pred_date = base_date + timedelta(
            days=day_offset,
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
            seconds=random.randint(0, 59)
        )

        # Add some trend and noise to make it realistic
        days_elapsed = day_offset / 30.0  # Convert to months for trend calculation
        base_precision = 0.85 - (days_elapsed * 0.01)  # Slight degradation over time
        base_recall = 0.80 - (days_elapsed * 0.008)

        precision = max(0.6, min(0.95, base_precision + random.uniform(-0.05, 0.05)))
        recall = max(0.55, min(0.92, base_recall + random.uniform(-0.05, 0.05)))
        sample_numbers = random.randint(1000, 5000)
        js_value = min(0.3, 0.05 + (days_elapsed * 0.015) + random.uniform(-0.02, 0.02))  # Drift increases over time
        wd_value = min(2.5, 0.2 + (days_elapsed * 0.1) + random.uniform(-0.1, 0.1))

        cursor.execute(
            '''INSERT OR IGNORE INTO drift_record
            (model_id, precision, recall, sample_numbers, js_value, wd_value, prediction_date)
            VALUES (?, ?, ?, ?, ?, ?, ?)''',
            (model_id, precision, recall, sample_numbers, js_value, wd_value, pred_date.strftime('%Y-%m-%d %H:%M:%S'))
        )

        # Next record in 3-4 days
        day_offset += random.randint(3, 4)
        record_count += 1

conn.commit()
conn.close()

print("Database created successfully!")
print("Database file: drift_detection.db")