Spaces:

yoon-gu
/

drift-detection-ui

Sleeping

drift-detection-ui / create_database.py

Yoon-gu Hwang

데이터 5배 증가 및 모델 정보 테이블 추가

403ebf2 2 months ago

4.91 kB

	import sqlite3
	from datetime import datetime, timedelta
	import random

	# Create database connection
	conn = sqlite3.connect('drift_detection.db')
	cursor = conn.cursor()

	# Create tables
	cursor.executescript('''
	CREATE TABLE IF NOT EXISTS model_info (
	model_id INTEGER PRIMARY KEY,
	model_name VARCHAR(255) NOT NULL,
	release_date DATE NOT NULL,
	prediction_period INTEGER
	);

	CREATE TABLE IF NOT EXISTS total_result (
	customer_id INTEGER NOT NULL,
	model_id INTEGER NOT NULL,
	score REAL NOT NULL CHECK(score > 0),
	prediction_date DATE NOT NULL,
	load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	PRIMARY KEY (customer_id, model_id),
	FOREIGN KEY (model_id) REFERENCES model_info(model_id)
	);

	CREATE TABLE IF NOT EXISTS delivered (
	customer_id INTEGER NOT NULL,
	model_id INTEGER NOT NULL,
	rank INTEGER NOT NULL,
	prediction_date DATE NOT NULL,
	load_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	PRIMARY KEY (customer_id, model_id),
	FOREIGN KEY (model_id) REFERENCES model_info(model_id)
	);

	CREATE TABLE IF NOT EXISTS drift_record (
	model_id INTEGER NOT NULL,
	precision REAL CHECK(precision >= 0 AND precision <= 1),
	recall REAL CHECK(recall >= 0 AND recall <= 1),
	sample_numbers INTEGER NOT NULL CHECK(sample_numbers > 0),
	js_value REAL CHECK(js_value >= 0 AND js_value <= 1),
	wd_value REAL CHECK(wd_value >= 0),
	prediction_date TIMESTAMP NOT NULL,
	PRIMARY KEY (model_id, prediction_date),
	FOREIGN KEY (model_id) REFERENCES model_info(model_id)
	);
	''')

	# Insert sample data into model_info
	model_data = [
	(1, 'Churn Prediction Model v1', '2024-11-15', 30),
	(2, 'Churn Prediction Model v2', '2024-11-20', 30),
	(3, 'Churn Prediction Model v3', '2024-12-10', 30),
	(4, 'Customer Lifetime Value Model', '2024-12-01', 60),
	(5, 'Product Recommendation Model', '2024-12-15', 15)
	]

	cursor.executemany(
	'INSERT OR IGNORE INTO model_info (model_id, model_name, release_date, prediction_period) VALUES (?, ?, ?, ?)',
	model_data
	)

	# Generate sample data for total_result
	base_date = datetime(2025, 1, 1)
	for customer_id in range(1, 101):
	for model_id in range(1, 6):
	score = random.uniform(0.1, 0.95)
	pred_date = base_date + timedelta(days=random.randint(0, 240))
	cursor.execute(
	'INSERT OR IGNORE INTO total_result (customer_id, model_id, score, prediction_date) VALUES (?, ?, ?, ?)',
	(customer_id, model_id, score, pred_date.strftime('%Y-%m-%d'))
	)

	# Generate sample data for delivered
	for customer_id in range(1, 101):
	for model_id in range(1, 6):
	if random.random() > 0.3: # 70% of customers get delivery
	rank = random.randint(1, 100)
	pred_date = base_date + timedelta(days=random.randint(0, 240))
	cursor.execute(
	'INSERT OR IGNORE INTO delivered (customer_id, model_id, rank, prediction_date) VALUES (?, ?, ?, ?)',
	(customer_id, model_id, rank, pred_date.strftime('%Y-%m-%d'))
	)

	# Generate sample data for drift_record (time series data)
	# Create records every 3-4 days for each model (Jan 2025 - Aug 2025)
	for model_id in range(1, 6):
	# Generate data every 3-4 days for 8 months (about 60-70 records per model)
	day_offset = 0
	record_count = 0
	while day_offset < 240: # 8 months = ~240 days
	# Add random time component (hours, minutes, seconds)
	pred_date = base_date + timedelta(
	days=day_offset,
	hours=random.randint(0, 23),
	minutes=random.randint(0, 59),
	seconds=random.randint(0, 59)
	)

	# Add some trend and noise to make it realistic
	days_elapsed = day_offset / 30.0 # Convert to months for trend calculation
	base_precision = 0.85 - (days_elapsed * 0.01) # Slight degradation over time
	base_recall = 0.80 - (days_elapsed * 0.008)

	precision = max(0.6, min(0.95, base_precision + random.uniform(-0.05, 0.05)))
	recall = max(0.55, min(0.92, base_recall + random.uniform(-0.05, 0.05)))
	sample_numbers = random.randint(1000, 5000)
	js_value = min(0.3, 0.05 + (days_elapsed * 0.015) + random.uniform(-0.02, 0.02)) # Drift increases over time
	wd_value = min(2.5, 0.2 + (days_elapsed * 0.1) + random.uniform(-0.1, 0.1))

	cursor.execute(
	'''INSERT OR IGNORE INTO drift_record
	(model_id, precision, recall, sample_numbers, js_value, wd_value, prediction_date)
	VALUES (?, ?, ?, ?, ?, ?, ?)''',
	(model_id, precision, recall, sample_numbers, js_value, wd_value, pred_date.strftime('%Y-%m-%d %H:%M:%S'))
	)

	# Next record in 3-4 days
	day_offset += random.randint(3, 4)
	record_count += 1

	conn.commit()
	conn.close()

	print("Database created successfully!")
	print("Database file: drift_detection.db")