|
|
import sqlite3 |
|
|
import pandas as pd |
|
|
import os |
|
|
from huggingface_hub import hf_hub_download |
|
|
import io |
|
|
import requests |
|
|
from io import StringIO |
|
|
|
|
|
def download_dataset(url, timeout=30):
    """Download a CSV file hosted on Hugging Face and parse it into a DataFrame.

    A browser-style link is first rewritten into its direct-download form:
    a ``/blob/`` path segment becomes ``/resolve/`` and ``/tree/main``
    becomes ``/resolve/main``. Links already in ``/resolve/`` form pass
    through unchanged.

    Args:
        url: Link to a CSV file on huggingface.co.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``pandas.DataFrame`` parsed from the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Hugging Face serves raw file contents from ".../resolve/<revision>/...".
    # Simply dropping "blob/" (the GitHub convention) yields a URL that is
    # not a download endpoint, so map the segment explicitly. Only the first
    # occurrence is rewritten so a directory called "blob" deeper in the
    # file path is left alone.
    raw_url = url.replace('/blob/', '/resolve/', 1)
    raw_url = raw_url.replace('/tree/main', '/resolve/main')

    # Browser-like User-Agent sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    response = requests.get(raw_url, headers=headers, timeout=timeout)
    response.raise_for_status()

    return pd.read_csv(StringIO(response.text))
|
|
|
|
|
def init_database():
    """Populate the local SQLite file ``data.db`` from Hugging Face CSVs.

    Both CSV files are fetched from the ``AIforAll16011991/bonus_data``
    dataset repository, loaded with pandas, and written to the tables
    ``bonus_data`` and ``player_kpi``, replacing any existing tables of the
    same name.

    Raises:
        Exception: Any failure during download, parsing or writing is
            printed and then re-raised unchanged.
    """
    # (destination table, file name inside the dataset repository)
    sources = (
        ('bonus_data', "Bonus_Data.csv"),
        ('player_kpi', "Player_KPIs.csv"),
    )

    connection = sqlite3.connect('data.db')
    try:
        # Phase 1: fetch every file before touching the database, so a failed
        # download leaves the existing tables untouched.
        local_paths = [
            hf_hub_download(
                repo_id="AIforAll16011991/bonus_data",
                filename=csv_name,
                repo_type="dataset"
            )
            for _, csv_name in sources
        ]

        # Phase 2: parse all CSVs.
        frames = [pd.read_csv(path) for path in local_paths]

        # Phase 3: write each frame to its table.
        for (table, _), frame in zip(sources, frames):
            frame.to_sql(table, connection, if_exists='replace', index=False)

        print("Database initialized successfully with data from Hugging Face!")
    except Exception as exc:
        print(f"Error initializing database: {str(exc)}")
        raise
    finally:
        connection.close()
|
|
|
|
|
def get_schema_info():
    """Summarise every table stored in the SQLite file ``data.db``.

    Returns:
        A dict keyed by table name. Each value is a dict with:

        * ``'columns'``: list of column names,
        * ``'types'``: list of declared column types, in the same order,
        * ``'sample_values'``: dict mapping each column name to the list of
          its values in the first (at most) five rows.

    Raises:
        sqlite3.Error: If a query fails. The connection is closed in every
            case.
    """
    conn = sqlite3.connect('data.db')
    try:
        cursor = conn.cursor()

        # Materialise the table list up front: the same cursor is reused for
        # the PRAGMA queries inside the loop.
        table_names = [
            row[0]
            for row in cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table';"
            ).fetchall()
        ]

        schema_info = {}
        for table_name in table_names:
            # Identifiers cannot be bound as parameters, so quote them
            # instead; this keeps names containing spaces, keywords or
            # quotes from breaking the statement.
            quoted = '"' + table_name.replace('"', '""') + '"'

            # PRAGMA table_info rows are (cid, name, type, notnull, dflt, pk).
            columns = cursor.execute(f"PRAGMA table_info({quoted});").fetchall()

            sample_data = pd.read_sql_query(f"SELECT * FROM {quoted} LIMIT 5", conn)

            schema_info[table_name] = {
                'columns': [col[1] for col in columns],
                'types': [col[2] for col in columns],
                'sample_values': {col: sample_data[col].tolist() for col in sample_data.columns}
            }

        return schema_info
    finally:
        # Release the connection even when a query above raises.
        conn.close()
|
|
|
|
|
# Script entry point: build the database only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    init_database()
|
|
|