File size: 2,807 Bytes
972aab5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import sqlite3
import pandas as pd
import os
from huggingface_hub import hf_hub_download
import io
import requests
from io import StringIO
def download_dataset(url):
    """Download a CSV file from a Hugging Face URL and return it as a DataFrame.

    Args:
        url: A Hugging Face file URL, e.g. ``.../blob/main/file.csv`` or a
            ``.../tree/main`` listing URL.

    Returns:
        pandas.DataFrame parsed from the downloaded CSV text.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    # Hugging Face serves raw file bytes from '/resolve/...'; '/blob/...' and
    # '/tree/...' are HTML viewer pages. (The original code stripped 'blob/'
    # entirely, which yields a URL that does not serve the file.)
    raw_url = url.replace('/blob/', '/resolve/')
    raw_url = raw_url.replace('/tree/main', '/resolve/main')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Download the data; timeout prevents an unresponsive host from hanging us.
    response = requests.get(raw_url, headers=headers, timeout=30)
    response.raise_for_status()  # Raise an exception for bad status codes
    # Parse CSV directly from the response body
    return pd.read_csv(StringIO(response.text))
def init_database():
    """Create/refresh data.db with the bonus and player-KPI tables.

    Downloads two CSV files from the Hugging Face dataset repo
    ``AIforAll16011991/bonus_data`` and loads each into its own SQLite
    table, replacing any existing contents. Errors are logged and
    re-raised; the connection is always closed.
    """
    connection = sqlite3.connect('data.db')
    try:
        # Fetch both files from the hub first (cached locally by hf_hub).
        bonus_path = hf_hub_download(
            repo_id="AIforAll16011991/bonus_data",
            filename="Bonus_Data.csv",
            repo_type="dataset",
        )
        kpi_path = hf_hub_download(
            repo_id="AIforAll16011991/bonus_data",
            filename="Player_KPIs.csv",
            repo_type="dataset",
        )
        # Parse each CSV, keyed by its destination table name.
        frames = {
            'bonus_data': pd.read_csv(bonus_path),
            'player_kpi': pd.read_csv(kpi_path),
        }
        # Load every frame into SQLite, overwriting stale tables.
        for table_name, frame in frames.items():
            frame.to_sql(table_name, connection, if_exists='replace', index=False)
        print("Database initialized successfully with data from Hugging Face!")
    except Exception as e:
        print(f"Error initializing database: {str(e)}")
        raise
    finally:
        connection.close()
def get_schema_info():
    """Describe every table in data.db.

    Returns:
        dict mapping table name -> {
            'columns': [column names],
            'types': [declared SQLite types, parallel to 'columns'],
            'sample_values': {column name: up to 5 sample values},
        }

    Raises:
        Whatever sqlite3/pandas raise if data.db is missing or unreadable.
    """
    conn = sqlite3.connect('data.db')
    try:
        cursor = conn.cursor()
        schema_info = {}
        tables = cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table';"
        ).fetchall()
        for (table_name,) in tables:
            # Identifiers cannot be bound as SQL parameters, so quote-escape
            # the name before interpolating (handles spaces/quotes safely).
            quoted = '"' + table_name.replace('"', '""') + '"'
            # Column metadata: each row is (cid, name, type, notnull, dflt, pk)
            columns = cursor.execute(f"PRAGMA table_info({quoted});").fetchall()
            # A few rows of real data so callers can eyeball value shapes
            sample_data = pd.read_sql_query(f"SELECT * FROM {quoted} LIMIT 5", conn)
            schema_info[table_name] = {
                'columns': [col[1] for col in columns],
                'types': [col[2] for col in columns],
                'sample_values': {
                    col: sample_data[col].tolist() for col in sample_data.columns
                },
            }
        return schema_info
    finally:
        # Close even on error — the original leaked the connection if a query raised.
        conn.close()
if __name__ == "__main__":
init_database()
|