import sqlite3 import pandas as pd import os from huggingface_hub import hf_hub_download import io import requests from io import StringIO def download_dataset(url): # Convert URL to raw content URL raw_url = url.replace('blob/', '') raw_url = raw_url.replace('https://huggingface.co/', 'https://huggingface.co/') raw_url = raw_url.replace('/tree/main', '/resolve/main') headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } # Download the data response = requests.get(raw_url, headers=headers) response.raise_for_status() # Raise an exception for bad status codes # Read CSV data return pd.read_csv(StringIO(response.text)) def init_database(): # Create database connection conn = sqlite3.connect('data.db') try: # Download files from Hugging Face bonus_data_path = hf_hub_download( repo_id="AIforAll16011991/bonus_data", filename="Bonus_Data.csv", repo_type="dataset" ) player_kpi_path = hf_hub_download( repo_id="AIforAll16011991/bonus_data", filename="Player_KPIs.csv", repo_type="dataset" ) # Read CSV files bonus_data = pd.read_csv(bonus_data_path) player_kpi = pd.read_csv(player_kpi_path) # Write to SQLite database bonus_data.to_sql('bonus_data', conn, if_exists='replace', index=False) player_kpi.to_sql('player_kpi', conn, if_exists='replace', index=False) print("Database initialized successfully with data from Hugging Face!") except Exception as e: print(f"Error initializing database: {str(e)}") raise finally: conn.close() def get_schema_info(): conn = sqlite3.connect('data.db') cursor = conn.cursor() schema_info = {} # Get table information tables = cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall() for table in tables: table_name = table[0] # Get column information columns = cursor.execute(f"PRAGMA table_info({table_name});").fetchall() # Get sample data for each column sample_data = pd.read_sql_query(f"SELECT * FROM {table_name} LIMIT 5", conn) # Store column information and data types schema_info[table_name] = { 'columns': [col[1] for col in columns], 'types': [col[2] for col in columns], 'sample_values': {col: sample_data[col].tolist() for col in sample_data.columns} } conn.close() return schema_info if __name__ == "__main__": init_database()