"""Migrate tables from the local SQLite database to a Hugging Face dataset.

Each table is read in full and pushed to the Hub dataset named by
``HF_DATASET_NAME`` as its own config.
"""
import os
import sqlite3
import sys
from contextlib import closing

import pandas as pd
from datasets import Dataset
from dotenv import load_dotenv
from huggingface_hub import login

# Load env vars
load_dotenv()
load_dotenv("../.env.local")  # Try loading from root .env.local as well

HF_TOKEN = os.getenv("HF_TOKEN")
HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")

if not HF_TOKEN or not HF_DATASET_NAME:
    print("Error: HF_TOKEN or HF_DATASET_NAME not found in environment.")
    # sys.exit() instead of the bare exit() helper, which is injected by the
    # site module and is not guaranteed to exist (e.g. under ``python -S``).
    sys.exit(1)

login(token=HF_TOKEN)

DB_FILE = "innovations.db"

# (SQLite table name, Hub config name) pairs, in migration order.
_TABLES = (
    ("file", "files"),
    ("refined", "refined"),
    ("pattern", "patterns"),
    ("result", "results"),
)


def _migrate_table(conn, table, config_name):
    """Push one SQLite table to the Hub as the given dataset config.

    Args:
        conn: Open sqlite3 connection to read from.
        table: Name of the table to read (taken from ``_TABLES``, never
            from user input, so it is interpolated into the query directly).
        config_name: Config name to push the rows under.

    Returns:
        True if the table was pushed or was empty, False if an error
        occurred while reading or pushing it.
    """
    print(f"Migrating '{table}' table...")
    try:
        df = pd.read_sql_query(f"SELECT * FROM {table}", conn)
        if df.empty:
            print(f"Table '{table}' is empty.")
            return True
        ds = Dataset.from_pandas(df)
        ds.push_to_hub(HF_DATASET_NAME, config_name=config_name, token=HF_TOKEN)
        print(f"Pushed {len(df)} rows to '{config_name}' config.")
    except Exception as e:
        # Deliberately broad: one failing table must not stop the remaining
        # tables from being migrated. The failure is reported to the caller.
        print(f"Error migrating '{table}': {e}")
        return False
    return True


def migrate():
    """Migrate every table listed in ``_TABLES`` from ``DB_FILE`` to the Hub.

    Tables are processed independently: a failure in one is printed and the
    rest are still attempted. A summary line at the end names any tables
    that failed.
    """
    # sqlite3.connect() would silently create an empty database here, and
    # every table would then fail with "no such table".
    if not os.path.isfile(DB_FILE):
        print(f"Error: database file '{DB_FILE}' not found.")
        return

    failed = []
    # A sqlite3 connection used directly as a context manager only manages
    # transactions; closing() guarantees the connection itself is closed.
    with closing(sqlite3.connect(DB_FILE)) as conn:
        for table, config_name in _TABLES:
            if not _migrate_table(conn, table, config_name):
                failed.append(table)

    if failed:
        print(f"Migration finished with errors in: {', '.join(failed)}")
    else:
        print("Migration complete!")


if __name__ == "__main__":
    migrate()