# File size: 2,715 bytes
# Revision: 4b1a31e
import sqlite3
import pandas as pd
import os
from dotenv import load_dotenv
from huggingface_hub import login
from datasets import Dataset
# Load environment variables: first from the default .env location, then
# from a .env.local one directory up (path is relative to the current
# working directory).
load_dotenv()
load_dotenv("../.env.local")  # Try loading from root .env.local as well

# Credentials and target repository for the Hugging Face Hub upload.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")

if not HF_TOKEN or not HF_DATASET_NAME:
    print("Error: HF_TOKEN or HF_DATASET_NAME not found in environment.")
    # Raise SystemExit directly instead of calling exit(): the bare `exit`
    # name is an interactive helper installed by the `site` module and is
    # not defined when the interpreter runs without it (e.g. `python -S`).
    raise SystemExit(1)

# Authenticate this process against the Hub before any upload is attempted.
login(token=HF_TOKEN)

# SQLite database to read from (relative to the current working directory).
DB_FILE = "innovations.db"
# (SQLite table name, Hub config name) pairs, in the order they are migrated.
_TABLE_CONFIGS = (
    ("file", "files"),
    ("refined", "refined"),
    ("pattern", "patterns"),
    ("result", "results"),
)


def _migrate_table(conn: sqlite3.Connection, table: str, config_name: str) -> bool:
    """Push every row of one SQLite table to the Hub as a dataset config.

    Args:
        conn: Open connection to the source database.
        table: Name of the table to read (taken from ``_TABLE_CONFIGS``,
            never from user input, so it is interpolated into the SQL).
        config_name: Config name passed to ``push_to_hub``.

    Returns:
        True if the table was pushed or was empty, False if any step raised.
    """
    print(f"Migrating '{table}' table...")
    try:
        frame = pd.read_sql_query(f"SELECT * FROM {table}", conn)
        if frame.empty:
            print(f"Table '{table}' is empty.")
            return True
        dataset = Dataset.from_pandas(frame)
        dataset.push_to_hub(HF_DATASET_NAME, config_name=config_name, token=HF_TOKEN)
        print(f"Pushed {len(frame)} rows to '{config_name}' config.")
        return True
    except Exception as e:
        # Best effort: report the failure and let the remaining tables run.
        print(f"Error migrating '{table}': {e}")
        return False


def migrate() -> None:
    """Copy the tables listed in ``_TABLE_CONFIGS`` from ``DB_FILE`` to the Hub.

    Each table is migrated independently; a failure in one is printed and
    does not stop the others. The final line reports whether every table
    succeeded or names the ones that failed.
    """
    conn = sqlite3.connect(DB_FILE)
    failed = []
    try:
        for table, config_name in _TABLE_CONFIGS:
            if not _migrate_table(conn, table, config_name):
                failed.append(table)
    finally:
        # Close the connection even if the run is interrupted (e.g. Ctrl+C),
        # which the per-table `except Exception` does not catch.
        conn.close()

    if failed:
        print(f"Migration finished with errors in: {', '.join(failed)}")
    else:
        print("Migration complete!")
# Run the migration only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    migrate()
# (end of file listing)