File size: 2,715 Bytes
4b1a31e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

import sqlite3
import pandas as pd
import os
from dotenv import load_dotenv
from huggingface_hub import login
from datasets import Dataset

# Load env vars
load_dotenv()
# Try loading from root .env.local as well. The path is relative, so it is
# resolved against the current working directory, not this file's location.
load_dotenv("../.env.local")
HF_TOKEN = os.getenv("HF_TOKEN")
HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")

# Both values are required: the token authenticates the upload and the
# dataset name is the Hub repository every table is pushed to.
if not HF_TOKEN or not HF_DATASET_NAME:
    print("Error: HF_TOKEN or HF_DATASET_NAME not found in environment.")
    # Raise SystemExit directly instead of calling the bare `exit` helper:
    # `exit` is injected by the `site` module and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen/embedded interpreters).
    raise SystemExit(1)

# Authenticate with the Hugging Face Hub before any upload is attempted.
login(token=HF_TOKEN)

# SQLite database file the tables are read from (relative path, so it is
# resolved against the current working directory).
DB_FILE = "innovations.db"

# (SQLite table name, Hub dataset config name) pairs, in migration order.
_TABLE_CONFIGS = (
    ("file", "files"),
    ("refined", "refined"),
    ("pattern", "patterns"),
    ("result", "results"),
)


def _migrate_table(conn, table, config_name):
    """Push every row of one SQLite table to the Hub as a dataset config.

    Args:
        conn: Open ``sqlite3`` connection to read from.
        table: Name of the table to read. It is interpolated directly into
            the SQL text, so it must be a trusted, hard-coded identifier.
        config_name: Dataset config name passed to ``push_to_hub``.

    Any ``Exception`` raised while reading or uploading is reported on
    stdout and swallowed, so a failure in one table does not stop the
    remaining tables from being migrated.
    """
    print(f"Migrating '{table}' table...")
    try:
        frame = pd.read_sql_query(f"SELECT * FROM {table}", conn)
        if frame.empty:
            # Nothing to upload; skip the push entirely.
            print(f"Table '{table}' is empty.")
            return
        dataset = Dataset.from_pandas(frame)
        dataset.push_to_hub(HF_DATASET_NAME, config_name=config_name, token=HF_TOKEN)
        print(f"Pushed {len(frame)} rows to '{config_name}' config.")
    except Exception as e:
        # Deliberate best-effort: report and continue with the next table.
        print(f"Error migrating '{table}': {e}")


def migrate():
    """Copy the tables of the SQLite database to the Hugging Face dataset.

    Opens ``DB_FILE``, then pushes each table listed in ``_TABLE_CONFIGS``
    to ``HF_DATASET_NAME`` under its own config name. Per-table failures are
    printed and do not abort the run. The connection is closed even if the
    run is interrupted by something other than an ``Exception`` (for
    example ``KeyboardInterrupt`` during an upload).
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        for table, config_name in _TABLE_CONFIGS:
            _migrate_table(conn, table, config_name)
    finally:
        conn.close()
    print("Migration complete!")

# Run the migration only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    migrate()