|
|
|
|
|
import sqlite3 |
|
|
import pandas as pd |
|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
from huggingface_hub import login |
|
|
from datasets import Dataset |
|
|
|
|
|
|
|
|
# Load environment variables: first via the default lookup, then from the
# explicitly named ../.env.local file.
load_dotenv()
load_dotenv("../.env.local")

# Hugging Face credentials and target dataset, both read from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")

# Both settings are required; abort early with a non-zero status when either
# is missing or empty.
if not HF_TOKEN or not HF_DATASET_NAME:
    print("Error: HF_TOKEN or HF_DATASET_NAME not found in environment.")
    # Raise SystemExit directly instead of calling exit(): the latter is an
    # interactive helper injected by the `site` module and is not guaranteed
    # to exist (e.g. under `python -S`).
    raise SystemExit(1)

# Log in to the Hugging Face Hub with the token read above.
login(token=HF_TOKEN)

# SQLite database file to migrate (relative path, resolved against the
# current working directory).
DB_FILE = "innovations.db"
|
|
|
|
|
def _migrate_table(conn, table, config_name):
    """Push one SQLite table to the Hub as the dataset config *config_name*.

    Args:
        conn: Open ``sqlite3`` connection to read the table from.
        table: Name of the table to read. It is interpolated into the SQL
            text, so it must be a trusted, hard-coded identifier.
        config_name: Name of the dataset config the rows are pushed to.

    Returns:
        ``True`` when the table was pushed or was empty, ``False`` when
        reading or pushing raised an error (the error is printed).
    """
    print(f"Migrating '{table}' table...")
    try:
        df = pd.read_sql_query(f"SELECT * FROM {table}", conn)
        if df.empty:
            print(f"Table '{table}' is empty.")
            return True
        ds = Dataset.from_pandas(df)
        ds.push_to_hub(HF_DATASET_NAME, config_name=config_name, token=HF_TOKEN)
        print(f"Pushed {len(df)} rows to '{config_name}' config.")
        return True
    except Exception as e:
        # Deliberately broad: the migration is best-effort, so a failure on
        # one table is reported and must not stop the remaining tables.
        print(f"Error migrating '{table}': {e}")
        return False


def migrate():
    """Push the tables of the local SQLite database to the Hugging Face Hub.

    Reads the ``file``, ``refined``, ``pattern`` and ``result`` tables from
    ``DB_FILE`` and pushes every non-empty one to ``HF_DATASET_NAME`` as the
    ``files``, ``refined``, ``patterns`` and ``results`` config respectively.

    The migration is best-effort: an error on one table is printed and the
    remaining tables are still attempted. The final message names the tables
    that failed, if any. Nothing is returned.
    """
    # sqlite3.connect() would create a new, empty database for a missing
    # path, after which every table would fail with "no such table". Report
    # the real problem instead and leave the file system untouched.
    if not os.path.isfile(DB_FILE):
        print(f"Error: database file '{DB_FILE}' not found.")
        return

    # (SQLite table name, Hub config name) pairs, migrated in this order.
    table_configs = (
        ("file", "files"),
        ("refined", "refined"),
        ("pattern", "patterns"),
        ("result", "results"),
    )

    failed = []
    conn = sqlite3.connect(DB_FILE)
    try:
        for table, config_name in table_configs:
            if not _migrate_table(conn, table, config_name):
                failed.append(table)
    finally:
        # Close the connection even if the run is interrupted (for example
        # by KeyboardInterrupt during an upload).
        conn.close()

    if failed:
        print(f"Migration finished with errors in: {', '.join(failed)}")
    else:
        print("Migration complete!")
|
|
|
|
|
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    migrate()
|
|
|