|
|
"""
|
|
|
Download and store HuggingFace models in device DB
|
|
|
"""
|
|
|
from typing import Optional, Dict, Any
|
|
|
import os
|
|
|
import json
|
|
|
from huggingface_hub import snapshot_download
|
|
|
import duckdb
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
# HuggingFace access token read from the environment; None when HF_TOKEN is unset.
# NOTE(review): defined but not referenced anywhere in this file — confirm it is
# meant to be passed to snapshot_download for gated/private repos.
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
|
|
|
|
|
def download_model(model_id: str, cache_dir: Optional[str] = None) -> str:
    """Download a model snapshot from the HuggingFace Hub.

    Args:
        model_id: Hub repository id, e.g. "org/model-name".
        cache_dir: Directory to cache downloads in. Defaults to
            ``~/.cache/helium/models`` and is created if missing.

    Returns:
        Local filesystem path of the downloaded snapshot.
    """
    print(f"Downloading model {model_id} from HuggingFace Hub")

    if cache_dir is None:
        cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "helium", "models")
    os.makedirs(cache_dir, exist_ok=True)

    # Pass the token so gated/private repos work when HF_TOKEN is set in the
    # environment; token=None preserves the previous anonymous behavior.
    # resume_download was removed: it is deprecated in huggingface_hub and
    # downloads always resume by default.
    local_path = snapshot_download(
        repo_id=model_id,
        cache_dir=cache_dir,
        local_files_only=False,
        token=HF_TOKEN,
    )

    print(f"Model downloaded to {local_path}")
    return local_path
|
|
|
|
|
|
def store_in_device_db(
    model_path: str,
    device_db_url: str,
    model_id: str
) -> None:
    """Store a downloaded model's config and weight files in the device DB.

    Creates the ``model_configs`` and ``model_weights`` tables if needed,
    upserts the model's ``config.json``, then walks ``model_path`` and
    upserts every ``.bin`` file as a raw weight blob.

    Args:
        model_path: Local directory containing ``config.json`` and ``.bin`` files.
        device_db_url: DuckDB database path/URL to connect to.
        model_id: Identifier the rows are keyed under.

    Raises:
        FileNotFoundError: If ``config.json`` is missing from ``model_path``.
    """
    print(f"Storing model {model_id} in device DB at {device_db_url}")

    conn = duckdb.connect(device_db_url)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS model_configs (
                model_id VARCHAR PRIMARY KEY,
                config JSON
            );

            CREATE TABLE IF NOT EXISTS model_weights (
                model_id VARCHAR,
                layer_name VARCHAR,
                weight_data BLOB,
                weight_dtype VARCHAR,
                weight_shape VARCHAR,
                PRIMARY KEY (model_id, layer_name)
            );
        """)

        config_path = os.path.join(model_path, "config.json")
        with open(config_path, "r") as f:
            config = json.load(f)

        # OR REPLACE makes re-ingesting the same model idempotent.
        conn.execute(
            "INSERT OR REPLACE INTO model_configs (model_id, config) VALUES (?, ?)",
            [model_id, json.dumps(config)]
        )

        for root, _, files in os.walk(model_path):
            for file in files:
                if not file.endswith(".bin"):
                    continue
                layer_name = os.path.splitext(file)[0]
                weight_path = os.path.join(root, file)

                # NOTE(review): assumes .bin files are raw little-endian
                # float32 buffers. PyTorch ``pytorch_model.bin`` checkpoints
                # are pickled archives, not raw buffers — confirm the
                # expected file format before trusting these blobs.
                weight_data = np.fromfile(weight_path, dtype=np.float32)
                weight_shape = weight_data.shape

                conn.execute(
                    """
                    INSERT OR REPLACE INTO model_weights
                    (model_id, layer_name, weight_data, weight_dtype, weight_shape)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    [model_id, layer_name, weight_data.tobytes(),
                     str(weight_data.dtype), str(weight_shape)]
                )

        conn.commit()
    finally:
        # Always release the DB handle, even if ingestion fails partway.
        conn.close()
|
|
|
|