HealthcareGraphRAG / src /utils.py
minhthien's picture
fix: pre-download GGUF and embedding models at build time to eliminate cold-start delay
24d38de
Raw
History Blame Contribute Delete
1.41 kB
from pathlib import Path
from fsspec.implementations.local import LocalFileSystem
from huggingface_hub import hf_hub_download
# Hugging Face Hub repository that hosts the GGUF weights; passed as
# ``repo_id`` to ``hf_hub_download`` and used to build the cache folder name.
MODEL_REPO_ID = "Jackrong/Qwen3.5-4B-Neo-GGUF"
# File name of the GGUF model inside that repository; also the name searched
# for in the local cache directory before any download is attempted.
MODEL_FILENAME = "Qwen3.5-4B.Q4_K_S.gguf"
class UTF8LocalFileSystem(LocalFileSystem):
    """Local filesystem whose text-mode opens default to UTF-8.

    Binary-mode opens are forwarded unchanged, and an ``encoding`` keyword
    supplied by the caller is never overridden.
    """

    def open(self, path, mode="rb", *args, **kwargs):  # type: ignore[override]
        """Open *path*, filling in ``encoding="utf-8"`` for text modes."""
        is_text_mode = "b" not in mode
        if is_text_mode and "encoding" not in kwargs:
            kwargs["encoding"] = "utf-8"
        return super().open(path, mode, *args, **kwargs)
def resolve_gguf_model_path(status_callback=None) -> str:
    """Return a local path to the GGUF model file, downloading it if needed.

    Lookup order:

    1. ``<cache_dir>/<MODEL_FILENAME>`` -- a file placed directly in the
       cache directory.
    2. The first regular file named ``MODEL_FILENAME`` found anywhere under
       ``<cache_dir>/models--<org>--<name>/snapshots/``.
    3. A download via ``hf_hub_download`` with ``cache_dir`` as the target.

    ``cache_dir`` is taken from the ``HF_HUB_CACHE`` environment variable and
    falls back to ``./model`` (relative to the current working directory).

    Args:
        status_callback: Optional callable invoked with a single status
            string right before a download is started.

    Returns:
        The path of the model file as a string.
    """
    import os

    # Respect HF_HUB_CACHE env var (set in Dockerfile to /app/model)
    cache_dir = Path(os.environ.get("HF_HUB_CACHE", "./model"))

    direct = cache_dir / MODEL_FILENAME
    # is_file() instead of exists(): a directory that happens to carry the
    # model's name must not be handed back as the model path.
    if direct.is_file():
        return str(direct)

    snapshots_root = (
        cache_dir
        / f"models--{MODEL_REPO_ID.replace('/', '--')}"
        / "snapshots"
    )
    # rglob() matches by name only. Snapshot entries may be symlinks to
    # blobs (NOTE(review): per the Hub cache layout -- confirm); is_file()
    # follows links, so a dangling entry is skipped and we fall through to
    # the download instead of returning a path that cannot be opened.
    cached = next(
        (
            candidate
            for candidate in snapshots_root.rglob(MODEL_FILENAME)
            if candidate.is_file()
        ),
        None,
    )
    if cached is not None:
        return str(cached)

    if status_callback:
        status_callback(f"downloading model ({MODEL_FILENAME}) from HuggingFace…")
    return hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        cache_dir=str(cache_dir),
    )