Spaces:

minhthien
/

HealthcareGraphRAG

Running

fix: pre-download GGUF and embedding models at build time to eliminate cold-start delay

24d38de 2 months ago

1.41 kB

	from pathlib import Path
	from fsspec.implementations.local import LocalFileSystem
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub repository id passed as ``repo_id`` when downloading the model.
	MODEL_REPO_ID = "Jackrong/Qwen3.5-4B-Neo-GGUF"
	# Name of the GGUF file looked up in the local cache and passed as ``filename``
	# to the Hub download.
	MODEL_FILENAME = "Qwen3.5-4B.Q4_K_S.gguf"


	class UTF8LocalFileSystem(LocalFileSystem):
	    """LocalFileSystem that defaults text-mode opens to UTF-8 encoding.

	    Binary-mode opens are passed through unchanged. For text modes the
	    ``encoding`` keyword is set to ``"utf-8"`` only when the caller has not
	    supplied one, so an explicit encoding still wins.
	    """

	    def open(self, path, mode="rb", *args, **kwargs):  # type: ignore[override]
	        """Open ``path``, adding ``encoding="utf-8"`` for text modes by default.

	        Args:
	            path: Location of the file to open.
	            mode: File mode string; any mode without ``"b"`` is treated as text.
	            *args: Extra positional arguments forwarded to the parent ``open``.
	            **kwargs: Extra keyword arguments forwarded to the parent ``open``.

	        Returns:
	            Whatever the parent class's ``open`` returns.
	        """
	        # Only text modes accept an encoding; leave binary opens untouched.
	        if "b" not in mode:
	            # setdefault preserves an encoding the caller passed explicitly.
	            kwargs.setdefault("encoding", "utf-8")
	        return super().open(path, mode, *args, **kwargs)


	def resolve_gguf_model_path(status_callback=None) -> str:
	    """Return local path to the GGUF model file, downloading from HF Hub if needed.

	    Lookup order:
	      1. ``<cache_dir>/<MODEL_FILENAME>`` (file placed directly in the cache dir).
	      2. The first regular file named ``MODEL_FILENAME`` found under
	         ``<cache_dir>/models--<repo id with "/" replaced by "-->/snapshots``.
	      3. ``hf_hub_download`` with ``cache_dir`` as its cache directory.

	    Args:
	        status_callback: Optional callable; when truthy it is called with one
	            message string immediately before the download starts.

	    Returns:
	        The model file path as a string (for case 3, the value returned by
	        ``hf_hub_download``).
	    """
	    import os

	    # Respect HF_HUB_CACHE env var (set in Dockerfile to /app/model)
	    cache_dir = Path(os.environ.get("HF_HUB_CACHE", "./model"))

	    # is_file() rather than exists(): a directory with the model's name is
	    # not a usable model file.
	    direct = cache_dir / MODEL_FILENAME
	    if direct.is_file():
	        return str(direct)

	    snapshots_root = (
	        cache_dir
	        / f"models--{MODEL_REPO_ID.replace('/', '--')}"
	        / "snapshots"
	    )
	    # Depending on the Python version, rglob may yield entries by name without
	    # checking their target, so skip dangling symlinks and directories and
	    # let the download below repair the cache instead.
	    cached = next(
	        (p for p in snapshots_root.rglob(MODEL_FILENAME) if p.is_file()),
	        None,
	    )
	    if cached is not None:
	        return str(cached)

	    if status_callback:
	        status_callback(f"downloading model ({MODEL_FILENAME}) from HuggingFace…")
	    return hf_hub_download(
	        repo_id=MODEL_REPO_ID,
	        filename=MODEL_FILENAME,
	        cache_dir=str(cache_dir),
	    )