BaoKhuong committed on
Commit
4cc5fe9
·
verified ·
1 Parent(s): 163685a

Upload 3 files

Browse files
Files changed (1) hide show
  1. app.py +54 -11
app.py CHANGED
@@ -9,7 +9,7 @@ os.makedirs(os.environ["HF_HOME"], exist_ok=True)
9
  os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
10
 
11
  import json
12
- from typing import Dict
13
 
14
  import gradio as gr
15
  import requests
@@ -33,27 +33,70 @@ if RAPIDAPI_KEY and RAPIDAPI_KEY.strip():
33
  RAPIDAPI_HOST = "alpha-vantage.p.rapidapi.com"
34
 
35
# -------- llama.cpp GGUF model --------
# HF repo holding the quantized GGUF weights.
MODEL_REPO = "mradermacher/Fin-o1-14B-GGUF"
# Exact GGUF filename inside the repo; overridable via env.
MODEL_FILE = os.getenv("GGUF_FILENAME", "Fin-o1-14B.Q4_K_M.gguf")
# CPU threads for llama.cpp; defaults to all available cores (or 4 if undetectable).
N_THREADS = int(os.getenv("LLAMA_CPP_THREADS", str(os.cpu_count() or 4)))
# Context window length passed to the Llama constructor.
CTX_LEN = int(os.getenv("LLAMA_CPP_CTX", "4096"))
 
40
 
41
- from huggingface_hub import hf_hub_download
42
  from llama_cpp import Llama
43
 
44
  _llm = None
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
def load_model():
    """Lazily download the GGUF file and build the llama.cpp model.

    The constructed ``Llama`` instance is cached in the module-global
    ``_llm`` so subsequent calls return immediately.
    """
    global _llm
    if _llm is None:
        cache_root = os.getenv("HF_HOME", "/app/hf_cache")
        # Download (or reuse from cache) the single GGUF weight file.
        gguf_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            cache_dir=cache_root,
        )
        _llm = Llama(
            model_path=gguf_path,
            n_ctx=CTX_LEN,
            n_threads=N_THREADS,
            verbose=False,
        )
    return _llm
58
 
59
 
 
9
  os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
10
 
11
  import json
12
+ from typing import Dict, List
13
 
14
  import gradio as gr
15
  import requests
 
33
  RAPIDAPI_HOST = "alpha-vantage.p.rapidapi.com"
34
 
35
# -------- llama.cpp GGUF model --------
# HF repo holding the quantized GGUF weights.
MODEL_REPO = "mradermacher/Fin-o1-8B-GGUF"
# Optional exact GGUF filename; empty string means "auto-pick from snapshot".
GGUF_OVERRIDE = os.getenv("GGUF_FILENAME", "").strip()
# CPU threads for llama.cpp; defaults to all available cores (or 4 if undetectable).
N_THREADS = int(os.getenv("LLAMA_CPP_THREADS", str(os.cpu_count() or 4)))
CTX_LEN = int(os.getenv("LLAMA_CPP_CTX", "3072"))  # CPU-friendly default
# Prompt-processing batch size passed to the Llama constructor.
N_BATCH = int(os.getenv("LLAMA_CPP_BATCH", "128"))
41
 
42
+ from huggingface_hub import snapshot_download
43
  from llama_cpp import Llama
44
 
45
  _llm = None
46
 
47
+
48
+ def _pick_gguf_file(root_dir: str, override: str | None) -> str:
49
+ import glob
50
+ if override:
51
+ path = os.path.join(root_dir, override)
52
+ if os.path.isfile(path) and os.path.getsize(path) > 0:
53
+ return path
54
+ candidates = glob.glob(os.path.join(root_dir, "**", override), recursive=True)
55
+ for c in candidates:
56
+ if os.path.getsize(c) > 0:
57
+ return c
58
+ preferred: List[str] = [
59
+ "Fin-o1-8B.Q4_K_M.gguf", # explicit 8B file name first
60
+ "Q4_K_M", "Q4_K_S", "Q4_0", "Q3_K_M", "Q3_K_S", "Q3_0", "Q2_K", "Q2_0",
61
+ ]
62
+ import glob as _glob
63
+ ggufs = _glob.glob(os.path.join(root_dir, "**", "*.gguf"), recursive=True)
64
+ if not ggufs:
65
+ raise FileNotFoundError("No .gguf files found in snapshot")
66
+ for key in preferred:
67
+ for f in ggufs:
68
+ if key in os.path.basename(f):
69
+ return f
70
+ return ggufs[0]
71
+
72
+
73
def load_model():
    """Download (if needed) and lazily construct the llama.cpp GGUF model.

    The loaded ``Llama`` instance is cached in the module-global ``_llm`` so
    repeated calls are cheap.

    Raises:
        RuntimeError: when no usable GGUF file is found in the snapshot, or
            when llama.cpp fails to load the chosen file.
    """
    global _llm
    if _llm is not None:
        # Already initialized — reuse the cached instance.
        return _llm
    # Fetch only the GGUF weight files from the repo snapshot.
    repo_dir = snapshot_download(
        repo_id=MODEL_REPO,
        allow_patterns=["*.gguf"],
        cache_dir=os.getenv("HF_HOME", "/app/hf_cache"),
        local_files_only=False,
        # NOTE(review): resume_download is deprecated (a no-op) in recent
        # huggingface_hub releases — confirm the pinned version before removing.
        resume_download=True,
    )
    try:
        model_path = _pick_gguf_file(repo_dir, GGUF_OVERRIDE or None)
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"GGUF not found: {e}") from e
    try:
        _llm = Llama(
            model_path=model_path,
            n_ctx=CTX_LEN,
            n_threads=N_THREADS,
            n_batch=N_BATCH,
            use_mlock=False,  # don't pin pages into RAM
            use_mmap=True,    # mmap the weights to keep resident memory down
            verbose=False,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load GGUF: {e}. Set GGUF_FILENAME to an available 8B file if needed.") from e
    return _llm
101
 
102