BaoKhuong committed on
Commit
4cc5fe9
·
verified ·
1 Parent(s): 163685a

Upload 3 files

Browse files
Files changed (1) hide show
  1. app.py +54 -11
app.py CHANGED
@@ -9,7 +9,7 @@ os.makedirs(os.environ["HF_HOME"], exist_ok=True)
9
  os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
10
 
11
  import json
12
- from typing import Dict
13
 
14
  import gradio as gr
15
  import requests
@@ -33,27 +33,70 @@ if RAPIDAPI_KEY and RAPIDAPI_KEY.strip():
33
  RAPIDAPI_HOST = "alpha-vantage.p.rapidapi.com"
34
 
35
# -------- llama.cpp GGUF model --------
# HF repo holding the quantized GGUF weights.
MODEL_REPO = "mradermacher/Fin-o1-14B-GGUF"
# Exact GGUF filename inside the repo; overridable via env.
MODEL_FILE = os.getenv("GGUF_FILENAME", "Fin-o1-14B.Q4_K_M.gguf")
# CPU threads for llama.cpp; defaults to all available cores (or 4 if undetectable).
N_THREADS = int(os.getenv("LLAMA_CPP_THREADS", str(os.cpu_count() or 4)))
# Context window length passed to the Llama constructor.
CTX_LEN = int(os.getenv("LLAMA_CPP_CTX", "4096"))
 
40
 
41
- from huggingface_hub import hf_hub_download
42
  from llama_cpp import Llama
43
 
44
  _llm = None
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
def load_model():
    """Lazily download the GGUF file and build the llama.cpp model.

    The constructed ``Llama`` instance is cached in the module-global
    ``_llm`` so subsequent calls return immediately.
    """
    global _llm
    if _llm is None:
        cache_root = os.getenv("HF_HOME", "/app/hf_cache")
        # Download (or reuse from cache) the single GGUF weight file.
        gguf_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            cache_dir=cache_root,
        )
        _llm = Llama(
            model_path=gguf_path,
            n_ctx=CTX_LEN,
            n_threads=N_THREADS,
            verbose=False,
        )
    return _llm
58
 
59
 
 
9
  os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
10
 
11
  import json
12
+ from typing import Dict, List
13
 
14
  import gradio as gr
15
  import requests
 
33
  RAPIDAPI_HOST = "alpha-vantage.p.rapidapi.com"
34
 
35
# -------- llama.cpp GGUF model --------
# HF repo holding the quantized GGUF weights.
MODEL_REPO = "mradermacher/Fin-o1-8B-GGUF"
# Optional exact GGUF filename; empty string means "auto-pick from snapshot".
GGUF_OVERRIDE = os.getenv("GGUF_FILENAME", "").strip()
# CPU threads for llama.cpp; defaults to all available cores (or 4 if undetectable).
N_THREADS = int(os.getenv("LLAMA_CPP_THREADS", str(os.cpu_count() or 4)))
CTX_LEN = int(os.getenv("LLAMA_CPP_CTX", "3072"))  # CPU-friendly default
# Prompt-processing batch size passed to the Llama constructor.
N_BATCH = int(os.getenv("LLAMA_CPP_BATCH", "128"))
41
 
42
+ from huggingface_hub import snapshot_download
43
  from llama_cpp import Llama
44
 
45
  _llm = None
46
 
47
+
48
+ def _pick_gguf_file(root_dir: str, override: str | None) -> str:
49
+ import glob
50
+ if override:
51
+ path = os.path.join(root_dir, override)
52
+ if os.path.isfile(path) and os.path.getsize(path) > 0:
53
+ return path
54
+ candidates = glob.glob(os.path.join(root_dir, "**", override), recursive=True)
55
+ for c in candidates:
56
+ if os.path.getsize(c) > 0:
57
+ return c
58
+ preferred: List[str] = [
59
+ "Fin-o1-8B.Q4_K_M.gguf", # explicit 8B file name first
60
+ "Q4_K_M", "Q4_K_S", "Q4_0", "Q3_K_M", "Q3_K_S", "Q3_0", "Q2_K", "Q2_0",
61
+ ]
62
+ import glob as _glob
63
+ ggufs = _glob.glob(os.path.join(root_dir, "**", "*.gguf"), recursive=True)
64
+ if not ggufs:
65
+ raise FileNotFoundError("No .gguf files found in snapshot")
66
+ for key in preferred:
67
+ for f in ggufs:
68
+ if key in os.path.basename(f):
69
+ return f
70
+ return ggufs[0]
71
+
72
+
73
def load_model():
    """Download (if needed) and lazily construct the llama.cpp GGUF model.

    The loaded ``Llama`` instance is cached in the module-global ``_llm`` so
    repeated calls are cheap.

    Raises:
        RuntimeError: when no usable GGUF file is found in the snapshot, or
            when llama.cpp fails to load the chosen file.
    """
    global _llm
    if _llm is not None:
        # Already initialized — reuse the cached instance.
        return _llm
    # Fetch only the GGUF weight files from the repo snapshot.
    repo_dir = snapshot_download(
        repo_id=MODEL_REPO,
        allow_patterns=["*.gguf"],
        cache_dir=os.getenv("HF_HOME", "/app/hf_cache"),
        local_files_only=False,
        # NOTE(review): resume_download is deprecated (a no-op) in recent
        # huggingface_hub releases — confirm the pinned version before removing.
        resume_download=True,
    )
    try:
        model_path = _pick_gguf_file(repo_dir, GGUF_OVERRIDE or None)
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"GGUF not found: {e}") from e
    try:
        _llm = Llama(
            model_path=model_path,
            n_ctx=CTX_LEN,
            n_threads=N_THREADS,
            n_batch=N_BATCH,
            use_mlock=False,  # don't pin pages into RAM
            use_mmap=True,    # mmap the weights to keep resident memory down
            verbose=False,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load GGUF: {e}. Set GGUF_FILENAME to an available 8B file if needed.") from e
    return _llm
101
 
102