Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ os.makedirs(os.environ["HF_HOME"], exist_ok=True)
|
|
| 9 |
os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
|
| 10 |
|
| 11 |
import json
|
| 12 |
-
from typing import Dict
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
import requests
|
|
@@ -33,27 +33,70 @@ if RAPIDAPI_KEY and RAPIDAPI_KEY.strip():
|
|
| 33 |
RAPIDAPI_HOST = "alpha-vantage.p.rapidapi.com"
|
| 34 |
|
| 35 |
# -------- llama.cpp GGUF model --------
|
| 36 |
-
MODEL_REPO = "mradermacher/Fin-o1-
|
| 37 |
-
|
| 38 |
N_THREADS = int(os.getenv("LLAMA_CPP_THREADS", str(os.cpu_count() or 4)))
|
| 39 |
-
CTX_LEN = int(os.getenv("LLAMA_CPP_CTX", "
|
|
|
|
| 40 |
|
| 41 |
-
from huggingface_hub import
|
| 42 |
from llama_cpp import Llama
|
| 43 |
|
| 44 |
_llm = None
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def load_model():
|
| 47 |
global _llm
|
| 48 |
if _llm is not None:
|
| 49 |
return _llm
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
return _llm
|
| 58 |
|
| 59 |
|
|
|
|
| 9 |
os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
|
| 10 |
|
| 11 |
import json
|
| 12 |
+
from typing import Dict, List
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
import requests
|
|
|
|
| 33 |
RAPIDAPI_HOST = "alpha-vantage.p.rapidapi.com"

# -------- llama.cpp GGUF model --------
# Hugging Face repo holding GGUF quantizations of the Fin-o1 8B model.
MODEL_REPO = "mradermacher/Fin-o1-8B-GGUF"
# Optional exact .gguf filename override via env GGUF_FILENAME; empty = auto-pick.
GGUF_OVERRIDE = os.getenv("GGUF_FILENAME", "").strip()
# llama.cpp runtime knobs, each overridable through environment variables.
N_THREADS = int(os.getenv("LLAMA_CPP_THREADS", str(os.cpu_count() or 4)))
CTX_LEN = int(os.getenv("LLAMA_CPP_CTX", "3072"))  # CPU-friendly default
N_BATCH = int(os.getenv("LLAMA_CPP_BATCH", "128"))

from huggingface_hub import snapshot_download
from llama_cpp import Llama

# Lazily-initialized Llama instance; populated on first load_model() call.
_llm = None
|
| 46 |
|
| 47 |
+
|
| 48 |
+
def _pick_gguf_file(root_dir: str, override: str | None) -> str:
|
| 49 |
+
import glob
|
| 50 |
+
if override:
|
| 51 |
+
path = os.path.join(root_dir, override)
|
| 52 |
+
if os.path.isfile(path) and os.path.getsize(path) > 0:
|
| 53 |
+
return path
|
| 54 |
+
candidates = glob.glob(os.path.join(root_dir, "**", override), recursive=True)
|
| 55 |
+
for c in candidates:
|
| 56 |
+
if os.path.getsize(c) > 0:
|
| 57 |
+
return c
|
| 58 |
+
preferred: List[str] = [
|
| 59 |
+
"Fin-o1-8B.Q4_K_M.gguf", # explicit 8B file name first
|
| 60 |
+
"Q4_K_M", "Q4_K_S", "Q4_0", "Q3_K_M", "Q3_K_S", "Q3_0", "Q2_K", "Q2_0",
|
| 61 |
+
]
|
| 62 |
+
import glob as _glob
|
| 63 |
+
ggufs = _glob.glob(os.path.join(root_dir, "**", "*.gguf"), recursive=True)
|
| 64 |
+
if not ggufs:
|
| 65 |
+
raise FileNotFoundError("No .gguf files found in snapshot")
|
| 66 |
+
for key in preferred:
|
| 67 |
+
for f in ggufs:
|
| 68 |
+
if key in os.path.basename(f):
|
| 69 |
+
return f
|
| 70 |
+
return ggufs[0]
|
| 71 |
+
|
| 72 |
+
|
| 73 |
def load_model():
    """Download (if needed) and lazily load the GGUF model via llama.cpp.

    The loaded instance is cached in the module-level ``_llm`` so repeated
    calls return immediately.

    Returns:
        The cached ``llama_cpp.Llama`` instance.

    Raises:
        RuntimeError: if no usable .gguf file is found, or loading fails.
    """
    global _llm
    if _llm is not None:
        return _llm

    # Fetch only the .gguf weights from the Hub snapshot.
    repo_dir = snapshot_download(
        repo_id=MODEL_REPO,
        allow_patterns=["*.gguf"],
        cache_dir=os.getenv("HF_HOME", "/app/hf_cache"),
        local_files_only=False,
        # NOTE(review): resume_download is deprecated in recent
        # huggingface_hub releases (resuming is now the default); kept for
        # compatibility with older pinned versions — confirm the pin.
        resume_download=True,
    )

    try:
        model_path = _pick_gguf_file(repo_dir, GGUF_OVERRIDE or None)
    except Exception as e:
        # Chain the cause so the traceback keeps the root error (was lost before).
        raise RuntimeError(f"GGUF not found: {e}") from e

    try:
        _llm = Llama(
            model_path=model_path,
            n_ctx=CTX_LEN,
            n_threads=N_THREADS,
            n_batch=N_BATCH,
            use_mlock=False,
            use_mmap=True,
            verbose=False,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load GGUF: {e}. Set GGUF_FILENAME to an available 8B file if needed.") from e
    return _llm
|
| 101 |
|
| 102 |
|