Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,6 +26,8 @@ LOG_FILE = os.getenv("LOG_FILE", os.path.join(APP_DIR, "startup.log"))
|
|
| 26 |
CPU_THREADS = int(os.getenv("CPU_THREADS", "8"))
|
| 27 |
LLAMA_CTX = int(os.getenv("LLAMA_CTX", "12288"))
|
| 28 |
LLAMA_MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "8192"))
|
|
|
|
|
|
|
| 29 |
JINA_API_KEY = os.getenv("JINA_API_KEY", "")
|
| 30 |
JINA_EMBED_MODEL = os.getenv("JINA_EMBED_MODEL", "jina-embeddings-v3")
|
| 31 |
RAG_INDEX_FILE = os.getenv("RAG_INDEX_FILE", os.path.join(APP_DIR, "rag_index.json"))
|
|
@@ -225,7 +227,12 @@ def log(msg):
|
|
| 225 |
def start_server():
|
| 226 |
global multimodal_ready
|
| 227 |
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 228 |
-
log(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
log("Descargando modelo para inferencia CPU-only...")
|
| 230 |
try:
|
| 231 |
m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
|
|
@@ -262,8 +269,8 @@ def start_server():
|
|
| 262 |
"-t", str(CPU_THREADS),
|
| 263 |
"-tb", str(CPU_THREADS),
|
| 264 |
"-np", "1",
|
| 265 |
-
"-b",
|
| 266 |
-
"-ub",
|
| 267 |
"--threads-http", "2",
|
| 268 |
"--fit", "off",
|
| 269 |
"--no-mmap",
|
|
|
|
| 26 |
CPU_THREADS = int(os.getenv("CPU_THREADS", "8"))
|
| 27 |
LLAMA_CTX = int(os.getenv("LLAMA_CTX", "12288"))
|
| 28 |
LLAMA_MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "8192"))
|
| 29 |
+
LLAMA_BATCH = int(os.getenv("LLAMA_BATCH", "512"))
|
| 30 |
+
LLAMA_UBATCH = int(os.getenv("LLAMA_UBATCH", "512"))
|
| 31 |
JINA_API_KEY = os.getenv("JINA_API_KEY", "")
|
| 32 |
JINA_EMBED_MODEL = os.getenv("JINA_EMBED_MODEL", "jina-embeddings-v3")
|
| 33 |
RAG_INDEX_FILE = os.getenv("RAG_INDEX_FILE", os.path.join(APP_DIR, "rag_index.json"))
|
|
|
|
| 227 |
def start_server():
|
| 228 |
global multimodal_ready
|
| 229 |
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 230 |
+
log(
|
| 231 |
+
"Configuración: "
|
| 232 |
+
f"CPU_THREADS={CPU_THREADS}, LLAMA_CTX={LLAMA_CTX}, "
|
| 233 |
+
f"LLAMA_MAX_TOKENS={LLAMA_MAX_TOKENS}, LLAMA_BATCH={LLAMA_BATCH}, "
|
| 234 |
+
f"LLAMA_UBATCH={LLAMA_UBATCH}"
|
| 235 |
+
)
|
| 236 |
log("Descargando modelo para inferencia CPU-only...")
|
| 237 |
try:
|
| 238 |
m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
|
|
|
|
| 269 |
"-t", str(CPU_THREADS),
|
| 270 |
"-tb", str(CPU_THREADS),
|
| 271 |
"-np", "1",
|
| 272 |
+
"-b", str(LLAMA_BATCH),
|
| 273 |
+
"-ub", str(LLAMA_UBATCH),
|
| 274 |
"--threads-http", "2",
|
| 275 |
"--fit", "off",
|
| 276 |
"--no-mmap",
|