Patryk Studzinski committed on
Commit
cdff838
·
1 Parent(s): baa08b7

enhance error handling in LlamaCppModel initialization; include full traceback on failure

Browse files
app/models/llama_cpp_model.py CHANGED
@@ -5,6 +5,7 @@ Highly optimized for CPU inference.
5
 
6
  import os
7
  import asyncio
 
8
  from typing import List, Dict, Any, Optional
9
  from app.models.base_llm import BaseLLM
10
 
@@ -58,8 +59,11 @@ class LlamaCppModel(BaseLLM):
58
  print(f"[{self.name}] GGUF Model loaded successfully (n_ctx={self.n_ctx})")
59
 
60
  except Exception as e:
61
- print(f"[{self.name}] Failed to load GGUF model: {e}")
62
- raise
 
 
 
63
 
64
  async def generate(
65
  self,
 
5
 
6
  import os
7
  import asyncio
8
+ import traceback
9
  from typing import List, Dict, Any, Optional
10
  from app.models.base_llm import BaseLLM
11
 
 
59
  print(f"[{self.name}] GGUF Model loaded successfully (n_ctx={self.n_ctx})")
60
 
61
  except Exception as e:
62
+ error_msg = str(e) if str(e) else repr(e)
63
+ print(f"[{self.name}] Failed to load GGUF model: {error_msg}")
64
+ print(f"[{self.name}] Full traceback:")
65
+ traceback.print_exc()
66
+ raise RuntimeError(f"Failed to load GGUF model: {error_msg}") from e
67
 
68
  async def generate(
69
  self,
app/models/registry.py CHANGED
@@ -23,10 +23,11 @@ MODEL_CONFIG = {
23
  "size": "1.5B",
24
  },
25
  "bielik-1.5b-gguf": {
26
- "id": "speakleash/Bielik-1.5B-v3.0-Instruct-GGUF",
27
- "filename": "Bielik-1.5B-v3.0-Instruct.Q8_0.gguf",
28
- "type": "gguf",
29
- "size": "1.7 GB",
 
30
  },
31
  "qwen2.5-3b": {
32
  "id": "Qwen/Qwen2.5-3B-Instruct",
 
23
  "size": "1.5B",
24
  },
25
  "bielik-1.5b-gguf": {
26
+ "id": "speakleash/Bielik-1.5B-v3.0-Instruct-GGUF",
27
+ "local_path": "bielik-1.5b-gguf",
28
+ "filename": "Bielik-1.5B-v3.0-Instruct.Q8_0.gguf",
29
+ "type": "gguf",
30
+ "size": "1.7 GB",
31
  },
32
  "qwen2.5-3b": {
33
  "id": "Qwen/Qwen2.5-3B-Instruct",