Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Community

NavyDevilDoc committed on Jan 21

Commit

0c28548

verified ·

1 Parent(s): e395dab

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +17 -4

src/app.py CHANGED Viewed

@@ -158,25 +158,34 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
         except Exception as e:
             return f"[OpenAI Error: {e}]", None
     # --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
     elif "Custom Gemma" in model_choice:
         try:
             # 1. Download Model (Cached automatically)
             repo_id = "NavyDevilDoc/navy-custom-models"
             filename = "gemma-2-9b-it.Q4_K_M.gguf"
             model_path = hf_hub_download(repo_id=repo_id, filename=filename)
             # 2. Initialize Llama (The Engine)
-            # n_ctx=8192 matches Gemma 2's window. n_threads=8 utilizes your CPU Upgrade.
             llm = Llama(
                 model_path=model_path,
                 n_ctx=8192,
                 n_threads=8,
-                verbose=False
             )
-            # 3. Format Prompt for Gemma 2 (It is picky about ChatML/Instruction format)
             # Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
             full_prompt = ""
             for m in messages:
@@ -185,6 +194,7 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
             full_prompt += "<start_of_turn>model\n"
             # 4. Generate
             output = llm(
                 full_prompt,
                 max_tokens=max_tokens,
@@ -200,7 +210,10 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
             return response_text, usage
         except Exception as e:
-            return f"[GGUF Error: {e}]", None
     # --- ROUTE 4: LOCAL/OPEN SOURCE ---

         except Exception as e:
             return f"[OpenAI Error: {e}]", None
     # --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
     elif "Custom Gemma" in model_choice:
+        import traceback # NEW: For deep debugging
         try:
             # 1. Download Model (Cached automatically)
+            # UPDATED: Hardcoded to your actual repo
             repo_id = "NavyDevilDoc/navy-custom-models"
             filename = "gemma-2-9b-it.Q4_K_M.gguf"
+            # Print status to console logs
+            print(f"DEBUG: Attempting to download {filename} from {repo_id}...")
             model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+            print(f"DEBUG: Model found at {model_path}")
             # 2. Initialize Llama (The Engine)
+            # n_ctx=8192 matches Gemma 2's window.
+            # n_threads=8 utilizes your CPU Upgrade.
+            # verbose=True lets us see C++ errors in the logs
             llm = Llama(
                 model_path=model_path,
                 n_ctx=8192,
                 n_threads=8,
+                verbose=True
             )
+            # 3. Format Prompt for Gemma 2
             # Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
             full_prompt = ""
             for m in messages:
             full_prompt += "<start_of_turn>model\n"
             # 4. Generate
+            print("DEBUG: Sending prompt to Gemma...")
             output = llm(
                 full_prompt,
                 max_tokens=max_tokens,
             return response_text, usage
         except Exception as e:
+            # PRINT THE REAL ERROR TO THE CONSOLE
+            print("❌ GGUF CRITICAL FAILURE ❌")
+            traceback.print_exc()
+            return f"[GGUF Error: {str(e)} (Check Logs)]", None
     # --- ROUTE 4: LOCAL/OPEN SOURCE ---