NavyDevilDoc committed on
Commit
0c28548
·
verified ·
1 Parent(s): e395dab

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +17 -4
src/app.py CHANGED
@@ -158,25 +158,34 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
158
  except Exception as e:
159
  return f"[OpenAI Error: {e}]", None
160
 
 
161
  # --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
162
  elif "Custom Gemma" in model_choice:
 
163
  try:
164
  # 1. Download Model (Cached automatically)
 
165
  repo_id = "NavyDevilDoc/navy-custom-models"
166
  filename = "gemma-2-9b-it.Q4_K_M.gguf"
167
 
 
 
 
168
  model_path = hf_hub_download(repo_id=repo_id, filename=filename)
 
169
 
170
  # 2. Initialize Llama (The Engine)
171
- # n_ctx=8192 matches Gemma 2's window. n_threads=8 utilizes your CPU Upgrade.
 
 
172
  llm = Llama(
173
  model_path=model_path,
174
  n_ctx=8192,
175
  n_threads=8,
176
- verbose=False
177
  )
178
 
179
- # 3. Format Prompt for Gemma 2 (It is picky about ChatML/Instruction format)
180
  # Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
181
  full_prompt = ""
182
  for m in messages:
@@ -185,6 +194,7 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
185
  full_prompt += "<start_of_turn>model\n"
186
 
187
  # 4. Generate
 
188
  output = llm(
189
  full_prompt,
190
  max_tokens=max_tokens,
@@ -200,7 +210,10 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
200
  return response_text, usage
201
 
202
  except Exception as e:
203
- return f"[GGUF Error: {e}]", None
 
 
 
204
 
205
 
206
  # --- ROUTE 4: LOCAL/OPEN SOURCE ---
 
158
  except Exception as e:
159
  return f"[OpenAI Error: {e}]", None
160
 
161
+
162
  # --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
163
  elif "Custom Gemma" in model_choice:
164
+ import traceback # NEW: For deep debugging
165
  try:
166
  # 1. Download Model (Cached automatically)
167
+ # UPDATED: Hardcoded to your actual repo
168
  repo_id = "NavyDevilDoc/navy-custom-models"
169
  filename = "gemma-2-9b-it.Q4_K_M.gguf"
170
 
171
+ # Print status to console logs
172
+ print(f"DEBUG: Attempting to download {filename} from {repo_id}...")
173
+
174
  model_path = hf_hub_download(repo_id=repo_id, filename=filename)
175
+ print(f"DEBUG: Model found at {model_path}")
176
 
177
  # 2. Initialize Llama (The Engine)
178
+ # n_ctx=8192 matches Gemma 2's window.
179
+ # n_threads=8 utilizes your CPU Upgrade.
180
+ # verbose=True lets us see C++ errors in the logs
181
  llm = Llama(
182
  model_path=model_path,
183
  n_ctx=8192,
184
  n_threads=8,
185
+ verbose=True
186
  )
187
 
188
+ # 3. Format Prompt for Gemma 2
189
  # Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
190
  full_prompt = ""
191
  for m in messages:
 
194
  full_prompt += "<start_of_turn>model\n"
195
 
196
  # 4. Generate
197
+ print("DEBUG: Sending prompt to Gemma...")
198
  output = llm(
199
  full_prompt,
200
  max_tokens=max_tokens,
 
210
  return response_text, usage
211
 
212
  except Exception as e:
213
+ # PRINT THE REAL ERROR TO THE CONSOLE
214
+ print("❌ GGUF CRITICAL FAILURE ❌")
215
+ traceback.print_exc()
216
+ return f"[GGUF Error: {str(e)} (Check Logs)]", None
217
 
218
 
219
  # --- ROUTE 4: LOCAL/OPEN SOURCE ---