Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files
- src/app.py +17 -4
src/app.py
CHANGED
|
@@ -158,25 +158,34 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
|
| 158 |
except Exception as e:
|
| 159 |
return f"[OpenAI Error: {e}]", None
|
| 160 |
|
|
|
|
| 161 |
# --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
|
| 162 |
elif "Custom Gemma" in model_choice:
|
|
|
|
| 163 |
try:
|
| 164 |
# 1. Download Model (Cached automatically)
|
|
|
|
| 165 |
repo_id = "NavyDevilDoc/navy-custom-models"
|
| 166 |
filename = "gemma-2-9b-it.Q4_K_M.gguf"
|
| 167 |
|
|
|
|
|
|
|
|
|
|
| 168 |
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
|
|
|
| 169 |
|
| 170 |
# 2. Initialize Llama (The Engine)
|
| 171 |
-
# n_ctx=8192 matches Gemma 2's window.
|
|
|
|
|
|
|
| 172 |
llm = Llama(
|
| 173 |
model_path=model_path,
|
| 174 |
n_ctx=8192,
|
| 175 |
n_threads=8,
|
| 176 |
-
verbose=False
|
| 177 |
)
|
| 178 |
|
| 179 |
-
# 3. Format Prompt for Gemma 2
|
| 180 |
# Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
|
| 181 |
full_prompt = ""
|
| 182 |
for m in messages:
|
|
@@ -185,6 +194,7 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
|
| 185 |
full_prompt += "<start_of_turn>model\n"
|
| 186 |
|
| 187 |
# 4. Generate
|
|
|
|
| 188 |
output = llm(
|
| 189 |
full_prompt,
|
| 190 |
max_tokens=max_tokens,
|
|
@@ -200,7 +210,10 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
|
| 200 |
return response_text, usage
|
| 201 |
|
| 202 |
except Exception as e:
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
|
| 206 |
# --- ROUTE 4: LOCAL/OPEN SOURCE ---
|
|
|
|
| 158 |
except Exception as e:
|
| 159 |
return f"[OpenAI Error: {e}]", None
|
| 160 |
|
| 161 |
+
|
| 162 |
# --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
|
| 163 |
elif "Custom Gemma" in model_choice:
|
| 164 |
+
import traceback # NEW: For deep debugging
|
| 165 |
try:
|
| 166 |
# 1. Download Model (Cached automatically)
|
| 167 |
+
# UPDATED: Hardcoded to your actual repo
|
| 168 |
repo_id = "NavyDevilDoc/navy-custom-models"
|
| 169 |
filename = "gemma-2-9b-it.Q4_K_M.gguf"
|
| 170 |
|
| 171 |
+
# Print status to console logs
|
| 172 |
+
print(f"DEBUG: Attempting to download {filename} from {repo_id}...")
|
| 173 |
+
|
| 174 |
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 175 |
+
print(f"DEBUG: Model found at {model_path}")
|
| 176 |
|
| 177 |
# 2. Initialize Llama (The Engine)
|
| 178 |
+
# n_ctx=8192 matches Gemma 2's window.
|
| 179 |
+
# n_threads=8 utilizes your CPU Upgrade.
|
| 180 |
+
# verbose=True lets us see C++ errors in the logs
|
| 181 |
llm = Llama(
|
| 182 |
model_path=model_path,
|
| 183 |
n_ctx=8192,
|
| 184 |
n_threads=8,
|
| 185 |
+
verbose=True
|
| 186 |
)
|
| 187 |
|
| 188 |
+
# 3. Format Prompt for Gemma 2
|
| 189 |
# Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
|
| 190 |
full_prompt = ""
|
| 191 |
for m in messages:
|
|
|
|
| 194 |
full_prompt += "<start_of_turn>model\n"
|
| 195 |
|
| 196 |
# 4. Generate
|
| 197 |
+
print("DEBUG: Sending prompt to Gemma...")
|
| 198 |
output = llm(
|
| 199 |
full_prompt,
|
| 200 |
max_tokens=max_tokens,
|
|
|
|
| 210 |
return response_text, usage
|
| 211 |
|
| 212 |
except Exception as e:
|
| 213 |
+
# PRINT THE REAL ERROR TO THE CONSOLE
|
| 214 |
+
print("❌ GGUF CRITICAL FAILURE ❌")
|
| 215 |
+
traceback.print_exc()
|
| 216 |
+
return f"[GGUF Error: {str(e)} (Check Logs)]", None
|
| 217 |
|
| 218 |
|
| 219 |
# --- ROUTE 4: LOCAL/OPEN SOURCE ---
|