Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files
removed fine-tuned Gemma model
- src/app.py +1 -59
src/app.py
CHANGED
|
@@ -19,7 +19,6 @@ from test_integration import run_tests
|
|
| 19 |
from core.QuizEngine import QuizEngine
|
| 20 |
from core.PineconeManager import PineconeManager
|
| 21 |
from huggingface_hub import hf_hub_download
|
| 22 |
-
from llama_cpp import Llama
|
| 23 |
|
| 24 |
# --- CONFIGURATION ---
|
| 25 |
st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
|
|
@@ -158,65 +157,8 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
|
| 158 |
except Exception as e:
|
| 159 |
return f"[OpenAI Error: {e}]", None
|
| 160 |
|
| 161 |
-
|
| 162 |
-
# --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
|
| 163 |
-
elif "Custom Gemma" in model_choice:
|
| 164 |
-
import traceback # NEW: For deep debugging
|
| 165 |
-
try:
|
| 166 |
-
# 1. Download Model (Cached automatically)
|
| 167 |
-
# UPDATED: Hardcoded to your actual repo
|
| 168 |
-
repo_id = "NavyDevilDoc/navy-custom-models"
|
| 169 |
-
filename = "gemma-2-9b-it.Q4_K_M.gguf"
|
| 170 |
-
|
| 171 |
-
# Print status to console logs
|
| 172 |
-
print(f"DEBUG: Attempting to download {filename} from {repo_id}...")
|
| 173 |
-
|
| 174 |
-
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 175 |
-
print(f"DEBUG: Model found at {model_path}")
|
| 176 |
-
|
| 177 |
-
# 2. Initialize Llama (The Engine)
|
| 178 |
-
# n_ctx=8192 matches Gemma 2's window.
|
| 179 |
-
# n_threads=8 utilizes your CPU Upgrade.
|
| 180 |
-
# verbose=True lets us see C++ errors in the logs
|
| 181 |
-
llm = Llama(
|
| 182 |
-
model_path=model_path,
|
| 183 |
-
n_ctx=8192,
|
| 184 |
-
n_threads=8,
|
| 185 |
-
verbose=True
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
-
# 3. Format Prompt for Gemma 2
|
| 189 |
-
# Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
|
| 190 |
-
full_prompt = ""
|
| 191 |
-
for m in messages:
|
| 192 |
-
role = "model" if m["role"] == "assistant" else "user"
|
| 193 |
-
full_prompt += f"<start_of_turn>{role}\n{m['content']}<end_of_turn>\n"
|
| 194 |
-
full_prompt += "<start_of_turn>model\n"
|
| 195 |
-
|
| 196 |
-
# 4. Generate
|
| 197 |
-
print("DEBUG: Sending prompt to Gemma...")
|
| 198 |
-
output = llm(
|
| 199 |
-
full_prompt,
|
| 200 |
-
max_tokens=max_tokens,
|
| 201 |
-
stop=["<end_of_turn>"],
|
| 202 |
-
temperature=0.3
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
response_text = output['choices'][0]['text']
|
| 206 |
-
usage = {
|
| 207 |
-
"input": output['usage']['prompt_tokens'],
|
| 208 |
-
"output": output['usage']['completion_tokens']
|
| 209 |
-
}
|
| 210 |
-
return response_text, usage
|
| 211 |
-
|
| 212 |
-
except Exception as e:
|
| 213 |
-
# PRINT THE REAL ERROR TO THE CONSOLE
|
| 214 |
-
print("❌ GGUF CRITICAL FAILURE ❌")
|
| 215 |
-
traceback.print_exc()
|
| 216 |
-
return f"[GGUF Error: {str(e)} (Check Logs)]", None
|
| 217 |
-
|
| 218 |
|
| 219 |
-
# --- ROUTE
|
| 220 |
else:
|
| 221 |
model_map = {
|
| 222 |
"Granite 4 (IBM)": "granite4:latest",
|
|
|
|
| 19 |
from core.QuizEngine import QuizEngine
|
| 20 |
from core.PineconeManager import PineconeManager
|
| 21 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 22 |
|
| 23 |
# --- CONFIGURATION ---
|
| 24 |
st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
|
|
|
|
| 157 |
except Exception as e:
|
| 158 |
return f"[OpenAI Error: {e}]", None
|
| 159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
+
# --- ROUTE 3: LOCAL/OPEN SOURCE ---
|
| 162 |
else:
|
| 163 |
model_map = {
|
| 164 |
"Granite 4 (IBM)": "granite4:latest",
|