NavyDevilDoc committed on
Commit
8b6ed10
·
verified ·
1 Parent(s): d879271

Update src/app.py

Browse files

removed fine-tuned Gemma model

Files changed (1) hide show
  1. src/app.py +1 -59
src/app.py CHANGED
@@ -19,7 +19,6 @@ from test_integration import run_tests
19
  from core.QuizEngine import QuizEngine
20
  from core.PineconeManager import PineconeManager
21
  from huggingface_hub import hf_hub_download
22
- from llama_cpp import Llama
23
 
24
  # --- CONFIGURATION ---
25
  st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
@@ -158,65 +157,8 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
158
  except Exception as e:
159
  return f"[OpenAI Error: {e}]", None
160
 
161
-
162
- # --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
163
- elif "Custom Gemma" in model_choice:
164
- import traceback # NEW: For deep debugging
165
- try:
166
- # 1. Download Model (Cached automatically)
167
- # UPDATED: Hardcoded to your actual repo
168
- repo_id = "NavyDevilDoc/navy-custom-models"
169
- filename = "gemma-2-9b-it.Q4_K_M.gguf"
170
-
171
- # Print status to console logs
172
- print(f"DEBUG: Attempting to download {filename} from {repo_id}...")
173
-
174
- model_path = hf_hub_download(repo_id=repo_id, filename=filename)
175
- print(f"DEBUG: Model found at {model_path}")
176
-
177
- # 2. Initialize Llama (The Engine)
178
- # n_ctx=8192 matches Gemma 2's window.
179
- # n_threads=8 utilizes your CPU Upgrade.
180
- # verbose=True lets us see C++ errors in the logs
181
- llm = Llama(
182
- model_path=model_path,
183
- n_ctx=8192,
184
- n_threads=8,
185
- verbose=True
186
- )
187
-
188
- # 3. Format Prompt for Gemma 2
189
- # Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
190
- full_prompt = ""
191
- for m in messages:
192
- role = "model" if m["role"] == "assistant" else "user"
193
- full_prompt += f"<start_of_turn>{role}\n{m['content']}<end_of_turn>\n"
194
- full_prompt += "<start_of_turn>model\n"
195
-
196
- # 4. Generate
197
- print("DEBUG: Sending prompt to Gemma...")
198
- output = llm(
199
- full_prompt,
200
- max_tokens=max_tokens,
201
- stop=["<end_of_turn>"],
202
- temperature=0.3
203
- )
204
-
205
- response_text = output['choices'][0]['text']
206
- usage = {
207
- "input": output['usage']['prompt_tokens'],
208
- "output": output['usage']['completion_tokens']
209
- }
210
- return response_text, usage
211
-
212
- except Exception as e:
213
- # PRINT THE REAL ERROR TO THE CONSOLE
214
- print("❌ GGUF CRITICAL FAILURE ❌")
215
- traceback.print_exc()
216
- return f"[GGUF Error: {str(e)} (Check Logs)]", None
217
-
218
 
219
- # --- ROUTE 4: LOCAL/OPEN SOURCE ---
220
  else:
221
  model_map = {
222
  "Granite 4 (IBM)": "granite4:latest",
 
19
  from core.QuizEngine import QuizEngine
20
  from core.PineconeManager import PineconeManager
21
  from huggingface_hub import hf_hub_download
 
22
 
23
  # --- CONFIGURATION ---
24
  st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
 
157
  except Exception as e:
158
  return f"[OpenAI Error: {e}]", None
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ # --- ROUTE 3: LOCAL/OPEN SOURCE ---
162
  else:
163
  model_map = {
164
  "Granite 4 (IBM)": "granite4:latest",