NavyDevilDoc commited on
Commit
e01972c
·
verified ·
1 Parent(s): 60fc195

Update src/app.py

Browse files

added access to custom embedding and small language models

Files changed (1) hide show
  1. src/app.py +51 -3
src/app.py CHANGED
@@ -12,12 +12,14 @@ import doc_loader
12
  import modules.admin_panel as admin_panel
13
 
14
  from openai import OpenAI
15
- from google import genai # NEW: Google SDK
16
- from google.genai import types # NEW: Types for config
17
  from datetime import datetime
18
  from test_integration import run_tests
19
  from core.QuizEngine import QuizEngine
20
  from core.PineconeManager import PineconeManager
 
 
21
 
22
  # --- CONFIGURATION ---
23
  st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
@@ -156,7 +158,52 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
156
  except Exception as e:
157
  return f"[OpenAI Error: {e}]", None
158
 
159
- # --- ROUTE 3: LOCAL/OPEN SOURCE ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  else:
161
  model_map = {
162
  "Granite 4 (IBM)": "granite4:latest",
@@ -264,6 +311,7 @@ with st.sidebar:
264
  "Standard (All-MiniLM, 384d)": "sentence-transformers/all-MiniLM-L6-v2",
265
  "High-Perf (MPNet, 768d)": "sentence-transformers/all-mpnet-base-v2",
266
  "OpenAI Small (1536d)": "text-embedding-3-small"
 
267
  }
268
  embed_choice_label = st.selectbox("Select Embedding Model", list(embed_options.keys()))
269
  st.session_state.active_embed_model = embed_options[embed_choice_label]
 
12
  import modules.admin_panel as admin_panel
13
 
14
  from openai import OpenAI
15
+ from google import genai
16
+ from google.genai import types
17
  from datetime import datetime
18
  from test_integration import run_tests
19
  from core.QuizEngine import QuizEngine
20
  from core.PineconeManager import PineconeManager
21
+ from huggingface_hub import hf_hub_download
22
+ from llama_cpp import Llama
23
 
24
  # --- CONFIGURATION ---
25
  st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
 
158
  except Exception as e:
159
  return f"[OpenAI Error: {e}]", None
160
 
161
+ # --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
162
+ elif "Custom Gemma" in model_choice:
163
+ try:
164
+ # 1. Download Model (Cached automatically)
165
+ repo_id = "NavyDevilDoc/navy-custom-models"
166
+ filename = "gemma-2-9b-it.Q4_K_M.gguf"
167
+
168
+ model_path = hf_hub_download(repo_id=repo_id, filename=filename)
169
+
170
+ # 2. Initialize Llama (The Engine)
171
+ # n_ctx=8192 matches Gemma 2's window. n_threads=8 utilizes your CPU Upgrade.
172
+ llm = Llama(
173
+ model_path=model_path,
174
+ n_ctx=8192,
175
+ n_threads=8,
176
+ verbose=False
177
+ )
178
+
179
+ # 3. Format Prompt for Gemma 2 (It is picky about ChatML/Instruction format)
180
+ # Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
181
+ full_prompt = ""
182
+ for m in messages:
183
+ role = "model" if m["role"] == "assistant" else "user"
184
+ full_prompt += f"<start_of_turn>{role}\n{m['content']}<end_of_turn>\n"
185
+ full_prompt += "<start_of_turn>model\n"
186
+
187
+ # 4. Generate
188
+ output = llm(
189
+ full_prompt,
190
+ max_tokens=max_tokens,
191
+ stop=["<end_of_turn>"],
192
+ temperature=0.3
193
+ )
194
+
195
+ response_text = output['choices'][0]['text']
196
+ usage = {
197
+ "input": output['usage']['prompt_tokens'],
198
+ "output": output['usage']['completion_tokens']
199
+ }
200
+ return response_text, usage
201
+
202
+ except Exception as e:
203
+ return f"[GGUF Error: {e}]", None
204
+
205
+
206
+ # --- ROUTE 4: LOCAL/OPEN SOURCE ---
207
  else:
208
  model_map = {
209
  "Granite 4 (IBM)": "granite4:latest",
 
311
  "Standard (All-MiniLM, 384d)": "sentence-transformers/all-MiniLM-L6-v2",
312
  "High-Perf (MPNet, 768d)": "sentence-transformers/all-mpnet-base-v2",
313
+ "OpenAI Small (1536d)": "text-embedding-3-small",
314
+ "Custom Navy (BGE, 768d)": "NavyDevilDoc/navy-custom-models/bge-finetuned" # NEW
315
  }
316
  embed_choice_label = st.selectbox("Select Embedding Model", list(embed_options.keys()))
317
  st.session_state.active_embed_model = embed_options[embed_choice_label]