Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files
removed fine-tuned Gemma model
- src/app.py +1 -59
src/app.py
CHANGED
|
@@ -19,7 +19,6 @@ from test_integration import run_tests
|
|
| 19 |
from core.QuizEngine import QuizEngine
|
| 20 |
from core.PineconeManager import PineconeManager
|
| 21 |
from huggingface_hub import hf_hub_download
|
| 22 |
-
from llama_cpp import Llama
|
| 23 |
|
| 24 |
# --- CONFIGURATION ---
|
| 25 |
st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
|
|
@@ -158,65 +157,8 @@ def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
|
| 158 |
except Exception as e:
|
| 159 |
return f"[OpenAI Error: {e}]", None
|
| 160 |
|
| 161 |
-
|
| 162 |
-
# --- ROUTE 3: CUSTOM LOCAL GGUF (Gemma 2) ---
|
| 163 |
-
elif "Custom Gemma" in model_choice:
|
| 164 |
-
import traceback # NEW: For deep debugging
|
| 165 |
-
try:
|
| 166 |
-
# 1. Download Model (Cached automatically)
|
| 167 |
-
# UPDATED: Hardcoded to your actual repo
|
| 168 |
-
repo_id = "NavyDevilDoc/navy-custom-models"
|
| 169 |
-
filename = "gemma-2-9b-it.Q4_K_M.gguf"
|
| 170 |
-
|
| 171 |
-
# Print status to console logs
|
| 172 |
-
print(f"DEBUG: Attempting to download {filename} from {repo_id}...")
|
| 173 |
-
|
| 174 |
-
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 175 |
-
print(f"DEBUG: Model found at {model_path}")
|
| 176 |
-
|
| 177 |
-
# 2. Initialize Llama (The Engine)
|
| 178 |
-
# n_ctx=8192 matches Gemma 2's window.
|
| 179 |
-
# n_threads=8 utilizes your CPU Upgrade.
|
| 180 |
-
# verbose=True lets us see C++ errors in the logs
|
| 181 |
-
llm = Llama(
|
| 182 |
-
model_path=model_path,
|
| 183 |
-
n_ctx=8192,
|
| 184 |
-
n_threads=8,
|
| 185 |
-
verbose=True
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
-
# 3. Format Prompt for Gemma 2
|
| 189 |
-
# Gemma 2 format: <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
|
| 190 |
-
full_prompt = ""
|
| 191 |
-
for m in messages:
|
| 192 |
-
role = "model" if m["role"] == "assistant" else "user"
|
| 193 |
-
full_prompt += f"<start_of_turn>{role}\n{m['content']}<end_of_turn>\n"
|
| 194 |
-
full_prompt += "<start_of_turn>model\n"
|
| 195 |
-
|
| 196 |
-
# 4. Generate
|
| 197 |
-
print("DEBUG: Sending prompt to Gemma...")
|
| 198 |
-
output = llm(
|
| 199 |
-
full_prompt,
|
| 200 |
-
max_tokens=max_tokens,
|
| 201 |
-
stop=["<end_of_turn>"],
|
| 202 |
-
temperature=0.3
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
response_text = output['choices'][0]['text']
|
| 206 |
-
usage = {
|
| 207 |
-
"input": output['usage']['prompt_tokens'],
|
| 208 |
-
"output": output['usage']['completion_tokens']
|
| 209 |
-
}
|
| 210 |
-
return response_text, usage
|
| 211 |
-
|
| 212 |
-
except Exception as e:
|
| 213 |
-
# PRINT THE REAL ERROR TO THE CONSOLE
|
| 214 |
-
print("❌ GGUF CRITICAL FAILURE ❌")
|
| 215 |
-
traceback.print_exc()
|
| 216 |
-
return f"[GGUF Error: {str(e)} (Check Logs)]", None
|
| 217 |
-
|
| 218 |
|
| 219 |
-
# --- ROUTE
|
| 220 |
else:
|
| 221 |
model_map = {
|
| 222 |
"Granite 4 (IBM)": "granite4:latest",
|
|
|
|
| 19 |
from core.QuizEngine import QuizEngine
|
| 20 |
from core.PineconeManager import PineconeManager
|
| 21 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 22 |
|
| 23 |
# --- CONFIGURATION ---
|
| 24 |
st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
|
|
|
|
| 157 |
except Exception as e:
|
| 158 |
return f"[OpenAI Error: {e}]", None
|
| 159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
+
# --- ROUTE 3: LOCAL/OPEN SOURCE ---
|
| 162 |
else:
|
| 163 |
model_map = {
|
| 164 |
"Granite 4 (IBM)": "granite4:latest",
|