Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Files Community

NavyDevilDoc committed on Dec 13, 2025

Commit

8ba0387

verified ·

1 Parent(s): 1fd5385

Update src/app.py

Browse files

updated to add a rudimentary chat component

Files changed (1) hide show

src/app.py +139 -70

src/app.py CHANGED Viewed

@@ -133,13 +133,32 @@ def update_sidebar_metrics():
 # Call metrics once on load
 update_sidebar_metrics()
-def query_local_model(user_prompt, system_persona, max_tokens, model_name):
     if not API_URL_ROOT:
         return "Error: API_URL not set.", None
     url = API_URL_ROOT + "/generate"
     payload = {
-        "text": user_prompt,
         "persona": system_persona,
         "max_tokens": max_tokens,
         "model": model_name
@@ -159,7 +178,7 @@ def query_local_model(user_prompt, system_persona, max_tokens, model_name):
     except Exception as e:
         return f"Connection Error: {e}", None
-def query_gpt4o(prompt, persona, max_tokens):
     if not OPENAI_KEY:
         return "Error: OPENAI_API_KEY not set.", None
@@ -169,10 +188,7 @@ def query_gpt4o(prompt, persona, max_tokens):
         response = client.chat.completions.create(
             model="gpt-4o",
             max_tokens=max_tokens,
-            messages=[
-                {"role": "system", "content": persona},
-                {"role": "user", "content": prompt}
-            ],
             temperature=0.3
         )
         usage_obj = response.usage
@@ -192,7 +208,7 @@ def clean_text(text):
 def ask_ai(user_prompt, system_persona, max_tokens):
     if "GPT-4o" in model_choice:
-        return query_gpt4o(user_prompt, system_persona, max_tokens)
     else:
         technical_name = model_map[model_choice]
         return query_local_model(user_prompt, system_persona, max_tokens, technical_name)
@@ -268,72 +284,125 @@ with tab1:
 with tab2:
     st.header("Choose Your Model and Start a Discussion")
-    if "chat_response" not in st.session_state:
-        st.session_state.chat_response = ""
-    user_input = st.text_input("Ask a question:")
-    c1, c2 = st.columns([1,1])
     with c1:
-        use_rag = st.toggle("🔌 Enable Knowledge Base", value=False)
     with c2:
-        est_tokens = len(user_input) / 4
-        st.progress(min(est_tokens / 2000, 1.0), text=f"Input: {int(est_tokens)} tokens")
-    if st.button("Send Query"):
-        if not user_input:
-            st.warning("Please enter a question.")
-        else:
-            final_prompt = user_input
-            system_persona = "You are a helpful assistant."
-            # --- RAG LOGIC ---
-            if use_rag:
-                with st.spinner("🧠 Searching Knowledge Base..."):
-                    # 1. Retrieve & Rerank (Now using the fixed function)
-                    retrieved_docs = rag_engine.search_knowledge_base(
-                        user_input,
-                        st.session_state.username
-                    )
-                    if retrieved_docs:
-                        # 2. Format Context
-                        context_text = ""
-                        for i, doc in enumerate(retrieved_docs):
-                            # Add metadata relevance score if available
-                            score = doc.metadata.get('relevance_score', 'N/A')
-                            src = os.path.basename(doc.metadata.get('source', 'Unknown'))
-                            context_text += f"---\nSOURCE: {src} (Rel: {score})\nTEXT: {doc.page_content}\n"
-                        # 3. Update Prompt
-                        system_persona = (
-                            "You are a Navy Document Analyst. "
-                            "Your task is to answer the user's question using ONLY the Context provided below. "
-                            "Follow these rules strictly:\n"
-                            "1. If the answer is present in the Context, provide it clearly. Do NOT add any disclaimers about missing information if you found the answer.\n"
-                            "2. If the answer is NOT present in the Context, return ONLY this exact phrase: 'I cannot find that information in the provided documents.'\n\n"
-                            f"### CONTEXT:\n{context_text}"
-                        )
-                        st.success(f"Found {len(retrieved_docs)} relevant documents.")
-                        with st.expander("View Context Used"):
-                            st.text(context_text)
-                    else:
-                        st.warning("No relevant documents found. Using general knowledge.")
-            # --- GENERATION ---
-            with st.spinner(f"Thinking with {model_choice}..."):
-                reply, usage = ask_ai(final_prompt, system_persona, max_len)
-                st.session_state.chat_response = reply
-                if usage:
-                    m_name = "Granite" if "Granite" in model_choice else "GPT-4o"
-                    tracker.log_usage(m_name, usage["input"], usage["output"])
-                    update_sidebar_metrics()
-    if st.session_state.chat_response:
-        st.divider()
-        st.markdown("**AI Response:**")
-        st.write(st.session_state.chat_response)
 # --- TAB 3: PROMPT ARCHITECT ---
 with tab3:

 # Call metrics once on load
 update_sidebar_metrics()
+def query_local_model(messages, max_tokens, model_name):
     if not API_URL_ROOT:
         return "Error: API_URL not set.", None
     url = API_URL_ROOT + "/generate"
+    # --- FLATTEN MESSAGE HISTORY ---
+    # Since the backend expects a single string ("text"), we format the history here.
+    # We extract the system persona separately to pass to the 'persona' field.
+    formatted_history = ""
+    system_persona = "You are a helpful assistant." # Default
+    for msg in messages:
+        if msg['role'] == 'system':
+            system_persona = msg['content']
+        elif msg['role'] == 'user':
+            formatted_history += f"User: {msg['content']}\n"
+        elif msg['role'] == 'assistant':
+            formatted_history += f"Assistant: {msg['content']}\n"
+    # Append the "Assistant:" prompt at the end to cue the model
+    formatted_history += "Assistant: "
     payload = {
+        "text": formatted_history, # <--- History goes here
         "persona": system_persona,
         "max_tokens": max_tokens,
         "model": model_name
     except Exception as e:
         return f"Connection Error: {e}", None
+def query_openai_model(messages, max_tokens):
     if not OPENAI_KEY:
         return "Error: OPENAI_API_KEY not set.", None
         response = client.chat.completions.create(
             model="gpt-4o",
             max_tokens=max_tokens,
+            messages=messages,
             temperature=0.3
         )
         usage_obj = response.usage
 def ask_ai(user_prompt, system_persona, max_tokens):
     """Send a single-turn prompt to the model selected in ``model_choice``.

     The persona and prompt are wrapped in a chat-style message list because
     both backends (``query_openai_model`` and ``query_local_model``) take a
     list of ``{"role", "content"}`` dicts rather than separate strings.

     Args:
         user_prompt: Text of the user's request.
         system_persona: System instruction sent as the first message.
         max_tokens: Response length limit forwarded to the backend.

     Returns:
         Whatever the chosen backend returns. The visible error paths of both
         backends return ``(message, None)``; callers unpack the result as a
         ``(reply, usage)`` pair.
     """
     messages = [
         {"role": "system", "content": system_persona},
         {"role": "user", "content": user_prompt},
     ]
     if "GPT-4o" in model_choice:
         return query_openai_model(messages, max_tokens)
     # Every other choice is a local model; translate the display name into
     # the backend's technical model name.
     technical_name = model_map[model_choice]
     return query_local_model(messages, max_tokens, technical_name)
 with tab2:
     # Chat tab: shows the running conversation, accepts a new prompt, optionally
     # augments it with knowledge-base context, and sends it to the chosen model.
     st.header("Choose Your Model and Start a Discussion")
     # --- INITIALIZE CHAT MEMORY ---
     # The history must exist before the display loop below iterates over it.
     if "messages" not in st.session_state:
         st.session_state.messages = []
     # --- CONTROLS ---
     # Created outside the chat-input branch so they are not tied to a submission.
     c1, c2, c3 = st.columns([1, 1, 1])  # c3 is not used below
     with c1:
         # NOTE(review): this reads the 'model_choice' key from session state and
         # falls back to Granite when the key is absent. Other code in this file
         # uses a module-level `model_choice` variable -- confirm the selector
         # widget is actually stored under this key.
         selected_model_name = st.session_state.get('model_choice', 'Granite 4 (IBM)')
     with c2:
         use_rag = st.toggle("🔌 Enable Knowledge Base", value=False)
     # --- DISPLAY CONVERSATION HISTORY ---
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
     # --- CHAT INPUT HANDLING ---
     if prompt := st.chat_input("Ask about Naval Systems..."):
         # 1. Save the user's message to history and echo it.
         st.session_state.messages.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
             st.markdown(prompt)
         # 2. Start the payload with the system persona.
         system_persona = "You are a Navy Document Analyst. Your task is to answer the user's question using ONLY the Context provided below. If the answer is not present in the Context, return ONLY this exact phrase: 'I cannot find that information in the provided documents.' If no context is provided, answer generally."
         messages_payload = [{"role": "system", "content": system_persona}]
         # --- MEMORY LOGIC: SLIDING WINDOW ---
         # Forward at most `history_depth` earlier messages (8 = 4 user/assistant
         # exchanges). The slice stops at -1 to leave out the prompt appended in
         # step 1; that prompt is added in step 4, possibly augmented with context.
         history_depth = 8
         recent_history = st.session_state.messages[-(history_depth + 1):-1]
         messages_payload.extend(recent_history)
         # 3. Optionally augment the current prompt with retrieved context.
         final_user_content = prompt
         retrieved_docs = []  # Stays empty unless the knowledge base returns hits
         if use_rag:
             with st.spinner("🧠 Searching Knowledge Base..."):
                 retrieved_docs = rag_engine.search_knowledge_base(
                     prompt,
                     st.session_state.username
                 )
                 if retrieved_docs:
                     # One "---"-separated entry per document, in returned order.
                     context_text = "".join(
                         "---\n"
                         f"SOURCE: {os.path.basename(doc.metadata.get('source', 'Unknown'))} "
                         f"(Rel: {doc.metadata.get('relevance_score', 'N/A')})\n"
                         f"TEXT: {doc.page_content}\n"
                         for doc in retrieved_docs
                     )
                     final_user_content = (
                         f"User Question: {prompt}\n\n"
                         f"Relevant Context:\n{context_text}\n\n"
                         "Answer the question using the context provided."
                     )
         # 4. Add the final (possibly augmented) user message to the payload.
         #    Only the raw prompt is kept in session history; the augmented text
         #    is sent for this request alone.
         messages_payload.append({"role": "user", "content": final_user_content})
         # 5. Generate the response and display it.
         with st.chat_message("assistant"):
             with st.spinner(f"Thinking with {selected_model_name}..."):
                 # Display-name fragment -> local model tag.
                 ollama_map = {
                     "Granite 4 (IBM)": "granite4:latest",
                     "Llama 3.2 (Meta)": "llama3.2:latest",
                     "Gemma 3 (Google)": "gemma3:latest"
                 }
                 model_id = next(
                     (tag for label, tag in ollama_map.items() if label in selected_model_name),
                     "",
                 )
                 # Use the script-level `max_len` instead of assigning a fixed value
                 # here: this block runs at module scope, so an assignment would
                 # overwrite that setting for everything that runs after it.
                 # NOTE(review): assumes `max_len` is defined earlier in the script
                 # -- confirm.
                 if not model_id and "gpt" in selected_model_name.lower():
                     response, usage = query_openai_model(messages_payload, max_len)
                 elif model_id:
                     response, usage = query_local_model(messages_payload, max_len, model_id)
                 else:
                     response, usage = "Error: Could not determine model to use.", None
                 st.markdown(response)
         # 6. Save the assistant reply (error strings included) and log usage.
         st.session_state.messages.append({"role": "assistant", "content": response})
         if usage:
             # NOTE(review): every non-Granite model, including the other local
             # models in ollama_map, is logged under "GPT-4o" -- confirm this is
             # what the tracker expects.
             m_name = "Granite" if "Granite" in selected_model_name else "GPT-4o"
             tracker.log_usage(m_name, usage["input"], usage["output"])
             update_sidebar_metrics()
         # 7. Show the retrieved documents when the knowledge base was used.
         if use_rag and retrieved_docs:
             with st.expander("📚 View Context Used"):
                 for i, doc in enumerate(retrieved_docs):
                     score = doc.metadata.get('relevance_score', 'N/A')
                     src = os.path.basename(doc.metadata.get('source', 'Unknown'))
                     st.caption(f"Rank {i+1} (Source: {src}, Rel: {score})")
                     st.text(doc.page_content)
                     st.divider()
 # --- TAB 3: PROMPT ARCHITECT ---
 with tab3: