File size: 19,377 Bytes
024bf35
 
 
6d20f65
 
fbcf37a
6d20f65
024bf35
6d20f65
 
024bf35
 
fbcf37a
024bf35
 
 
 
6d20f65
 
024bf35
 
 
 
 
6d20f65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
024bf35
 
 
6d20f65
 
bff8b24
 
 
 
024bf35
6d20f65
024bf35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d20f65
 
024bf35
6d20f65
024bf35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d20f65
 
024bf35
6d20f65
024bf35
 
 
6d20f65
 
024bf35
6d20f65
4f42001
6d20f65
4f42001
6d20f65
4f42001
 
6d20f65
 
4f42001
6d20f65
 
4f42001
6d20f65
4f42001
ec56e85
6d20f65
ec56e85
6d20f65
 
4f42001
6d20f65
024bf35
6d20f65
 
024bf35
 
 
6d20f65
 
024bf35
fbcf37a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
024bf35
 
6d20f65
 
 
024bf35
6d20f65
024bf35
6d20f65
 
024bf35
6d20f65
 
 
024bf35
6d20f65
 
9938e85
6d20f65
f3084a5
6d20f65
 
 
 
161831e
 
f3084a5
 
161831e
6d20f65
 
161831e
 
 
6d20f65
161831e
 
 
 
 
f3084a5
 
161831e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3084a5
6d20f65
f3084a5
6d20f65
 
 
f3084a5
6d20f65
 
f3084a5
6d20f65
 
 
 
 
 
f3084a5
6d20f65
f3084a5
6d20f65
 
 
 
 
024bf35
6d20f65
024bf35
6d20f65
024bf35
6d20f65
 
 
024bf35
6d20f65
 
 
184448e
6d20f65
 
184448e
c174894
6d20f65
c174894
 
7f02724
 
c174894
 
 
6d20f65
 
7f02724
 
6d20f65
7f02724
 
 
 
6d20f65
 
 
 
 
7f02724
73fcb96
6d20f65
 
 
c174894
 
 
6d20f65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c174894
 
 
 
6043f3a
 
 
 
6d20f65
 
 
6043f3a
 
6d20f65
 
 
 
 
 
 
 
6043f3a
6d20f65
 
 
6043f3a
6d20f65
 
 
 
 
 
 
 
6043f3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
024bf35
6d20f65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
import streamlit as st
import requests
import os
import re
import io
import contextlib
import zipfile
import tracker
import rag_engine
import doc_loader 
from openai import OpenAI
from datetime import datetime
from test_integration import run_tests

# --- CONFIGURATION ---
st.set_page_config(page_title="Navy AI Toolkit", page_icon="βš“", layout="wide")

# Backend settings come from the environment. Either may be unset: a missing
# OPENAI_API_KEY disables the GPT-4o path and a missing API_URL leaves the
# local-model URL broken (query_model_universal surfaces both as error strings).
API_URL_ROOT = os.getenv("API_URL") 
OPENAI_KEY = os.getenv("OPENAI_API_KEY") 

# --- INITIALIZATION ---
# The role list is read by the sidebar admin checks; make sure it exists
# before any widget code runs.
if "roles" not in st.session_state:
    st.session_state.roles = []

# --- FLATTENER LOGIC (Integrated) ---
class OutlineProcessor:
    """Turns an indented text outline into context/target records.

    Nesting depth is inferred from leading whitespace; wrapped lines that do
    not begin with a bullet marker are folded into the preceding item.
    Used by the Flattener tool.
    """

    # Bullet markers: "1.", "a."/"A.", "-", "*" followed by whitespace.
    _BULLET = re.compile(r"^\s*(\d+\.|[a-zA-Z]\.|-|\*)\s+")

    def __init__(self, file_content):
        # Keep the raw physical lines; blank-line filtering happens later.
        self.raw_lines = file_content.split('\n')

    def _is_list_item(self, line):
        """Return True when *line* starts with a recognized bullet marker."""
        return bool(self._BULLET.match(line))

    def _merge_multiline_items(self):
        """Fold wrapped continuation lines into their preceding list item."""
        merged = []
        for raw in self.raw_lines:
            text = raw.strip()
            if not text:
                continue  # drop blank lines entirely
            if merged and not self._is_list_item(raw):
                # Continuation of the previous item: glue with a single space.
                merged[-1] = merged[-1].rstrip() + " " + text
            else:
                # First kept line, or a new bullet item, is kept verbatim
                # (original indentation preserved for depth detection).
                merged.append(raw)
        return merged

    def parse(self):
        """Return [{"context": ..., "target": ...}] for every outline item.

        "context" is the " > "-joined chain of ancestor item texts, or
        "ROOT" for top-level entries.
        """
        records = []
        ancestry = []  # stack of {'indent': int, 'text': str}
        for line in self._merge_multiline_items():
            text = line.strip()
            depth = len(line) - len(line.lstrip())
            # Unwind siblings (equal indent) and deeper levels before pushing.
            while ancestry and ancestry[-1]['indent'] >= depth:
                ancestry.pop()
            ancestry.append({'indent': depth, 'text': text})
            if len(ancestry) > 1:
                chain = " > ".join(a['text'] for a in ancestry[:-1])
            else:
                chain = "ROOT"
            records.append({"context": chain, "target": text})
        return records

# --- HELPER FUNCTIONS ---
def query_model_universal(messages, max_tokens, model_choice, user_key=None):
    """Unified router for both Chat and Tools.

    Dispatches to OpenAI when a GPT-4o model is selected, otherwise to the
    local Ollama-backed API at API_URL_ROOT. Returns a
    ``(response_text, usage_or_None)`` tuple; failures are reported as
    bracketed error strings rather than raised exceptions.
    """
    # --- 1. OpenAI path ---
    if "GPT-4o" in model_choice:
        api_key = user_key or OPENAI_KEY
        if not api_key:
            return "[Error: No OpenAI API Key]", None
        client = OpenAI(api_key=api_key)
        try:
            resp = client.chat.completions.create(
                model="gpt-4o",
                max_tokens=max_tokens,
                messages=messages,
                temperature=0.3,
            )
            token_counts = {
                "input": resp.usage.prompt_tokens,
                "output": resp.usage.completion_tokens,
            }
            return resp.choices[0].message.content, token_counts
        except Exception as e:
            return f"[OpenAI Error: {e}]", None

    # --- 2. Local (Ollama) path ---
    local_models = {
        "Granite 4 (IBM)": "granite4:latest",
        "Llama 3.2 (Meta)": "llama3.2:latest",
        "Gemma 3 (Google)": "gemma3:latest"
    }
    backend_name = local_models.get(model_choice)
    if not backend_name:
        return "[Error: Model Map Failed]", None

    # Flatten the structured chat history into one prompt string; the last
    # system message (if any) becomes the persona.
    persona = "You are a helpful assistant."
    turns = []
    for msg in messages:
        role = msg['role']
        if role == 'system':
            persona = msg['content']
        elif role == 'user':
            turns.append(f"User: {msg['content']}\n")
        elif role == 'assistant':
            turns.append(f"Assistant: {msg['content']}\n")
    flat_prompt = "".join(turns) + "Assistant: "

    payload = {
        "text": flat_prompt,
        "persona": persona,
        "max_tokens": max_tokens,
        "model": backend_name,
    }
    try:
        r = requests.post(f"{API_URL_ROOT}/generate", json=payload, timeout=300)
        if r.status_code != 200:
            return f"[Local Error {r.status_code}]", None
        body = r.json()
        return body.get("response", ""), body.get("usage", {"input": 0, "output": 0})
    except Exception as e:
        return f"[Conn Error: {e}]", None

def update_sidebar_metrics():
    """Refresh the sidebar token counter, if its placeholder widget exists."""
    # The placeholder is created later in the sidebar; guard against being
    # called before that (it is initialized to None at module level).
    if not metric_placeholder:
        return
    stats = tracker.get_daily_stats()
    mine = stats["users"].get(st.session_state.username, {"input": 0, "output": 0})
    metric_placeholder.metric("My Tokens Today", mine["input"] + mine["output"])

# --- LOGIN ---
# Gate the whole app: show Login/Register tabs until authentication succeeds,
# then st.stop() halts the script for unauthenticated visitors.
if "authentication_status" not in st.session_state or st.session_state["authentication_status"] is None:
    login_tab, register_tab = st.tabs(["πŸ”‘ Login", "πŸ“ Register"])
    with login_tab:
        if tracker.check_login():
            # Session Isolation Logic
            # A different user logging in on the same browser session must not
            # inherit the previous user's chat history or stored OpenAI key.
            if "last_user" in st.session_state and st.session_state.last_user != st.session_state.username:
                st.session_state.messages = []
                st.session_state.user_openai_key = None
            st.session_state.last_user = st.session_state.username
            # Fetch this user's persisted DB (presumably a remote sync --
            # verify against tracker.download_user_db).
            tracker.download_user_db(st.session_state.username)
            st.rerun()
    with register_tab:
        st.header("Create Account")
        with st.form("reg_form"):
            new_user = st.text_input("Username")
            new_name = st.text_input("Display Name")
            new_email = st.text_input("Email")
            new_pwd = st.text_input("Password", type="password")
            invite = st.text_input("Invitation Passcode") 
            
            if st.form_submit_button("Register"):
                success, msg = tracker.register_user(new_email, new_user, new_name, new_pwd, invite)
                if success:
                    st.success(msg)
                else:
                    st.error(msg)
                    
    # Anything below this line only runs for authenticated users.
    if not st.session_state.get("authentication_status"): st.stop()

# --- SIDEBAR ---
# metric_placeholder is a module-level global consumed by
# update_sidebar_metrics(); it stays None until the sidebar renders.
metric_placeholder = None
with st.sidebar:
    st.header("πŸ‘€ User Profile")
    st.write(f"Welcome, **{st.session_state.name}**")
    
    st.header("πŸ“Š Usage Tracker")
    metric_placeholder = st.empty()
    
    # Admin Tools
    if "admin" in st.session_state.roles:
        st.divider()
        st.header("πŸ›‘οΈ Admin Tools")
        log_path = tracker.get_log_path()
        if log_path.exists():
            with open(log_path, "r") as f:
                log_data = f.read()
            st.download_button(
                label="πŸ“₯ Download Usage Logs",
                data=log_data,
                file_name=f"usage_log_{datetime.now().strftime('%Y-%m-%d')}.json",
                mime="application/json"
            )
    
    st.divider()
    
    # Model Selector
    # NOTE: this display-name -> backend-name map is duplicated inside
    # query_model_universal; keep the two in sync.
    st.header("🧠 Intelligence")
    model_map = {
        "Granite 4 (IBM)": "granite4:latest", 
        "Llama 3.2 (Meta)": "llama3.2:latest",
        "Gemma 3 (Google)": "gemma3:latest"
    }
    opts = list(model_map.keys())
    model_captions = ["Slower, free, private" for _ in opts]
    
    # Vision Key Input (User or Admin)
    # Non-admin users must supply their own OpenAI key; admins fall back to
    # the system OPENAI_KEY inside query_model_universal.
    is_admin = "admin" in st.session_state.roles
    user_key = None
    if not is_admin:
        user_key = st.text_input(
            "πŸ”“ Unlock GPT-4o (Enter API Key)", 
            type="password", 
            key=f"key_{st.session_state.username}",
            help="Required for Vision Mode and GPT-4o."
        )
        if user_key: 
            st.session_state.user_openai_key = user_key
            st.caption("βœ… Key Active")
        else: 
            st.session_state.user_openai_key = None
    else:
        # Admin defaults to system key, but we ensure state is clean
        st.session_state.user_openai_key = None
    
    # Unlock GPT-4o option
    # Only shown when a key is available (admin, or user entered one above).
    if is_admin or st.session_state.get("user_openai_key"):
        opts.append("GPT-4o (Omni)")
        model_captions.append("Fast, smart, sends data to OpenAI")
        
    model_choice = st.radio("Select Model:", opts, captions=model_captions, key="model_selector_radio")
    st.info(f"Connected to: **{model_choice}**")
    
    st.divider()
    if st.session_state.authenticator:
        st.session_state.authenticator.logout(location='sidebar')

    st.divider()
    st.subheader("πŸ”§ System Diagnostics")
    
    if st.button("Run Integration Test"):
        with st.spinner("Running diagnostics..."):
            # Create a buffer to capture the text that would normally be printed
            f = io.StringIO()
            
            # Redirect 'print' statements to our buffer instead of the console
            try:
                with contextlib.redirect_stdout(f):
                    run_tests()
                
                # Display the result in a code block for easy reading
                st.success("Tests Completed")
                st.code(f.getvalue(), language="text")
                
            except Exception as e:
                st.error(f"Test Execution Failed: {e}")

# Populate the token metric now that the placeholder exists.
update_sidebar_metrics()

# --- MAIN APP ---
st.title("βš“ Navy AI Toolkit")
tab1, tab2 = st.tabs(["πŸ’¬ Chat Playground", "πŸ“‚ Knowledge & Tools"])

# === TAB 1: CHAT ===
with tab1:
    st.header("Discussion & Analysis")
    if "messages" not in st.session_state: st.session_state.messages = []
    
    c1, c2 = st.columns([3, 1])
    with c1: st.caption(f"Active Model: **{st.session_state.get('model_selector_radio', 'Granite')}**")
    with c2: use_rag = st.toggle("Enable Knowledge Base", value=False)
    
    # Replay conversation history (Streamlit reruns the whole script each turn).
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]): st.markdown(msg["content"])
        
    if prompt := st.chat_input("Input command..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"): st.markdown(prompt)
        
        # RAG Search
        context_txt = ""
        # 1. Default System Prompt (No RAG)
        sys_p = "You are a helpful AI assistant."
        
        if use_rag:
            with st.spinner("Searching Knowledge Base..."):
                docs = rag_engine.search_knowledge_base(prompt, st.session_state.username)
                if docs:
                    # 2. Strict System Prompt (With RAG)
                    # We relax the strictness slightly to allow for inference, 
                    # while still demanding evidence.
                    sys_p = (
                        "You are a Navy Document Analyst. "
                        "You must answer the user's question based PRIMARILY on the provided Context. "
                        "If the Context contains the answer, output it clearly. "
                        "If the Context does NOT contain the answer, simply state: "
                        "'I cannot find that specific information in the documents provided.'"
                    )
                    
                    # 3. XML-Formatted Context Construction
                    # This helps the model "see" the start and end of each chunk clearly.
                    for i, d in enumerate(docs):
                        src = d.metadata.get('source', 'Unknown')
                        context_txt += f"<document index='{i+1}' source='{src}'>\n{d.page_content}\n</document>\n"
                    
        # 4. Construct Final User Payload
        if context_txt:
            final_prompt = (
                f"User Question: {prompt}\n\n"
                f"<context>\n{context_txt}\n</context>\n\n"
                "Instruction: Answer the question using the context above."
            )
        else:
            final_prompt = prompt
        
        # Generation
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                # Memory Window
                # [-6:-1] keeps up to five prior turns and deliberately drops
                # the user message appended above -- it is re-sent as
                # final_prompt (possibly wrapped with RAG context).
                hist = [{"role":"system", "content":sys_p}] + st.session_state.messages[-6:-1] + [{"role":"user", "content":final_prompt}]
                
                resp, usage = query_model_universal(hist, 2000, model_choice, st.session_state.get("user_openai_key"))
                st.markdown(resp)
                
                if usage:
                    m_name = "GPT-4o" if "GPT-4o" in model_choice else model_choice.split()[0]
                    tracker.log_usage(m_name, usage["input"], usage["output"])
                    update_sidebar_metrics()
                    
        st.session_state.messages.append({"role": "assistant", "content": resp})
        
        if use_rag and context_txt:
            with st.expander("πŸ“š View Context Used"):
                st.text(context_txt)

# === TAB 2: KNOWLEDGE & TOOLS ===
with tab2:
    st.header("Document Processor")
    
    c1, c2 = st.columns([1, 1])
    with c1:
        uploaded_file = st.file_uploader("Upload File (PDF, PPT, Doc, Text)", type=["pdf", "docx", "pptx", "txt", "md"])
    with c2:
        use_vision = st.toggle("πŸ‘οΈ Enable Vision Mode", help="Use GPT-4o to read diagrams/tables. Requires API Key.")
        # BUG FIX: the sidebar appends the literal "GPT-4o (Omni)" to `opts`,
        # so the old exact-membership test `"GPT-4o" not in opts` was always
        # True and this warning fired even when GPT-4o was unlocked.
        # Substring-match the option labels instead.
        if use_vision and not any("GPT-4o" in o for o in opts):
            st.warning("Vision requires OpenAI Access.")

    if uploaded_file:
        # Save temp
        temp_path = rag_engine.save_uploaded_file(uploaded_file)
        
        # ACTION BAR
        col_a, col_b, col_c = st.columns(3)
        
        # 1. ADD TO DB (With Strategy Selection)
        with col_a:
            chunk_strategy = st.selectbox(
                "Chunking Strategy", 
                ["paragraph", "token"], # Removed 'page' as it is not implemented in new engine yet
                help="Paragraph: Standard. Token: Dense text.",
                key="chunk_selector"
            )
            
            if st.button("πŸ“₯ Add to Knowledge Base", type="primary"):
                with st.spinner("Ingesting..."):
                    # Note: New engine uses internal Tesseract OCR, not GPT-4o Vision
                    # so we don't pass vision flags or keys here anymore.
                    
                    ok, msg = rag_engine.ingest_file(
                        file_path=temp_path, 
                        username=st.session_state.username, 
                        strategy=chunk_strategy
                    )
                    
                    if ok:
                        tracker.upload_user_db(st.session_state.username) # Auto-Sync
                        st.success(msg)
                    else:
                        st.error(msg)

        # 2. SUMMARIZE
        with col_b:
            # Spacer to align buttons visually since col_a has a selectbox
            st.write("") 
            st.write("") 
            if st.button("πŸ“ Summarize Document"):
                with st.spinner("Reading & Summarizing..."):
                    key = st.session_state.get("user_openai_key") or OPENAI_KEY
                    # Extract raw text first
                    # Minimal file-like adapter: provides only .name and
                    # .read() -- presumably all doc_loader needs; TODO confirm.
                    class FileObj:
                        def __init__(self, p, n): self.path=p; self.name=n
                        def read(self): 
                            with open(self.path, "rb") as f: return f.read()
                    
                    # Extraction
                    raw = doc_loader.extract_text_from_file(
                        FileObj(temp_path, uploaded_file.name), 
                        use_vision=use_vision, api_key=key
                    )
                    
                    # Call LLM
                    prompt = f"Summarize this document into a key executive brief:\n\n{raw[:20000]}" # Truncate for safety
                    msgs = [{"role":"user", "content": prompt}]
                    summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
                    
                    st.subheader("Summary Result")
                    st.markdown(summ)
                    if usage:
                        m_name = "GPT-4o" if "GPT-4o" in model_choice else model_choice.split()[0]
                        tracker.log_usage(m_name, usage["input"], usage["output"])
                        update_sidebar_metrics()

        # 3. FLATTEN
        with col_c:
            # Spacer to align buttons
            st.write("") 
            st.write("") 
            
            # We use a session state variable to store the result so it persists for the "Index" step
            if "flattened_result" not in st.session_state:
                st.session_state.flattened_result = None
            
            if st.button("πŸ“„ Flatten Context"):
                with st.spinner("Flattening..."):
                    key = st.session_state.get("user_openai_key") or OPENAI_KEY
                    
                    # A. Extract
                    with open(temp_path, "rb") as f:
                        # In-memory variant of the FileObj adapter used above.
                        class Wrapper:
                            def __init__(self, data, n): self.data=data; self.name=n
                            def read(self): return self.data
                        raw = doc_loader.extract_text_from_file(
                            Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key
                        )

                    # B. Parse
                    proc = OutlineProcessor(raw)
                    items = proc.parse()
                    
                    # C. Flatten
                    # One LLM call per outline item; the bar tracks completion.
                    out_txt = []
                    bar = st.progress(0)
                    for i, item in enumerate(items):
                        p = f"Context: {item['context']}\nTarget: {item['target']}\nRewrite as one sentence."
                        m = [{"role":"user", "content": p}]
                        res, _ = query_model_universal(m, 300, model_choice, st.session_state.get("user_openai_key"))
                        out_txt.append(res)
                        bar.progress((i+1)/len(items))
                    
                    # D. Store Result in Session State
                    final_flattened_text = "\n".join(out_txt)
                    st.session_state.flattened_result = {
                        "text": final_flattened_text,
                        "source": f"{uploaded_file.name}_flat"
                    }
                    st.rerun() # Refresh to show the new result/buttons

            # Display Result & Index Option
            if st.session_state.flattened_result:
                res = st.session_state.flattened_result
                st.success("Flattening Complete!")
                st.text_area("Result", res["text"], height=200)
                
                # The New Button
                if st.button("πŸ“₯ Index This Flattened Version"):
                    with st.spinner("Indexing Flattened Text..."):
                        ok, msg = rag_engine.process_and_add_text(
                            res["text"], 
                            res["source"], 
                            st.session_state.username
                        )
                        if ok:
                            tracker.upload_user_db(st.session_state.username) # Sync!
                            st.success(msg)
                        else:
                            st.error(msg)

    st.divider()
    
    # DB MANAGER
    # Lists the current user's indexed documents with a per-row delete button.
    st.subheader("Database Management")
    docs = rag_engine.list_documents(st.session_state.username)
    if docs:
        for d in docs:
            c1, c2 = st.columns([4,1])
            c1.text(f"πŸ“„ {d['filename']} ({d['chunks']} chunks)")
            # Widget key is the document source, so each row's button is unique.
            if c2.button("πŸ—‘οΈ", key=d['source']):
                rag_engine.delete_document(st.session_state.username, d['source'])
                tracker.upload_user_db(st.session_state.username)
                st.rerun()
    else:
        st.info("Database Empty.")