File size: 11,708 Bytes
ef513a5
0b474cc
79adaa2
39f39ce
79adaa2
 
 
 
 
7b68202
ef513a5
 
f09334e
39f313e
0b474cc
 
 
79adaa2
39f313e
13a7929
79adaa2
 
74f60fc
39f313e
74f60fc
 
39f313e
74f60fc
13a7929
74f60fc
 
 
 
 
 
 
 
9ea268c
 
 
74f60fc
13a7929
9ea268c
 
 
 
 
13a7929
 
9ea268c
 
 
 
13a7929
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea268c
 
74f60fc
39f313e
79adaa2
9ea268c
 
39f313e
 
74f60fc
f09334e
39f313e
 
13a7929
74f60fc
f09334e
 
79adaa2
73ca4a0
74f60fc
13a7929
 
74f60fc
ef513a5
79adaa2
 
74f60fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea268c
56e4e5f
 
9ea268c
56e4e5f
9ea268c
 
 
 
 
 
56e4e5f
 
 
 
 
 
 
 
 
 
9ea268c
 
 
56e4e5f
 
 
9ea268c
74f60fc
 
bfaaaee
 
 
 
74f60fc
bfaaaee
 
 
 
79adaa2
 
73ca4a0
79adaa2
73ca4a0
bfaaaee
 
73ca4a0
79adaa2
c6eeec6
bfaaaee
74f60fc
bfaaaee
 
79adaa2
c6eeec6
79adaa2
 
73ca4a0
79adaa2
 
e546bbb
74f60fc
a14f7cc
 
 
 
 
 
 
 
 
 
 
79adaa2
74f60fc
73ca4a0
79adaa2
74f60fc
 
bfaaaee
74f60fc
bfaaaee
 
79adaa2
f09334e
39f39ce
79adaa2
74f60fc
 
bfaaaee
 
 
 
 
 
 
74f60fc
bfaaaee
 
 
74f60fc
bfaaaee
 
 
74f60fc
bfaaaee
 
 
74f60fc
 
bfaaaee
 
 
 
39f39ce
79adaa2
 
74f60fc
39f39ce
74f60fc
39f313e
 
79adaa2
 
 
 
 
d71c08c
79adaa2
73ca4a0
d71c08c
b62f4f4
 
 
 
 
1a8ac84
b62f4f4
 
 
 
 
 
 
 
79adaa2
74f60fc
b62f4f4
5f4804b
79adaa2
5f4804b
79adaa2
74f60fc
 
a859b2e
 
 
a14f7cc
a859b2e
74f60fc
bd85152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import streamlit as st
import os
from huggingface_hub import HfApi, hf_hub_download
import time

# --- IMPORT OUR NEW MODULES ---
from src.database import DatabaseManager
from src.search import SearchEngine
from src.parsers import process_file, chunk_text
from src.llm_client import ask_llm

# --- CONFIGURATION ---
# Hugging Face *dataset* repo used as remote storage for the SQLite DBs,
# the FAISS index, and its pickled metadata sidecar.
DATASET_REPO_ID = "NavyDevilDoc/navy-policy-index" 
# Auth token for the repo; read from the environment (may be None locally,
# in which case cloud sync features degrade to no-ops).
HF_TOKEN = os.environ.get("HF_TOKEN")
# Filenames (in both the repo and the working directory) for the vector
# index and the chunk metadata that accompanies it.
INDEX_FILE = "navy_index.faiss"
META_FILE = "navy_metadata.pkl"

st.set_page_config(page_title="Navy Policy Architect", layout="wide", page_icon="βš“")

# --- CLOUD SYNC MANAGER (FIXED) ---
class SyncManager:
    """Handles downloading/uploading the Database & Index to Hugging Face.

    All methods degrade gracefully when HF_TOKEN is missing so the app can
    still run in a purely local mode. Files are synced against the dataset
    repo configured in DATASET_REPO_ID.
    """

    @staticmethod
    def _download(filename):
        """Fetch one file from the dataset repo into the working directory.

        Raises whatever `hf_hub_download` raises (e.g. file not found);
        callers decide whether that is fatal or best-effort.
        """
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=filename,
            local_dir=".",
            token=HF_TOKEN,
            # repo_type must be "dataset" — the default ("model") 404s here.
            repo_type="dataset",
            force_download=False,
        )

    @staticmethod
    def get_remote_dbs():
        """Return the list of *.db filenames in the remote repo.

        Returns [] when no token is configured or on any API failure, so the
        sidebar can always render a dropdown.
        """
        if not HF_TOKEN:
            return []
        try:
            api = HfApi(token=HF_TOKEN)
            files = api.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
            return [f for f in files if f.endswith(".db")]
        except Exception:
            # Network/auth problems degrade to "no remote DBs" rather than crashing.
            return []

    @staticmethod
    def pull_data(db_filename):
        """Download `db_filename` plus (best effort) the index/metadata files.

        Returns:
            True  on success,
            False when HF_TOKEN is missing,
            str   (the error message) when the DB download fails — callers
                  check `result is True`, so the mixed return type is part
                  of the existing contract.
        """
        if not HF_TOKEN:
            st.error("HF_TOKEN missing.")
            return False
        try:
            SyncManager._download(db_filename)

            # Index/metadata may not exist yet (fresh repo) — best effort only.
            try:
                SyncManager._download(INDEX_FILE)
                SyncManager._download(META_FILE)
            except Exception:
                pass

            return True
        except Exception as e:
            # Return the actual error message so the UI can show it permanently.
            return str(e)

    @staticmethod
    def push_data(db_filename):
        """Upload the active DB plus index/metadata; report errors in the UI."""
        if not HF_TOKEN:
            return
        api = HfApi(token=HF_TOKEN)
        try:
            for path in (db_filename, INDEX_FILE, META_FILE):
                api.upload_file(
                    path_or_fileobj=path,
                    path_in_repo=path,
                    repo_id=DATASET_REPO_ID,
                    repo_type="dataset",
                )
            st.toast("Cloud Sync Complete!", icon="☁️")
        except Exception as e:
            st.error(f"Sync Error (Push): {e}")

            
# --- SIDEBAR: KNOWLEDGE BASE SELECTOR ---
with st.sidebar:
    st.header("πŸ—„οΈ Knowledge Base")

    # 1. Database Selector
    # Fetch available DBs from the cloud once per session (cached in
    # session_state so Streamlit reruns don't re-hit the API).
    if "available_dbs" not in st.session_state:
        st.session_state.available_dbs = SyncManager.get_remote_dbs()
        if not st.session_state.available_dbs:
            st.session_state.available_dbs = ["navy_docs.db"] # Default if empty

    selected_db = st.selectbox("Select Database:", st.session_state.available_dbs)

    # 2. Create New Database Option
    with st.expander("βž• Create New Database"):
        new_db_name = st.text_input("Name (e.g., 'Medical.db')")
        if st.button("Create"):
            # FIX: ignore an empty name field (previously this created a
            # database literally named ".db").
            if new_db_name:
                if not new_db_name.endswith(".db"):
                    new_db_name += ".db"
                st.session_state.available_dbs.append(new_db_name)
                # Force reload to switch to this new DB
                st.rerun()

    # --- INITIALIZATION (Dynamic based on selection) ---
    # If the DB has changed or isn't loaded, load it now
    if 'current_db_name' not in st.session_state or st.session_state.current_db_name != selected_db:

        # We use an empty container to hold messages
        msg_container = st.empty()

        with st.spinner(f"Syncing {selected_db}..."):

            # 1. Attempt the Pull (returns True, False, or an error string)
            result = SyncManager.pull_data(selected_db)

            # 2. Check the Result
            if result is True:
                # Success! Cloud file found.
                msg_container.success(f"Loaded {selected_db} from Cloud.")
            else:
                # Failure (file deleted, new setup, or missing token):
                # warn and fall through to a fresh local DB instead of stopping.
                msg_container.warning(f"Could not find {selected_db} in cloud. Creating new local database.")

            # 3. Initialize the Database Manager (either with the downloaded
            # file or a new blank one)
            try:
                st.session_state.db = DatabaseManager(selected_db)
                st.session_state.search_engine = SearchEngine()
                st.session_state.current_db_name = selected_db
                # Intentionally no immediate rerun here, so the warning stays visible
            except Exception as e:
                st.error(f"Failed to initialize database: {e}")
                st.stop()

    # 3. Upload Section
    # The uploader key is bumped after each successful ingest so the widget
    # resets (clears its selected files) on the next rerun.
    if "uploader_key" not in st.session_state:
        st.session_state.uploader_key = 0

    uploaded_files = st.file_uploader(
        f"Upload to {selected_db}", 
        accept_multiple_files=True, 
        type=['pdf', 'docx', 'txt', 'csv', 'xlsx'],
        key=f"uploader_{st.session_state.uploader_key}" 
    )

    if uploaded_files and st.button("Ingest Documents"):
        progress_bar = st.progress(0)
        status = st.empty()

        existing_files = st.session_state.db.get_all_filenames()

        for i, f in enumerate(uploaded_files):
            status.text(f"Processing: {f.name}...")

            # Re-uploading an existing file replaces the stored copy.
            if f.name in existing_files:
                st.toast(f"♻️ Updating: {f.name}")
                st.session_state.db.delete_document(f.name)

            text, filename, method = process_file(f)

            # FIX: interpolate the filename — a literal "(unknown)"
            # placeholder had been left in this error message.
            if "Error" in method:
                st.error(f"Failed {filename}: {method}")
                continue

            chunks, doc_id = chunk_text(text, filename)

            # Generate Abstract (only for documents with real content)
            abstract = "No summary generated."
            if len(text) > 500:
                # FIX: interpolate the filename — a literal "(unknown)"
                # placeholder had been left in this spinner label.
                with st.spinner(f"Writing abstract for {filename}..."):
                    abstract = ask_llm(
                        query="Generate Abstract", 
                        context=text[:30000],  # cap the context sent to the LLM
                        mode="Abstract Generator", 
                        model_provider="Gemini"
                    )

            st.session_state.db.add_document(doc_id, filename, text, abstract=abstract)
            st.session_state.search_engine.add_features(chunks)
            progress_bar.progress((i + 1) / len(uploaded_files))

        status.text("Syncing to Cloud...")
        # Push SPECIFICALLY the active database
        SyncManager.push_data(selected_db)

        st.success(f"Ingested {len(uploaded_files)} docs into {selected_db}!")
        time.sleep(1)
        st.session_state.uploader_key += 1 
        st.rerun()

    st.divider()

    # 4. Document Library
    st.subheader(f"Files in {selected_db}")
    all_files = st.session_state.db.get_all_filenames()

    if all_files:
        with st.expander("View File List", expanded=False):
            for f in all_files:
                st.text(f"β€’ {f}")

        file_to_del = st.selectbox("Delete File:", [""] + all_files)
        if file_to_del and st.button("πŸ—‘οΈ Delete Selected"):
            st.session_state.db.delete_document(file_to_del)
            st.toast(f"Removed {file_to_del}")
            SyncManager.push_data(selected_db)
            time.sleep(1)
            st.rerun()

        if st.button("⚠️ Nuke Database", type="primary"):
            # Delete every document, then clear the vector index to match.
            for f in all_files:
                st.session_state.db.delete_document(f)
            st.session_state.search_engine.reset_index()
            SyncManager.push_data(selected_db)
            st.success("Database wiped.")
            time.sleep(1)
            st.rerun()
    else:
        st.info("Library is empty.")

# --- MAIN UI: SEARCH ---
st.title("βš“ Navy Policy Architect")
st.caption(f"Connected to Knowledge Base: {st.session_state.current_db_name}")

query = st.text_input("Enter your query...", placeholder="Search...")

if query:
    hits = st.session_state.search_engine.search(query, top_k=5)

    if hits:
        # The best-scoring document feeds the LLM analysis panel.
        best = hits[0]
        doc_text = st.session_state.db.get_doc_text(best['doc_id'])

        with st.container():
            st.markdown("### πŸ€– Intelligence Hub")
            left, right = st.columns(2)
            with left:
                mode_choice = st.selectbox(
                    "Select Analysis Type:", 
                    ["Executive Summary", "Action Plan", "Risk Assessment", "Socratic Review", "Instructor Mode"]
                )
            with right:
                model_label = st.selectbox(
                    "Select Model:",
                    ["Gemini (Cloud - Smartest)", "Granite (Private Space)"]
                )
                # Map the display label onto the backend provider name.
                provider = "Gemini" if "Gemini" in model_label else "Granite"

            if st.button("✨ Generate Assessment"):
                with st.spinner(f"Consulting {provider}..."):
                    answer = ask_llm(query, doc_text, mode=mode_choice, model_provider=provider)
                    st.markdown("---")
                    st.markdown(answer)
                    st.markdown("---")

        # One color-coded reference card per search hit.
        with st.expander("πŸ“š Reference Documents", expanded=True):
            for hit in hits:
                score = hit['score']
                if score > 2:
                    color = "#09ab3b"
                elif score > 0:
                    color = "#ffbd45"
                else:
                    color = "#ff4b4b"
                abstract_text = st.session_state.db.get_doc_abstract(hit['doc_id'])

                # HTML kept left-aligned so Streamlit doesn't treat the
                # indented markup as a Markdown code block.
                card_html = f"""
<div style="
    border-left: 5px solid {color}; 
    padding: 15px; 
    background-color: #f0f2f6; 
    margin-bottom: 15px; 
    border-radius: 5px;
    color: #1f1f1f;
">
    <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
        <h4 style="margin:0; color: #0e1117;">πŸ“„ {hit['source']}</h4>
        <span style="font-size: 0.8em; color: #555; background: #ddd; padding: 2px 8px; border-radius: 4px;">Relevance: {score:.2f}</span>
    </div>
    <div style="background: #e3e6ea; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
        <p style="margin: 0; font-size: 0.9em; color: #333;"><strong>πŸ€– Abstract:</strong> {abstract_text}</p>
    </div>
    <p style="margin: 0; font-style: italic; font-size: 0.85em; color: #555;">
        "Matching Chunk: ...{hit['snippet']}..."
    </p>
</div>
"""
                st.markdown(card_html, unsafe_allow_html=True)
    else:
        st.info("No matching documents found.")