NavyDevilDoc committed on
Commit
74f60fc
·
verified ·
1 Parent(s): bd85152

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -105
app.py CHANGED
@@ -12,64 +12,105 @@ from src.llm_client import ask_llm
12
  # --- CONFIGURATION ---
13
  DATASET_REPO_ID = "NavyDevilDoc/navy-policy-index"
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
15
- DB_FILE = "navy_docs.db"
16
  INDEX_FILE = "navy_index.faiss"
17
  META_FILE = "navy_metadata.pkl"
18
 
19
  st.set_page_config(page_title="Navy Policy Architect", layout="wide", page_icon="⚓")
20
 
21
- # --- CLOUD SYNC MANAGER ---
22
  class SyncManager:
23
  """Handles downloading/uploading the Database & Index to Hugging Face"""
 
24
  @staticmethod
25
- def pull_data():
26
- if not HF_TOKEN: return
 
27
  try:
28
- # Download SQLite DB
29
- if not os.path.exists(DB_FILE):
30
- hf_hub_download(repo_id=DATASET_REPO_ID, filename=DB_FILE, local_dir=".", token=HF_TOKEN)
31
- # Download FAISS Index
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  if not os.path.exists(INDEX_FILE):
33
- hf_hub_download(repo_id=DATASET_REPO_ID, filename=INDEX_FILE, local_dir=".", token=HF_TOKEN)
34
- hf_hub_download(repo_id=DATASET_REPO_ID, filename=META_FILE, local_dir=".", token=HF_TOKEN)
 
 
 
35
  return True
36
  except Exception as e:
37
- # It's okay if files don't exist yet (first run)
38
- print(f"Sync Note: {e}")
39
  return False
40
 
41
  @staticmethod
42
- def push_data():
43
  if not HF_TOKEN: return
44
  api = HfApi(token=HF_TOKEN)
45
  try:
46
- # Upload SQLite DB
47
- api.upload_file(path_or_fileobj=DB_FILE, path_in_repo=DB_FILE, repo_id=DATASET_REPO_ID, repo_type="dataset")
48
  # Upload FAISS Index
49
  api.upload_file(path_or_fileobj=INDEX_FILE, path_in_repo=INDEX_FILE, repo_id=DATASET_REPO_ID, repo_type="dataset")
50
  api.upload_file(path_or_fileobj=META_FILE, path_in_repo=META_FILE, repo_id=DATASET_REPO_ID, repo_type="dataset")
51
  st.toast("Cloud Sync Complete!", icon="☁️")
52
  except Exception as e:
53
- st.error(f"Sync Error: {e}")
54
 
55
- # --- INITIALIZATION ---
56
- if 'db' not in st.session_state:
57
- with st.spinner("Connecting to Secure Cloud Storage..."):
58
- SyncManager.pull_data()
59
- st.session_state.db = DatabaseManager(DB_FILE)
60
- st.session_state.search_engine = SearchEngine()
61
-
62
- # --- SIDEBAR: UPLOAD & MANAGE ---
63
  with st.sidebar:
64
  st.header("🗄️ Knowledge Base")
65
 
66
- # 1. Initialize the Uploader Key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  if "uploader_key" not in st.session_state:
68
  st.session_state.uploader_key = 0
69
 
70
- # 2. Upload Section
71
  uploaded_files = st.file_uploader(
72
- "Upload Policy Documents",
73
  accept_multiple_files=True,
74
  type=['pdf', 'docx', 'txt', 'csv', 'xlsx'],
75
  key=f"uploader_{st.session_state.uploader_key}"
@@ -79,179 +120,122 @@ with st.sidebar:
79
  progress_bar = st.progress(0)
80
  status = st.empty()
81
 
82
- # Get current files to check for duplicates
83
  existing_files = st.session_state.db.get_all_filenames()
84
 
85
  for i, f in enumerate(uploaded_files):
86
  status.text(f"Processing: {f.name}...")
87
 
88
- # --- DUPLICATION CHECK ---
89
  if f.name in existing_files:
90
- st.toast(f"♻️ Updating existing file: {f.name}")
91
  st.session_state.db.delete_document(f.name)
92
 
93
- # A. Parse File
94
  text, filename, method = process_file(f)
95
 
96
  if "Error" in method:
97
  st.error(f"Failed {filename}: {method}")
98
  continue
99
 
100
- # B. Chunk & ID
101
  chunks, doc_id = chunk_text(text, filename)
102
 
103
- # --- NEW STEP: Generate Abstract ---
104
- # We skip this for tiny files to save time
105
  abstract = "No summary generated."
106
  if len(text) > 500:
107
  with st.spinner(f"Writing abstract for {filename}..."):
108
- # We utilize our flexible LLM client
109
- # Note: We send only the first 30k chars to keep it fast
110
  abstract = ask_llm(
111
  query="Generate Abstract",
112
  context=text[:30000],
113
  mode="Abstract Generator",
114
  model_provider="Gemini"
115
  )
116
- # -----------------------------------
117
 
118
- # C. Save to SQLite (Now includes Abstract)
119
  st.session_state.db.add_document(doc_id, filename, text, abstract=abstract)
120
-
121
- # D. Add to Vector Index
122
  st.session_state.search_engine.add_features(chunks)
 
123
 
124
- progress_bar.progress((i + 1) / len(uploaded_files))
125
  status.text("Syncing to Cloud...")
126
- SyncManager.push_data()
 
127
 
128
- st.success(f"Successfully processed {len(uploaded_files)} documents!")
129
  time.sleep(1)
130
  st.session_state.uploader_key += 1
131
  st.rerun()
132
 
133
  st.divider()
134
 
135
- # 3. Document Library (Better Visibility)
136
- st.subheader("Manage Files")
137
-
138
- # We fetch the latest list
139
  all_files = st.session_state.db.get_all_filenames()
140
 
141
  if all_files:
142
- st.caption(f"📚 **Library: {len(all_files)} Documents**")
143
-
144
- # VISUAL LIST: A clean, scrollable list of what you have
145
  with st.expander("View File List", expanded=False):
146
  for f in all_files:
147
  st.text(f"• {f}")
148
 
149
- # DELETION MENU
150
- file_to_del = st.selectbox("Select file to delete:", [""] + all_files)
151
  if file_to_del and st.button("🗑️ Delete Selected"):
152
  deleted_id = st.session_state.db.delete_document(file_to_del)
153
  st.toast(f"Removed {file_to_del}")
154
- SyncManager.push_data()
155
  time.sleep(1)
156
  st.rerun()
157
 
158
- # DANGER ZONE: Nuke Everything
159
- if st.button("⚠️ Reset Database (Delete All)", type="primary"):
160
  for f in all_files:
161
  st.session_state.db.delete_document(f)
162
- # We also wipe the index explicitly to be safe
163
  st.session_state.search_engine.reset_index()
164
- SyncManager.push_data()
165
- st.success("Database wiped clean.")
166
  time.sleep(1)
167
  st.rerun()
168
  else:
169
  st.info("Library is empty.")
170
-
171
- # 2. Management Section
172
- st.subheader("Manage Files")
173
- all_files = st.session_state.db.get_all_filenames()
174
- if all_files:
175
- st.caption(f"Total Documents: {len(all_files)}")
176
- file_to_del = st.selectbox("Delete File:", [""] + all_files)
177
- if file_to_del and st.button("🗑️ Remove Document"):
178
- deleted_id = st.session_state.db.delete_document(file_to_del)
179
- st.toast(f"Removed {file_to_del} from Database.")
180
- SyncManager.push_data()
181
- time.sleep(1)
182
- st.rerun()
183
 
184
  # --- MAIN UI: SEARCH ---
185
  st.title("⚓ Navy Policy Architect")
186
- st.markdown("Search across PDF, Word, and Excel files. Generate AI summaries based on official policy.")
187
 
188
- query = st.text_input("Enter your query (e.g., 'What are the requirements for O-5 promotion?')", placeholder="Search...")
189
 
190
  if query:
191
- # 1. SEARCH (Vector Search -> Returns relevant chunks)
192
  results = st.session_state.search_engine.search(query, top_k=5)
193
 
194
  if not results:
195
  st.info("No matching documents found.")
196
  else:
197
- # 2. SYNTHESIS (The "Parent Retrieval" Magic)
198
  top_match = results[0]
199
-
200
- # We grab the FULL TEXT from SQLite using the doc_id found in the chunk
201
  full_doc_text = st.session_state.db.get_doc_text(top_match['doc_id'])
202
 
203
- # --- AI SUMMARY SECTION ---
204
  with st.container():
205
  st.markdown("### 🤖 Intelligence Hub")
206
- st.caption(f"Analyzing primary source: {top_match['source']}")
207
-
208
- # LAYOUT: Two columns for controls
209
  col1, col2 = st.columns(2)
210
-
211
  with col1:
212
- # The "Deep Dive" Selector
213
  analysis_mode = st.selectbox(
214
  "Select Analysis Type:",
215
  ["Executive Summary", "Action Plan", "Risk Assessment", "Socratic Review", "Instructor Mode"]
216
  )
217
-
218
  with col2:
219
- # The "Brain" Selector
220
  model_choice = st.selectbox(
221
  "Select Model:",
222
  ["Gemini (Cloud - Smartest)", "Granite (Private Space)"]
223
  )
224
- # Map the UI text to the backend key
225
  provider = "Gemini" if "Gemini" in model_choice else "Granite"
226
 
227
  if st.button("✨ Generate Assessment"):
228
- with st.spinner(f"Consulting {provider} via {analysis_mode}..."):
229
-
230
- # Call the client with the new parameters
231
  response = ask_llm(query, full_doc_text, mode=analysis_mode, model_provider=provider)
232
-
233
  st.markdown("---")
234
  st.markdown(response)
235
  st.markdown("---")
236
-
237
- # Feature: Source Verification
238
- with st.expander("🔍 View Source Data used for this summary"):
239
- st.text(full_doc_text[:2000] + "...")
240
 
241
- # --- SEARCH RESULTS SECTION (Rich View) ---
242
- with st.expander("📚 Reference Documents (Click to view)", expanded=True):
243
- if not results:
244
- st.info("No matching documents found.")
245
-
246
  for res in results:
247
  score = res['score']
248
  color = "#09ab3b" if score > 2 else "#ffbd45" if score > 0 else "#ff4b4b"
249
-
250
- # RETRIEVE THE ABSTRACT FROM DB
251
  doc_abstract = st.session_state.db.get_doc_abstract(res['doc_id'])
252
 
253
- # FIX: We moved the HTML string flush to the left to prevent
254
- # Markdown from interpreting it as a code block.
255
  html_content = f"""
256
  <div style="
257
  border-left: 5px solid {color};
@@ -265,11 +249,9 @@ if query:
265
  <h4 style="margin:0; color: #0e1117;">📄 {res['source']}</h4>
266
  <span style="font-size: 0.8em; color: #555; background: #ddd; padding: 2px 8px; border-radius: 4px;">Relevance: {score:.2f}</span>
267
  </div>
268
-
269
  <div style="background: #e3e6ea; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
270
  <p style="margin: 0; font-size: 0.9em; color: #333;"><strong>🤖 Abstract:</strong> {doc_abstract}</p>
271
  </div>
272
-
273
  <p style="margin: 0; font-style: italic; font-size: 0.85em; color: #555;">
274
  "Matching Chunk: ...{res['snippet']}..."
275
  </p>
 
12
  # --- CONFIGURATION ---
13
  DATASET_REPO_ID = "NavyDevilDoc/navy-policy-index"
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
15
  INDEX_FILE = "navy_index.faiss"
16
  META_FILE = "navy_metadata.pkl"
17
 
18
  st.set_page_config(page_title="Navy Policy Architect", layout="wide", page_icon="⚓")
19
 
20
+ # --- CLOUD SYNC MANAGER (UPGRADED) ---
21
  class SyncManager:
22
  """Handles downloading/uploading the Database & Index to Hugging Face"""
23
+
24
  @staticmethod
25
+ def get_remote_dbs():
26
+ """Scans the Hugging Face Repo for available .db files"""
27
+ if not HF_TOKEN: return []
28
  try:
29
+ api = HfApi(token=HF_TOKEN)
30
+ files = api.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
31
+ # Filter for .db files (excluding potential system files)
32
+ dbs = [f for f in files if f.endswith(".db")]
33
+ return dbs
34
+ except Exception as e:
35
+ print(f"Error listing DBs: {e}")
36
+ return []
37
+
38
+ @staticmethod
39
+ def pull_data(db_filename):
40
+ if not HF_TOKEN: return False
41
+ try:
42
+ # Download Specific SQLite DB
43
+ if not os.path.exists(db_filename):
44
+ hf_hub_download(repo_id=DATASET_REPO_ID, filename=db_filename, local_dir=".", token=HF_TOKEN)
45
+
46
+ # Download FAISS Index (Note: We use one shared index for simplicity in this demo,
47
+ # but ideally you'd have 'navy_index_eng.faiss', etc.)
48
  if not os.path.exists(INDEX_FILE):
49
+ try:
50
+ hf_hub_download(repo_id=DATASET_REPO_ID, filename=INDEX_FILE, local_dir=".", token=HF_TOKEN)
51
+ hf_hub_download(repo_id=DATASET_REPO_ID, filename=META_FILE, local_dir=".", token=HF_TOKEN)
52
+ except:
53
+ pass # It's okay if index doesn't exist yet
54
  return True
55
  except Exception as e:
56
+ st.error(f"Sync Error (Pull): {e}")
 
57
  return False
58
 
59
  @staticmethod
60
+ def push_data(db_filename):
61
  if not HF_TOKEN: return
62
  api = HfApi(token=HF_TOKEN)
63
  try:
64
+ # Upload Specific SQLite DB
65
+ api.upload_file(path_or_fileobj=db_filename, path_in_repo=db_filename, repo_id=DATASET_REPO_ID, repo_type="dataset")
66
  # Upload FAISS Index
67
  api.upload_file(path_or_fileobj=INDEX_FILE, path_in_repo=INDEX_FILE, repo_id=DATASET_REPO_ID, repo_type="dataset")
68
  api.upload_file(path_or_fileobj=META_FILE, path_in_repo=META_FILE, repo_id=DATASET_REPO_ID, repo_type="dataset")
69
  st.toast("Cloud Sync Complete!", icon="☁️")
70
  except Exception as e:
71
+ st.error(f"Sync Error (Push): {e}")
72
 
73
+ # --- SIDEBAR: KNOWLEDGE BASE SELECTOR ---
 
 
 
 
 
 
 
74
  with st.sidebar:
75
  st.header("🗄️ Knowledge Base")
76
 
77
+ # 1. Database Selector
78
+ # We fetch available DBs from the cloud to populate the dropdown
79
+ if "available_dbs" not in st.session_state:
80
+ st.session_state.available_dbs = SyncManager.get_remote_dbs()
81
+ if not st.session_state.available_dbs:
82
+ st.session_state.available_dbs = ["navy_docs.db"] # Default if empty
83
+
84
+ selected_db = st.selectbox("Select Database:", st.session_state.available_dbs)
85
+
86
+ # 2. Create New Database Option
87
+ with st.expander("➕ Create New Database"):
88
+ new_db_name = st.text_input("Name (e.g., 'Medical.db')")
89
+ if st.button("Create"):
90
+ if not new_db_name.endswith(".db"):
91
+ new_db_name += ".db"
92
+ st.session_state.available_dbs.append(new_db_name)
93
+ # Force reload to switch to this new DB
94
+ st.rerun()
95
+
96
+ # --- INITIALIZATION (Dynamic based on selection) ---
97
+ # If the DB has changed or isn't loaded, load it now
98
+ if 'current_db_name' not in st.session_state or st.session_state.current_db_name != selected_db:
99
+ with st.spinner(f"Loading {selected_db} from Cloud..."):
100
+ SyncManager.pull_data(selected_db)
101
+ st.session_state.db = DatabaseManager(selected_db)
102
+ st.session_state.search_engine = SearchEngine() # This resets the search engine for the new DB
103
+ st.session_state.current_db_name = selected_db
104
+ st.rerun() # Refresh to ensure everything is synced
105
+
106
+ st.divider()
107
+
108
+ # 3. Upload Section
109
  if "uploader_key" not in st.session_state:
110
  st.session_state.uploader_key = 0
111
 
 
112
  uploaded_files = st.file_uploader(
113
+ f"Upload to {selected_db}",
114
  accept_multiple_files=True,
115
  type=['pdf', 'docx', 'txt', 'csv', 'xlsx'],
116
  key=f"uploader_{st.session_state.uploader_key}"
 
120
  progress_bar = st.progress(0)
121
  status = st.empty()
122
 
 
123
  existing_files = st.session_state.db.get_all_filenames()
124
 
125
  for i, f in enumerate(uploaded_files):
126
  status.text(f"Processing: {f.name}...")
127
 
 
128
  if f.name in existing_files:
129
+ st.toast(f"♻️ Updating: {f.name}")
130
  st.session_state.db.delete_document(f.name)
131
 
 
132
  text, filename, method = process_file(f)
133
 
134
  if "Error" in method:
135
  st.error(f"Failed {filename}: {method}")
136
  continue
137
 
 
138
  chunks, doc_id = chunk_text(text, filename)
139
 
140
+ # Generate Abstract
 
141
  abstract = "No summary generated."
142
  if len(text) > 500:
143
  with st.spinner(f"Writing abstract for {filename}..."):
 
 
144
  abstract = ask_llm(
145
  query="Generate Abstract",
146
  context=text[:30000],
147
  mode="Abstract Generator",
148
  model_provider="Gemini"
149
  )
 
150
 
 
151
  st.session_state.db.add_document(doc_id, filename, text, abstract=abstract)
 
 
152
  st.session_state.search_engine.add_features(chunks)
153
+ progress_bar.progress((i + 1) / len(uploaded_files))
154
 
 
155
  status.text("Syncing to Cloud...")
156
+ # Push SPECIFICALLY the active database
157
+ SyncManager.push_data(selected_db)
158
 
159
+ st.success(f"Ingested {len(uploaded_files)} docs into {selected_db}!")
160
  time.sleep(1)
161
  st.session_state.uploader_key += 1
162
  st.rerun()
163
 
164
  st.divider()
165
 
166
+ # 4. Document Library
167
+ st.subheader(f"Files in {selected_db}")
 
 
168
  all_files = st.session_state.db.get_all_filenames()
169
 
170
  if all_files:
 
 
 
171
  with st.expander("View File List", expanded=False):
172
  for f in all_files:
173
  st.text(f"• {f}")
174
 
175
+ file_to_del = st.selectbox("Delete File:", [""] + all_files)
 
176
  if file_to_del and st.button("🗑️ Delete Selected"):
177
  deleted_id = st.session_state.db.delete_document(file_to_del)
178
  st.toast(f"Removed {file_to_del}")
179
+ SyncManager.push_data(selected_db)
180
  time.sleep(1)
181
  st.rerun()
182
 
183
+ if st.button("⚠️ Nuke Database", type="primary"):
 
184
  for f in all_files:
185
  st.session_state.db.delete_document(f)
 
186
  st.session_state.search_engine.reset_index()
187
+ SyncManager.push_data(selected_db)
188
+ st.success("Database wiped.")
189
  time.sleep(1)
190
  st.rerun()
191
  else:
192
  st.info("Library is empty.")
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  # --- MAIN UI: SEARCH ---
195
  st.title("⚓ Navy Policy Architect")
196
+ st.caption(f"Connected to Knowledge Base: {st.session_state.current_db_name}")
197
 
198
+ query = st.text_input("Enter your query...", placeholder="Search...")
199
 
200
  if query:
 
201
  results = st.session_state.search_engine.search(query, top_k=5)
202
 
203
  if not results:
204
  st.info("No matching documents found.")
205
  else:
 
206
  top_match = results[0]
 
 
207
  full_doc_text = st.session_state.db.get_doc_text(top_match['doc_id'])
208
 
 
209
  with st.container():
210
  st.markdown("### 🤖 Intelligence Hub")
 
 
 
211
  col1, col2 = st.columns(2)
 
212
  with col1:
 
213
  analysis_mode = st.selectbox(
214
  "Select Analysis Type:",
215
  ["Executive Summary", "Action Plan", "Risk Assessment", "Socratic Review", "Instructor Mode"]
216
  )
 
217
  with col2:
 
218
  model_choice = st.selectbox(
219
  "Select Model:",
220
  ["Gemini (Cloud - Smartest)", "Granite (Private Space)"]
221
  )
 
222
  provider = "Gemini" if "Gemini" in model_choice else "Granite"
223
 
224
  if st.button("✨ Generate Assessment"):
225
+ with st.spinner(f"Consulting {provider}..."):
 
 
226
  response = ask_llm(query, full_doc_text, mode=analysis_mode, model_provider=provider)
 
227
  st.markdown("---")
228
  st.markdown(response)
229
  st.markdown("---")
 
 
 
 
230
 
231
+ # --- SEARCH RESULTS SECTION (FIXED HTML) ---
232
+ with st.expander("📚 Reference Documents", expanded=True):
 
 
 
233
  for res in results:
234
  score = res['score']
235
  color = "#09ab3b" if score > 2 else "#ffbd45" if score > 0 else "#ff4b4b"
 
 
236
  doc_abstract = st.session_state.db.get_doc_abstract(res['doc_id'])
237
 
238
+ # IMPORTANT: Left-aligned HTML string to prevent Code Block rendering
 
239
  html_content = f"""
240
  <div style="
241
  border-left: 5px solid {color};
 
249
  <h4 style="margin:0; color: #0e1117;">📄 {res['source']}</h4>
250
  <span style="font-size: 0.8em; color: #555; background: #ddd; padding: 2px 8px; border-radius: 4px;">Relevance: {score:.2f}</span>
251
  </div>
 
252
  <div style="background: #e3e6ea; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
253
  <p style="margin: 0; font-size: 0.9em; color: #333;"><strong>🤖 Abstract:</strong> {doc_abstract}</p>
254
  </div>
 
255
  <p style="margin: 0; font-style: italic; font-size: 0.85em; color: #555;">
256
  "Matching Chunk: ...{res['snippet']}..."
257
  </p>