NavyDevilDoc committed on
Commit
bfaaaee
·
verified ·
1 Parent(s): a859b2e

Update app.py

Browse files

Fixed issues with duplicate file names in the database.

Files changed (1) hide show
  1. app.py +66 -14
app.py CHANGED
@@ -63,44 +63,99 @@ if 'db' not in st.session_state:
63
  with st.sidebar:
64
  st.header("🗄️ Knowledge Base")
65
 
66
- # 1. Upload Section
67
- uploaded_files = st.file_uploader("Upload Policy Documents", accept_multiple_files=True, type=['pdf', 'docx', 'txt', 'csv', 'xlsx'])
 
 
 
 
 
 
 
 
 
68
 
69
  if uploaded_files and st.button("Ingest Documents"):
70
  progress_bar = st.progress(0)
71
  status = st.empty()
72
 
 
 
 
73
  for i, f in enumerate(uploaded_files):
74
  status.text(f"Processing: {f.name}...")
75
 
76
- # A. Parse File (handled by src/parsers.py)
 
 
 
 
 
 
 
77
  text, filename, method = process_file(f)
78
 
79
  if "Error" in method:
80
  st.error(f"Failed {filename}: {method}")
81
  continue
82
 
83
- # B. Chunk & ID (handled by src/parsers.py)
84
  chunks, doc_id = chunk_text(text, filename)
85
 
86
- # C. Save to SQLite (handled by src/database.py)
87
- # We explicitly store the full text for reliable RAG later
88
  st.session_state.db.add_document(doc_id, filename, text)
89
 
90
- # D. Add to Vector Index (handled by src/search.py)
91
- # We only vector search the chunks, but they link back to doc_id
92
  st.session_state.search_engine.add_features(chunks)
93
 
94
  progress_bar.progress((i + 1) / len(uploaded_files))
95
 
96
  status.text("Syncing to Cloud...")
97
  SyncManager.push_data()
98
- st.success(f"Successfully ingested {len(uploaded_files)} documents!")
99
- time.sleep(2)
 
 
100
  st.rerun()
101
 
102
  st.divider()
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # 2. Management Section
105
  st.subheader("Manage Files")
106
  all_files = st.session_state.db.get_all_filenames()
@@ -108,10 +163,7 @@ with st.sidebar:
108
  st.caption(f"Total Documents: {len(all_files)}")
109
  file_to_del = st.selectbox("Delete File:", [""] + all_files)
110
  if file_to_del and st.button("🗑️ Remove Document"):
111
- # Delete from SQL
112
  deleted_id = st.session_state.db.delete_document(file_to_del)
113
- # Note: FAISS deletion is hard, usually we just rebuild index.
114
- # For now, we accept the "Ghost" vectors in FAISS until a full rebuild.
115
  st.toast(f"Removed {file_to_del} from Database.")
116
  SyncManager.push_data()
117
  time.sleep(1)
@@ -142,7 +194,7 @@ if query:
142
  st.caption(f"Analyzing primary source: {top_match['source']}")
143
 
144
  if st.button("✨ Generate Assessment"):
145
- with st.spinner("Consulting Granite Model..."):
146
  # Call our separated LLM client
147
  response = ask_granite(query, full_doc_text)
148
 
 
63
  with st.sidebar:
64
  st.header("🗄️ Knowledge Base")
65
 
66
+ # 1. Initialize the Uploader Key
67
+ if "uploader_key" not in st.session_state:
68
+ st.session_state.uploader_key = 0
69
+
70
+ # 2. Upload Section
71
+ uploaded_files = st.file_uploader(
72
+ "Upload Policy Documents",
73
+ accept_multiple_files=True,
74
+ type=['pdf', 'docx', 'txt', 'csv', 'xlsx'],
75
+ key=f"uploader_{st.session_state.uploader_key}"
76
+ )
77
 
78
  if uploaded_files and st.button("Ingest Documents"):
79
  progress_bar = st.progress(0)
80
  status = st.empty()
81
 
82
+ # Get current files to check for duplicates
83
+ existing_files = st.session_state.db.get_all_filenames()
84
+
85
  for i, f in enumerate(uploaded_files):
86
  status.text(f"Processing: {f.name}...")
87
 
88
+ # --- DUPLICATION CHECK ---
89
+ if f.name in existing_files:
90
+ st.toast(f"♻️ Updating existing file: {f.name}")
91
+ # Remove the old version first (Cleans SQL and FAISS)
92
+ st.session_state.db.delete_document(f.name)
93
+ # -------------------------
94
+
95
+ # A. Parse File
96
  text, filename, method = process_file(f)
97
 
98
  if "Error" in method:
99
  st.error(f"Failed {filename}: {method}")
100
  continue
101
 
102
+ # B. Chunk & ID
103
  chunks, doc_id = chunk_text(text, filename)
104
 
105
+ # C. Save to SQLite
 
106
  st.session_state.db.add_document(doc_id, filename, text)
107
 
108
+ # D. Add to Vector Index
 
109
  st.session_state.search_engine.add_features(chunks)
110
 
111
  progress_bar.progress((i + 1) / len(uploaded_files))
112
 
113
  status.text("Syncing to Cloud...")
114
  SyncManager.push_data()
115
+
116
+ st.success(f"Successfully processed {len(uploaded_files)} documents!")
117
+ time.sleep(1)
118
+ st.session_state.uploader_key += 1
119
  st.rerun()
120
 
121
  st.divider()
122
 
123
+ # 3. Document Library (Better Visibility)
124
+ st.subheader("Manage Files")
125
+
126
+ # We fetch the latest list
127
+ all_files = st.session_state.db.get_all_filenames()
128
+
129
+ if all_files:
130
+ st.caption(f"📚 **Library: {len(all_files)} Documents**")
131
+
132
+ # VISUAL LIST: A clean, scrollable list of what you have
133
+ with st.expander("View File List", expanded=False):
134
+ for f in all_files:
135
+ st.text(f"• {f}")
136
+
137
+ # DELETION MENU
138
+ file_to_del = st.selectbox("Select file to delete:", [""] + all_files)
139
+ if file_to_del and st.button("🗑️ Delete Selected"):
140
+ deleted_id = st.session_state.db.delete_document(file_to_del)
141
+ st.toast(f"Removed {file_to_del}")
142
+ SyncManager.push_data()
143
+ time.sleep(1)
144
+ st.rerun()
145
+
146
+ # DANGER ZONE: Nuke Everything
147
+ if st.button("⚠️ Reset Database (Delete All)", type="primary"):
148
+ for f in all_files:
149
+ st.session_state.db.delete_document(f)
150
+ # We also wipe the index explicitly to be safe
151
+ st.session_state.search_engine.reset_index()
152
+ SyncManager.push_data()
153
+ st.success("Database wiped clean.")
154
+ time.sleep(1)
155
+ st.rerun()
156
+ else:
157
+ st.info("Library is empty.")
158
+
159
  # 2. Management Section
160
  st.subheader("Manage Files")
161
  all_files = st.session_state.db.get_all_filenames()
 
163
  st.caption(f"Total Documents: {len(all_files)}")
164
  file_to_del = st.selectbox("Delete File:", [""] + all_files)
165
  if file_to_del and st.button("🗑️ Remove Document"):
 
166
  deleted_id = st.session_state.db.delete_document(file_to_del)
 
 
167
  st.toast(f"Removed {file_to_del} from Database.")
168
  SyncManager.push_data()
169
  time.sleep(1)
 
194
  st.caption(f"Analyzing primary source: {top_match['source']}")
195
 
196
  if st.button("✨ Generate Assessment"):
197
+ with st.spinner("Sending Data to Selected Model..."):
198
  # Call our separated LLM client
199
  response = ask_granite(query, full_doc_text)
200