MrSimple07 committed on
Commit
5c3579c
·
1 Parent(s): cd201bf

fixed white back problem + upload new files problem + added history chatbot

Browse files
Files changed (2) hide show
  1. app.py +91 -44
  2. document_processor.py +7 -11
app.py CHANGED
@@ -5,20 +5,30 @@ from llama_index.llms.google_genai import GoogleGenAI
5
  from llama_index.core import Settings
6
  from config import *
7
  from document_processor import *
 
8
 
9
  query_engine = None
10
  chunks_df = None
 
 
11
 
12
- def answer_question(question):
13
- global query_engine
14
 
15
  if query_engine is None:
16
- return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ System not initialized or document database is empty</div>", ""
17
 
18
  try:
19
  start_time = time.time()
20
 
21
- response = query_engine.query(question)
 
 
 
 
 
 
 
22
  retrieved_nodes = query_engine.retriever.retrieve(question)
23
 
24
  end_time = time.time()
@@ -34,11 +44,18 @@ def answer_question(question):
34
  </div>
35
  </div>"""
36
 
37
- return answer_with_time, sources_html
 
 
 
 
 
38
 
39
  except Exception as e:
40
  error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ Error processing question: {str(e)}</div>"
41
- return error_msg, ""
 
 
42
 
43
  def generate_sources_html(nodes):
44
  html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
@@ -86,7 +103,7 @@ def get_documents_display():
86
  return html
87
 
88
  def upload_and_process_file(files, doc_names, doc_links):
89
- global query_engine, chunks_df
90
 
91
  if not files:
92
  return "No files selected", get_documents_display()
@@ -94,6 +111,7 @@ def upload_and_process_file(files, doc_names, doc_links):
94
  if len(files) != len(doc_names) or len(files) != len(doc_links):
95
  return "Error: Number of files must match number of document names and links", get_documents_display()
96
 
 
97
  results = []
98
 
99
  for i, file in enumerate(files):
@@ -103,6 +121,11 @@ def upload_and_process_file(files, doc_names, doc_links):
103
  if not doc_name:
104
  doc_name = file.name.split('/')[-1].replace('.txt', '').replace('.pdf', '')
105
 
 
 
 
 
 
106
  log_message(f"🔄 Starting processing of file {i+1}/{len(files)}: {file.name}")
107
 
108
  file_info, error = process_uploaded_file(file.name, file.name.split('/')[-1], doc_name, doc_link)
@@ -118,6 +141,8 @@ def upload_and_process_file(files, doc_names, doc_links):
118
  else:
119
  results.append(f"✅ {file_info['document']}: Successfully processed and added to database")
120
  log_message(f"✅ Completed processing: {file_info['document']}")
 
 
121
 
122
  return "\n".join(results), get_documents_display()
123
 
@@ -135,12 +160,19 @@ def create_interface():
135
 
136
  with gr.Row():
137
  with gr.Column(scale=3):
 
 
 
 
 
 
138
  question_input = gr.Textbox(
139
  label="Your question to the knowledge base",
140
  placeholder="Enter your question about the documents...",
141
  lines=3
142
  )
143
  ask_btn = gr.Button("🔍 Find Answer", variant="primary", size="lg")
 
144
 
145
  gr.Examples(
146
  examples=[
@@ -151,14 +183,12 @@ def create_interface():
151
  inputs=question_input
152
  )
153
 
154
- with gr.Row():
155
- with gr.Column(scale=2):
156
  answer_output = gr.HTML(
157
  label="",
158
  value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>The answer to your question will appear here...</div>",
159
  )
160
-
161
- with gr.Column(scale=1):
162
  sources_output = gr.HTML(
163
  label="",
164
  value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Sources will appear here...</div>",
@@ -166,14 +196,22 @@ def create_interface():
166
 
167
  ask_btn.click(
168
  fn=answer_question,
169
- inputs=[question_input],
170
- outputs=[answer_output, sources_output]
 
 
171
  )
172
 
173
  question_input.submit(
174
  fn=answer_question,
175
- inputs=[question_input],
176
- outputs=[answer_output, sources_output]
 
 
 
 
 
 
177
  )
178
 
179
  with gr.Tab("📚 Document Management"):
@@ -188,35 +226,39 @@ def create_interface():
188
 
189
  refresh_btn = gr.Button("🔄 Refresh List", variant="secondary")
190
 
191
- with gr.Column(scale=1):
192
- gr.Markdown("#### Upload new documents")
193
- gr.Markdown("Supported formats: PDF, TXT")
194
 
195
  file_upload = gr.File(
196
  file_count="multiple",
197
  file_types=[".pdf", ".txt"],
198
- label="Select files to upload"
 
199
  )
200
 
201
  doc_names_input = gr.Textbox(
202
  label="Document names (one per line)",
203
  placeholder="Enter document names, one per line...",
204
- lines=5
 
205
  )
206
 
207
  doc_links_input = gr.Textbox(
208
  label="Document links (one per line)",
209
  placeholder="Enter document links, one per line...",
210
- lines=5
 
211
  )
212
 
213
- upload_btn = gr.Button("📤 Upload and Process", variant="primary")
214
 
215
  upload_status = gr.Textbox(
216
  label="Upload status",
217
  lines=8,
218
  max_lines=10,
219
- interactive=False
 
220
  )
221
 
222
  def process_names_and_links(names_text, links_text):
@@ -238,25 +280,30 @@ def create_interface():
238
  outputs=[documents_display]
239
  )
240
 
241
- return demo
242
-
243
- if __name__ == "__main__":
244
- log_message("🚀 Starting AIEXP - AI Expert for Regulatory Documentation")
245
-
246
- llm = GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
247
- Settings.llm = llm
248
-
249
- query_engine, chunks_df, success = initialize_system()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
- if success:
252
- log_message("🌟 Starting web interface...")
253
- demo = create_interface()
254
- demo.launch(
255
- server_name="0.0.0.0",
256
- server_port=7860,
257
- share=True,
258
- debug=False
259
- )
260
- else:
261
- log_message("❌ Cannot start application due to initialization error")
262
- sys.exit(1)
 
5
  from llama_index.core import Settings
6
  from config import *
7
  from document_processor import *
8
+ from llama_index.core.chat_engine import CondensePlusContextChatEngine
9
 
10
  query_engine = None
11
  chunks_df = None
12
+ chat_engine = None
13
+ chat_history = []
14
 
15
+ def answer_question(question, history):
16
+ global query_engine, chat_engine
17
 
18
  if query_engine is None:
19
+ return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ System not initialized or document database is empty</div>", "", history
20
 
21
  try:
22
  start_time = time.time()
23
 
24
+ # Initialize chat engine if not exists
25
+ if chat_engine is None:
26
+ chat_engine = CondensePlusContextChatEngine.from_defaults(
27
+ retriever=query_engine.retriever,
28
+ response_synthesizer=query_engine.response_synthesizer
29
+ )
30
+
31
+ response = chat_engine.chat(question)
32
  retrieved_nodes = query_engine.retriever.retrieve(question)
33
 
34
  end_time = time.time()
 
44
  </div>
45
  </div>"""
46
 
47
+ # Update chat history (keep last 6 messages - 3 exchanges)
48
+ new_history = history + [[question, response.response]]
49
+ if len(new_history) > 3:
50
+ new_history = new_history[-3:]
51
+
52
+ return answer_with_time, sources_html, new_history
53
 
54
  except Exception as e:
55
  error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ Error processing question: {str(e)}</div>"
56
+ return error_msg, "", history
57
+
58
+
59
 
60
  def generate_sources_html(nodes):
61
  html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
 
103
  return html
104
 
105
  def upload_and_process_file(files, doc_names, doc_links):
106
+ global query_engine, chunks_df, chat_engine
107
 
108
  if not files:
109
  return "No files selected", get_documents_display()
 
111
  if len(files) != len(doc_names) or len(files) != len(doc_links):
112
  return "Error: Number of files must match number of document names and links", get_documents_display()
113
 
114
+ existing_docs = get_existing_documents()
115
  results = []
116
 
117
  for i, file in enumerate(files):
 
121
  if not doc_name:
122
  doc_name = file.name.split('/')[-1].replace('.txt', '').replace('.pdf', '')
123
 
124
+ # Check if document already exists
125
+ if doc_name in existing_docs:
126
+ results.append(f"⚠️ {doc_name}: Document already exists in the system")
127
+ continue
128
+
129
  log_message(f"🔄 Starting processing of file {i+1}/{len(files)}: {file.name}")
130
 
131
  file_info, error = process_uploaded_file(file.name, file.name.split('/')[-1], doc_name, doc_link)
 
141
  else:
142
  results.append(f"✅ {file_info['document']}: Successfully processed and added to database")
143
  log_message(f"✅ Completed processing: {file_info['document']}")
144
+ # Reset chat engine to include new documents
145
+ chat_engine = None
146
 
147
  return "\n".join(results), get_documents_display()
148
 
 
160
 
161
  with gr.Row():
162
  with gr.Column(scale=3):
163
+ chatbot = gr.Chatbot(
164
+ label="Chat History",
165
+ height=400,
166
+ show_label=True
167
+ )
168
+
169
  question_input = gr.Textbox(
170
  label="Your question to the knowledge base",
171
  placeholder="Enter your question about the documents...",
172
  lines=3
173
  )
174
  ask_btn = gr.Button("🔍 Find Answer", variant="primary", size="lg")
175
+ clear_btn = gr.Button("🗑️ Clear History", variant="secondary")
176
 
177
  gr.Examples(
178
  examples=[
 
183
  inputs=question_input
184
  )
185
 
186
+ with gr.Column(scale=1):
 
187
  answer_output = gr.HTML(
188
  label="",
189
  value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>The answer to your question will appear here...</div>",
190
  )
191
+
 
192
  sources_output = gr.HTML(
193
  label="",
194
  value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Sources will appear here...</div>",
 
196
 
197
  ask_btn.click(
198
  fn=answer_question,
199
+ inputs=[question_input, chatbot],
200
+ outputs=[answer_output, sources_output, chatbot]
201
+ ).then(
202
+ lambda: "", inputs=None, outputs=question_input
203
  )
204
 
205
  question_input.submit(
206
  fn=answer_question,
207
+ inputs=[question_input, chatbot],
208
+ outputs=[answer_output, sources_output, chatbot]
209
+ ).then(
210
+ lambda: "", inputs=None, outputs=question_input
211
+ )
212
+
213
+ clear_btn.click(
214
+ lambda: [], inputs=None, outputs=chatbot
215
  )
216
 
217
  with gr.Tab("📚 Document Management"):
 
226
 
227
  refresh_btn = gr.Button("🔄 Refresh List", variant="secondary")
228
 
229
+ with gr.Column(scale=1, elem_id="upload-column"):
230
+ gr.Markdown("#### Upload new documents", elem_classes=["upload-header"])
231
+ gr.Markdown("Supported formats: PDF, TXT", elem_classes=["upload-info"])
232
 
233
  file_upload = gr.File(
234
  file_count="multiple",
235
  file_types=[".pdf", ".txt"],
236
+ label="Select files to upload",
237
+ elem_classes=["upload-file"]
238
  )
239
 
240
  doc_names_input = gr.Textbox(
241
  label="Document names (one per line)",
242
  placeholder="Enter document names, one per line...",
243
+ lines=5,
244
+ elem_classes=["upload-input"]
245
  )
246
 
247
  doc_links_input = gr.Textbox(
248
  label="Document links (one per line)",
249
  placeholder="Enter document links, one per line...",
250
+ lines=5,
251
+ elem_classes=["upload-input"]
252
  )
253
 
254
+ upload_btn = gr.Button("📤 Upload and Process", variant="primary", elem_classes=["upload-btn"])
255
 
256
  upload_status = gr.Textbox(
257
  label="Upload status",
258
  lines=8,
259
  max_lines=10,
260
+ interactive=False,
261
+ elem_classes=["upload-status"]
262
  )
263
 
264
  def process_names_and_links(names_text, links_text):
 
280
  outputs=[documents_display]
281
  )
282
 
283
+ # Add CSS to fix white background in upload tab
284
+ demo.css = """
285
+ #upload-column {
286
+ background-color: #f8f9fa !important;
287
+ padding: 20px !important;
288
+ border-radius: 10px !important;
289
+ border: 1px solid #e9ecef !important;
290
+ }
291
+ .upload-header h4 {
292
+ color: #2d3748 !important;
293
+ margin-bottom: 10px !important;
294
+ }
295
+ .upload-info {
296
+ color: #666 !important;
297
+ margin-bottom: 15px !important;
298
+ }
299
+ .upload-file, .upload-input, .upload-status {
300
+ background-color: white !important;
301
+ border: 1px solid #ced4da !important;
302
+ border-radius: 5px !important;
303
+ }
304
+ .upload-btn {
305
+ margin-top: 10px !important;
306
+ }
307
+ """
308
 
309
+ return demo
 
 
 
 
 
 
 
 
 
 
 
document_processor.py CHANGED
@@ -72,17 +72,13 @@ def process_uploaded_file(file_path, file_name, doc_name, doc_link):
72
 
73
  def get_existing_documents():
74
  try:
75
- upload_dir = "UPLOADED_DOCUMENTS"
76
- if not os.path.exists(upload_dir):
77
- return []
78
-
79
- documents = []
80
- for file_name in os.listdir(upload_dir):
81
- if file_name.endswith(('.txt', '.pdf')):
82
- doc_name = os.path.splitext(file_name)[0]
83
- documents.append(doc_name)
84
-
85
- return sorted(documents)
86
  except Exception as e:
87
  log_message(f"❌ Error reading documents: {str(e)}")
88
  return []
 
72
 
73
  def get_existing_documents():
74
  try:
75
+ chunks_csv_path = os.path.join(download_dir, chunks_filename)
76
+ if os.path.exists(chunks_csv_path):
77
+ chunks_df = pd.read_csv(chunks_csv_path)
78
+ if not chunks_df.empty:
79
+ unique_docs = chunks_df['document_name'].unique()
80
+ return sorted(unique_docs.tolist())
81
+ return []
 
 
 
 
82
  except Exception as e:
83
  log_message(f"❌ Error reading documents: {str(e)}")
84
  return []