Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Files Community

MrSimple07 committed on Aug 14, 2025

Commit

5c3579c

1 Parent(s): cd201bf

Fixed the white background problem in the upload tab + fixed the problem with uploading new files + added chatbot history

Browse files

Files changed (2) hide show

app.py +91 -44
document_processor.py +7 -11

app.py CHANGED Viewed

@@ -5,20 +5,30 @@ from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.core import Settings
 from config import *
 from document_processor import *
 query_engine = None
 chunks_df = None
-def answer_question(question):
-    global query_engine
     if query_engine is None:
-        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ System not initialized or document database is empty</div>", ""
     try:
         start_time = time.time()
-        response = query_engine.query(question)
         retrieved_nodes = query_engine.retriever.retrieve(question)
         end_time = time.time()
@@ -34,11 +44,18 @@ def answer_question(question):
         </div>
         </div>"""
-        return answer_with_time, sources_html
     except Exception as e:
         error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ Error processing question: {str(e)}</div>"
-        return error_msg, ""
 def generate_sources_html(nodes):
     html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
@@ -86,7 +103,7 @@ def get_documents_display():
     return html
 def upload_and_process_file(files, doc_names, doc_links):
-    global query_engine, chunks_df
     if not files:
         return "No files selected", get_documents_display()
@@ -94,6 +111,7 @@ def upload_and_process_file(files, doc_names, doc_links):
     if len(files) != len(doc_names) or len(files) != len(doc_links):
         return "Error: Number of files must match number of document names and links", get_documents_display()
     results = []
     for i, file in enumerate(files):
@@ -103,6 +121,11 @@ def upload_and_process_file(files, doc_names, doc_links):
         if not doc_name:
             doc_name = file.name.split('/')[-1].replace('.txt', '').replace('.pdf', '')
         log_message(f"🔄 Starting processing of file {i+1}/{len(files)}: {file.name}")
         file_info, error = process_uploaded_file(file.name, file.name.split('/')[-1], doc_name, doc_link)
@@ -118,6 +141,8 @@ def upload_and_process_file(files, doc_names, doc_links):
         else:
             results.append(f"✅ {file_info['document']}: Successfully processed and added to database")
             log_message(f"✅ Completed processing: {file_info['document']}")
     return "\n".join(results), get_documents_display()
@@ -135,12 +160,19 @@ def create_interface():
             with gr.Row():
                 with gr.Column(scale=3):
                     question_input = gr.Textbox(
                         label="Your question to the knowledge base",
                         placeholder="Enter your question about the documents...",
                         lines=3
                     )
                     ask_btn = gr.Button("🔍 Find Answer", variant="primary", size="lg")
                     gr.Examples(
                         examples=[
@@ -151,14 +183,12 @@ def create_interface():
                         inputs=question_input
                     )
-            with gr.Row():
-                with gr.Column(scale=2):
                     answer_output = gr.HTML(
                         label="",
                         value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>The answer to your question will appear here...</div>",
                     )
-                with gr.Column(scale=1):
                     sources_output = gr.HTML(
                         label="",
                         value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Sources will appear here...</div>",
@@ -166,14 +196,22 @@ def create_interface():
             ask_btn.click(
                 fn=answer_question,
-                inputs=[question_input],
-                outputs=[answer_output, sources_output]
             )
             question_input.submit(
                 fn=answer_question,
-                inputs=[question_input],
-                outputs=[answer_output, sources_output]
             )
         with gr.Tab("📚 Document Management"):
@@ -188,35 +226,39 @@ def create_interface():
                     refresh_btn = gr.Button("🔄 Refresh List", variant="secondary")
-                with gr.Column(scale=1):
-                    gr.Markdown("#### Upload new documents")
-                    gr.Markdown("Supported formats: PDF, TXT")
                     file_upload = gr.File(
                         file_count="multiple",
                         file_types=[".pdf", ".txt"],
-                        label="Select files to upload"
                     )
                     doc_names_input = gr.Textbox(
                         label="Document names (one per line)",
                         placeholder="Enter document names, one per line...",
-                        lines=5
                     )
                     doc_links_input = gr.Textbox(
                         label="Document links (one per line)",
                         placeholder="Enter document links, one per line...",
-                        lines=5
                     )
-                    upload_btn = gr.Button("📤 Upload and Process", variant="primary")
                     upload_status = gr.Textbox(
                         label="Upload status",
                         lines=8,
                         max_lines=10,
-                        interactive=False
                     )
             def process_names_and_links(names_text, links_text):
@@ -238,25 +280,30 @@ def create_interface():
                 outputs=[documents_display]
             )
-    return demo
-if __name__ == "__main__":
-    log_message("🚀 Starting AIEXP - AI Expert for Regulatory Documentation")
-    llm = GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
-    Settings.llm = llm
-    query_engine, chunks_df, success = initialize_system()
-    if success:
-        log_message("🌟 Starting web interface...")
-        demo = create_interface()
-        demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            share=True,
-            debug=False
-        )
-    else:
-        log_message("❌ Cannot start application due to initialization error")
-        sys.exit(1)

 from llama_index.core import Settings
 from config import *
 from document_processor import *
+from llama_index.core.chat_engine import CondensePlusContextChatEngine
 query_engine = None
 chunks_df = None
+chat_engine = None
+chat_history = []
+def answer_question(question, history):
+    global query_engine, chat_engine
     if query_engine is None:
+        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ System not initialized or document database is empty</div>", "", history
     try:
         start_time = time.time()
+        # Initialize chat engine if not exists
+        if chat_engine is None:
+            chat_engine = CondensePlusContextChatEngine.from_defaults(
+                retriever=query_engine.retriever,
+                response_synthesizer=query_engine.response_synthesizer
+            )
+        response = chat_engine.chat(question)
         retrieved_nodes = query_engine.retriever.retrieve(question)
         end_time = time.time()
         </div>
         </div>"""
+        # Update chat history (keep last 6 messages - 3 exchanges)
+        new_history = history + [[question, response.response]]
+        if len(new_history) > 3:
+            new_history = new_history[-3:]
+        return answer_with_time, sources_html, new_history
     except Exception as e:
         error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>❌ Error processing question: {str(e)}</div>"
+        return error_msg, "", history
 def generate_sources_html(nodes):
     html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
     return html
 def upload_and_process_file(files, doc_names, doc_links):
+    global query_engine, chunks_df, chat_engine
     if not files:
         return "No files selected", get_documents_display()
     if len(files) != len(doc_names) or len(files) != len(doc_links):
         return "Error: Number of files must match number of document names and links", get_documents_display()
+    existing_docs = get_existing_documents()
     results = []
     for i, file in enumerate(files):
         if not doc_name:
             doc_name = file.name.split('/')[-1].replace('.txt', '').replace('.pdf', '')
+        # Check if document already exists
+        if doc_name in existing_docs:
+            results.append(f"⚠️ {doc_name}: Document already exists in the system")
+            continue
         log_message(f"🔄 Starting processing of file {i+1}/{len(files)}: {file.name}")
         file_info, error = process_uploaded_file(file.name, file.name.split('/')[-1], doc_name, doc_link)
         else:
             results.append(f"✅ {file_info['document']}: Successfully processed and added to database")
             log_message(f"✅ Completed processing: {file_info['document']}")
+            # Reset chat engine to include new documents
+            chat_engine = None
     return "\n".join(results), get_documents_display()
             with gr.Row():
                 with gr.Column(scale=3):
+                    chatbot = gr.Chatbot(
+                        label="Chat History",
+                        height=400,
+                        show_label=True
+                    )
                     question_input = gr.Textbox(
                         label="Your question to the knowledge base",
                         placeholder="Enter your question about the documents...",
                         lines=3
                     )
                     ask_btn = gr.Button("🔍 Find Answer", variant="primary", size="lg")
+                    clear_btn = gr.Button("🗑️ Clear History", variant="secondary")
                     gr.Examples(
                         examples=[
                         inputs=question_input
                     )
+                with gr.Column(scale=1):
                     answer_output = gr.HTML(
                         label="",
                         value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>The answer to your question will appear here...</div>",
                     )
                     sources_output = gr.HTML(
                         label="",
                         value="<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; text-align: center;'>Sources will appear here...</div>",
             ask_btn.click(
                 fn=answer_question,
+                inputs=[question_input, chatbot],
+                outputs=[answer_output, sources_output, chatbot]
+            ).then(
+                lambda: "", inputs=None, outputs=question_input
             )
             question_input.submit(
                 fn=answer_question,
+                inputs=[question_input, chatbot],
+                outputs=[answer_output, sources_output, chatbot]
+            ).then(
+                lambda: "", inputs=None, outputs=question_input
+            )
+            clear_btn.click(
+                lambda: [], inputs=None, outputs=chatbot
             )
         with gr.Tab("📚 Document Management"):
                     refresh_btn = gr.Button("🔄 Refresh List", variant="secondary")
+                with gr.Column(scale=1, elem_id="upload-column"):
+                    gr.Markdown("#### Upload new documents", elem_classes=["upload-header"])
+                    gr.Markdown("Supported formats: PDF, TXT", elem_classes=["upload-info"])
                     file_upload = gr.File(
                         file_count="multiple",
                         file_types=[".pdf", ".txt"],
+                        label="Select files to upload",
+                        elem_classes=["upload-file"]
                     )
                     doc_names_input = gr.Textbox(
                         label="Document names (one per line)",
                         placeholder="Enter document names, one per line...",
+                        lines=5,
+                        elem_classes=["upload-input"]
                     )
                     doc_links_input = gr.Textbox(
                         label="Document links (one per line)",
                         placeholder="Enter document links, one per line...",
+                        lines=5,
+                        elem_classes=["upload-input"]
                     )
+                    upload_btn = gr.Button("📤 Upload and Process", variant="primary", elem_classes=["upload-btn"])
                     upload_status = gr.Textbox(
                         label="Upload status",
                         lines=8,
                         max_lines=10,
+                        interactive=False,
+                        elem_classes=["upload-status"]
                     )
             def process_names_and_links(names_text, links_text):
                 outputs=[documents_display]
             )
+    # Add CSS to fix white background in upload tab
+    demo.css = """
+    #upload-column {
+        background-color: #f8f9fa !important;
+        padding: 20px !important;
+        border-radius: 10px !important;
+        border: 1px solid #e9ecef !important;
+    }
+    .upload-header h4 {
+        color: #2d3748 !important;
+        margin-bottom: 10px !important;
+    }
+    .upload-info {
+        color: #666 !important;
+        margin-bottom: 15px !important;
+    }
+    .upload-file, .upload-input, .upload-status {
+        background-color: white !important;
+        border: 1px solid #ced4da !important;
+        border-radius: 5px !important;
+    }
+    .upload-btn {
+        margin-top: 10px !important;
+    }
+    """
+    return demo

document_processor.py CHANGED Viewed

@@ -72,17 +72,13 @@ def process_uploaded_file(file_path, file_name, doc_name, doc_link):
 def get_existing_documents():
     try:
-        upload_dir = "UPLOADED_DOCUMENTS"
-        if not os.path.exists(upload_dir):
-            return []
-        documents = []
-        for file_name in os.listdir(upload_dir):
-            if file_name.endswith(('.txt', '.pdf')):
-                doc_name = os.path.splitext(file_name)[0]
-                documents.append(doc_name)
-        return sorted(documents)
     except Exception as e:
         log_message(f"❌ Error reading documents: {str(e)}")
         return []

 def get_existing_documents():
     try:
+        chunks_csv_path = os.path.join(download_dir, chunks_filename)
+        if os.path.exists(chunks_csv_path):
+            chunks_df = pd.read_csv(chunks_csv_path)
+            if not chunks_df.empty:
+                unique_docs = chunks_df['document_name'].unique()
+                return sorted(unique_docs.tolist())
+        return []
     except Exception as e:
         log_message(f"❌ Error reading documents: {str(e)}")
         return []