Spaces:

prernajeet01
/

Reasoning_AI_Agent

Running

App Files Community

prernajeet01 committed on Feb 25, 2025

Commit

dff05f0

verified ·

1 Parent(s): 783e7ab

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -25

app.py CHANGED Viewed

@@ -138,40 +138,59 @@ class AuditAgent:
             supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
             if file_ext not in supported_exts:
-                os.remove(temp_path)
-                os.rmdir(temp_dir)
-                return f"Unsupported file type. Please upload one of: {', '.join(supported_exts)}"
             # Select appropriate loader
-            if file_ext == '.pdf':
-                loader = PyPDFLoader(temp_path)
-            elif file_ext == '.docx':
-                loader = Docx2txtLoader(temp_path)
-            elif file_ext == '.pptx':
-                loader = UnstructuredPowerPointLoader(temp_path)
-            elif file_ext in ['.xlsx', '.xls']:
-                loader = UnstructuredExcelLoader(temp_path)
-            # Load and process document
-            documents.extend(loader.load())
-            # Cleanup
-            os.remove(temp_path)
-            os.rmdir(temp_dir)
             # Split documents
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=1000,
                 chunk_overlap=200
             )
             splits = text_splitter.split_documents(documents)
             # Create vector store
             api_keys = get_api_keys()
             embeddings = OpenAIEmbeddings(openai_api_key=api_keys["openai_key"])
             self.document_store = FAISS.from_documents(splits, embeddings)
-            return "Document processed successfully"
         except Exception as e:
             return f"Error processing document: {str(e)}"
@@ -180,6 +199,9 @@ class AuditAgent:
         if not self.document_store:
             return "Please upload and process documents first"
         try:
             qa_chain = RetrievalQA.from_chain_type(
                 llm=self.llm,
@@ -194,9 +216,9 @@ class AuditAgent:
             source_docs = response.get('source_documents', [])
             if source_docs:
-                result += "\n\nSources:\n"
                 for i, doc in enumerate(source_docs, 1):
-                    result += f"{i}. {doc.metadata.get('source', 'Unknown source')}\n"
             return result
         except Exception as e:
@@ -256,10 +278,13 @@ def create_interface():
         with gr.Row():
             with gr.Column(scale=1):
                 file_upload = gr.File(
                     label="Upload Audit Documents",
-                    file_types=["pdf", "docx", "pptx", "xlsx", "xls"]
                 )
         # Use tabs for model selection instead of dropdown
         with gr.Tabs() as model_tabs:
@@ -308,7 +333,7 @@ def create_interface():
         model_tabs.select(update_selected_model, outputs=[selected_model])
-        # COMPLETELY REVISED: Initialize an agent and return both agent and status message
         def get_or_initialize_agent(model_name):
             """Initialize an agent if not already initialized and return a status message"""
             init_message = f"Initializing {model_name}..."
@@ -370,11 +395,18 @@ def create_interface():
                 error_msg = f"Error solving problem: {str(e)}"
                 return error_msg, error_msg
-        # Handle file upload
         def handle_file_upload(file, model_name):
             if file is None:
                 return "No file uploaded. Please upload a file."
             status = f"Processing document with {model_name}..."
             # Get or initialize agent
@@ -410,7 +442,7 @@ def create_interface():
                 error_msg = f"Error querying documents: {str(e)}"
                 return error_msg, error_msg
-        # Set up event handlers - UPDATED to include status_message updates
         chat_button.click(
             handle_chat,
             inputs=[chat_input, selected_model],

             supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
             if file_ext not in supported_exts:
+                # Clean up temp files before returning
+                if os.path.exists(temp_path):
+                    os.remove(temp_path)
+                if os.path.exists(temp_dir):
+                    os.rmdir(temp_dir)
+                return f"Unsupported file type: {file_ext}. Please upload one of: {', '.join(supported_exts)}"
             # Select appropriate loader
+            try:
+                if file_ext == '.pdf':
+                    loader = PyPDFLoader(temp_path)
+                elif file_ext == '.docx':
+                    loader = Docx2txtLoader(temp_path)
+                elif file_ext == '.pptx':
+                    loader = UnstructuredPowerPointLoader(temp_path)
+                elif file_ext in ['.xlsx', '.xls']:
+                    loader = UnstructuredExcelLoader(temp_path)
+                # Load and process document
+                documents.extend(loader.load())
+            except Exception as e:
+                # Clean up temp files
+                if os.path.exists(temp_path):
+                    os.remove(temp_path)
+                if os.path.exists(temp_dir):
+                    os.rmdir(temp_dir)
+                return f"Error loading document content: {str(e)}"
+            # Cleanup temp files
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
+            if os.path.exists(temp_dir):
+                os.rmdir(temp_dir)
             # Split documents
+            if not documents:
+                return "No content could be extracted from the document."
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=1000,
                 chunk_overlap=200
             )
             splits = text_splitter.split_documents(documents)
+            if not splits:
+                return "Document was processed but no text content was found."
             # Create vector store
             api_keys = get_api_keys()
             embeddings = OpenAIEmbeddings(openai_api_key=api_keys["openai_key"])
             self.document_store = FAISS.from_documents(splits, embeddings)
+            return f"Document '{file.name}' processed successfully with {len(splits)} text chunks."
         except Exception as e:
             return f"Error processing document: {str(e)}"
         if not self.document_store:
             return "Please upload and process documents first"
+        if not query.strip():
+            return "Please provide a non-empty query."
         try:
             qa_chain = RetrievalQA.from_chain_type(
                 llm=self.llm,
             source_docs = response.get('source_documents', [])
             if source_docs:
+                result += "\n\n**Sources:**\n"
                 for i, doc in enumerate(source_docs, 1):
+                    result += f"{i}. {doc.metadata.get('source', 'Unknown source')}, page {doc.metadata.get('page', 'N/A')}\n"
             return result
         except Exception as e:
         with gr.Row():
             with gr.Column(scale=1):
+                # Updated file component with clearer instructions
                 file_upload = gr.File(
                     label="Upload Audit Documents",
+                    file_types=["pdf", "docx", "pptx", "xlsx", "xls"],
+                    type="binary"
                 )
+                gr.Markdown("Supported formats: PDF, DOCX, PPTX, XLSX, XLS")
         # Use tabs for model selection instead of dropdown
         with gr.Tabs() as model_tabs:
         model_tabs.select(update_selected_model, outputs=[selected_model])
+        # Get or initialize agent and return both agent and status message
         def get_or_initialize_agent(model_name):
             """Initialize an agent if not already initialized and return a status message"""
             init_message = f"Initializing {model_name}..."
                 error_msg = f"Error solving problem: {str(e)}"
                 return error_msg, error_msg
+        # Handle file upload with improved validation
         def handle_file_upload(file, model_name):
             if file is None:
                 return "No file uploaded. Please upload a file."
+            # Check file extension
+            file_ext = os.path.splitext(file.name.lower())[1] if file.name else ""
+            supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
+            if file_ext not in supported_exts:
+                return f"Invalid file type: {file_ext}. Please upload a file with one of these extensions: {', '.join(supported_exts)}"
             status = f"Processing document with {model_name}..."
             # Get or initialize agent
                 error_msg = f"Error querying documents: {str(e)}"
                 return error_msg, error_msg
+        # Set up event handlers
         chat_button.click(
             handle_chat,
             inputs=[chat_input, selected_model],