prernajeet01 committed on
Commit
f87f80e
·
verified ·
1 Parent(s): dff05f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -55
app.py CHANGED
@@ -117,61 +117,37 @@ class AuditAgent:
117
  except Exception as e:
118
  return f"Error processing query: {str(e)}"
119
 
120
- def process_documents(self, file):
121
  """Process uploaded documents and create a vector store."""
122
- if not file:
123
  return "Please upload a file"
124
 
125
  try:
126
  documents = []
127
 
128
- # Create temporary directory
129
- temp_dir = tempfile.mkdtemp()
130
- temp_path = os.path.join(temp_dir, file.name)
131
-
132
- # Save uploaded file
133
- with open(temp_path, 'wb') as f:
134
- f.write(file.read())
135
-
136
  # Get file extension and check it's supported
137
- file_ext = os.path.splitext(file.name.lower())[1]
138
  supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
139
 
140
  if file_ext not in supported_exts:
141
- # Clean up temp files before returning
142
- if os.path.exists(temp_path):
143
- os.remove(temp_path)
144
- if os.path.exists(temp_dir):
145
- os.rmdir(temp_dir)
146
  return f"Unsupported file type: {file_ext}. Please upload one of: {', '.join(supported_exts)}"
147
 
148
  # Select appropriate loader
149
  try:
150
  if file_ext == '.pdf':
151
- loader = PyPDFLoader(temp_path)
152
  elif file_ext == '.docx':
153
- loader = Docx2txtLoader(temp_path)
154
  elif file_ext == '.pptx':
155
- loader = UnstructuredPowerPointLoader(temp_path)
156
  elif file_ext in ['.xlsx', '.xls']:
157
- loader = UnstructuredExcelLoader(temp_path)
158
 
159
  # Load and process document
160
  documents.extend(loader.load())
161
  except Exception as e:
162
- # Clean up temp files
163
- if os.path.exists(temp_path):
164
- os.remove(temp_path)
165
- if os.path.exists(temp_dir):
166
- os.rmdir(temp_dir)
167
  return f"Error loading document content: {str(e)}"
168
 
169
- # Cleanup temp files
170
- if os.path.exists(temp_path):
171
- os.remove(temp_path)
172
- if os.path.exists(temp_dir):
173
- os.rmdir(temp_dir)
174
-
175
  # Split documents
176
  if not documents:
177
  return "No content could be extracted from the document."
@@ -190,7 +166,7 @@ class AuditAgent:
190
  embeddings = OpenAIEmbeddings(openai_api_key=api_keys["openai_key"])
191
  self.document_store = FAISS.from_documents(splits, embeddings)
192
 
193
- return f"Document '{file.name}' processed successfully with {len(splits)} text chunks."
194
  except Exception as e:
195
  return f"Error processing document: {str(e)}"
196
 
@@ -278,11 +254,11 @@ def create_interface():
278
 
279
  with gr.Row():
280
  with gr.Column(scale=1):
281
- # Updated file component with clearer instructions
282
  file_upload = gr.File(
283
  label="Upload Audit Documents",
284
  file_types=["pdf", "docx", "pptx", "xlsx", "xls"],
285
- type="binary"
286
  )
287
  gr.Markdown("Supported formats: PDF, DOCX, PPTX, XLSX, XLS")
288
 
@@ -395,30 +371,33 @@ def create_interface():
395
  error_msg = f"Error solving problem: {str(e)}"
396
  return error_msg, error_msg
397
 
398
- # Handle file upload with improved validation
399
- def handle_file_upload(file, model_name):
400
- if file is None:
401
  return "No file uploaded. Please upload a file."
402
 
403
- # Check file extension
404
- file_ext = os.path.splitext(file.name.lower())[1] if file.name else ""
405
- supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
406
-
407
- if file_ext not in supported_exts:
408
- return f"Invalid file type: {file_ext}. Please upload a file with one of these extensions: {', '.join(supported_exts)}"
409
-
410
- status = f"Processing document with {model_name}..."
411
-
412
- # Get or initialize agent
413
- agent, init_status = get_or_initialize_agent(model_name)
414
-
415
- # If initialization failed
416
- if agent is None:
417
- return init_status
418
-
419
- # Process the document
420
  try:
421
- result = agent.process_documents(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  return result
423
  except Exception as e:
424
  return f"Error processing document: {str(e)}"
 
117
  except Exception as e:
118
  return f"Error processing query: {str(e)}"
119
 
120
+ def process_documents(self, file_path, file_name):
121
  """Process uploaded documents and create a vector store."""
122
+ if not file_path or not file_name:
123
  return "Please upload a file"
124
 
125
  try:
126
  documents = []
127
 
 
 
 
 
 
 
 
 
128
  # Get file extension and check it's supported
129
+ file_ext = os.path.splitext(file_name.lower())[1]
130
  supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
131
 
132
  if file_ext not in supported_exts:
 
 
 
 
 
133
  return f"Unsupported file type: {file_ext}. Please upload one of: {', '.join(supported_exts)}"
134
 
135
  # Select appropriate loader
136
  try:
137
  if file_ext == '.pdf':
138
+ loader = PyPDFLoader(file_path)
139
  elif file_ext == '.docx':
140
+ loader = Docx2txtLoader(file_path)
141
  elif file_ext == '.pptx':
142
+ loader = UnstructuredPowerPointLoader(file_path)
143
  elif file_ext in ['.xlsx', '.xls']:
144
+ loader = UnstructuredExcelLoader(file_path)
145
 
146
  # Load and process document
147
  documents.extend(loader.load())
148
  except Exception as e:
 
 
 
 
 
149
  return f"Error loading document content: {str(e)}"
150
 
 
 
 
 
 
 
151
  # Split documents
152
  if not documents:
153
  return "No content could be extracted from the document."
 
166
  embeddings = OpenAIEmbeddings(openai_api_key=api_keys["openai_key"])
167
  self.document_store = FAISS.from_documents(splits, embeddings)
168
 
169
+ return f"Document '{file_name}' processed successfully with {len(splits)} text chunks."
170
  except Exception as e:
171
  return f"Error processing document: {str(e)}"
172
 
 
254
 
255
  with gr.Row():
256
  with gr.Column(scale=1):
257
+ # Updated file upload component - using the "filepath" type instead of "binary"
258
  file_upload = gr.File(
259
  label="Upload Audit Documents",
260
  file_types=["pdf", "docx", "pptx", "xlsx", "xls"],
261
+ type="filepath" # Changed from "binary" to "filepath"
262
  )
263
  gr.Markdown("Supported formats: PDF, DOCX, PPTX, XLSX, XLS")
264
 
 
371
  error_msg = f"Error solving problem: {str(e)}"
372
  return error_msg, error_msg
373
 
374
+ # Updated file upload handler for filepath type
375
+ def handle_file_upload(file_path, model_name):
376
+ if file_path is None:
377
  return "No file uploaded. Please upload a file."
378
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  try:
380
+ # Extract the filename from the path
381
+ file_name = os.path.basename(file_path)
382
+
383
+ # Check file extension
384
+ file_ext = os.path.splitext(file_name.lower())[1]
385
+ supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
386
+
387
+ if file_ext not in supported_exts:
388
+ return f"Invalid file type: {file_ext}. Please upload a file with one of these extensions: {', '.join(supported_exts)}"
389
+
390
+ status = f"Processing document with {model_name}..."
391
+
392
+ # Get or initialize agent
393
+ agent, init_status = get_or_initialize_agent(model_name)
394
+
395
+ # If initialization failed
396
+ if agent is None:
397
+ return init_status
398
+
399
+ # Process the document
400
+ result = agent.process_documents(file_path, file_name)
401
  return result
402
  except Exception as e:
403
  return f"Error processing document: {str(e)}"