Hadiil committed on
Commit
0a6185b
·
verified ·
1 Parent(s): 267e7b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -213,10 +213,25 @@ async def extract_text_from_file(file: UploadFile):
213
  try:
214
  file_content = await file.read()
215
  if not file_content:
 
216
  raise ValueError("Uploaded file is empty.")
217
 
 
 
 
 
 
 
 
 
 
 
218
  if file.filename.endswith(".pdf"):
219
  try:
 
 
 
 
220
  doc = fitz.open(stream=file_content, filetype="pdf")
221
  text = ""
222
  for page in doc:
@@ -239,8 +254,6 @@ async def extract_text_from_file(file: UploadFile):
239
  except Exception as e:
240
  logger.error(f"Error reading TXT file: {e}")
241
  raise ValueError("Failed to read TXT file. It might be corrupted or not a valid TXT.")
242
- else:
243
- raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
244
  except Exception as e:
245
  logger.error(f"Error extracting text from file: {e}")
246
  raise HTTPException(status_code=400, detail=str(e))
 
213
  try:
214
  file_content = await file.read()
215
  if not file_content:
216
+ logger.error("Uploaded file is empty.")
217
  raise ValueError("Uploaded file is empty.")
218
 
219
+ # Check file size (e.g., limit to 10MB)
220
+ if len(file_content) > 10 * 1024 * 1024: # 10MB
221
+ logger.error("File size exceeds the limit (10MB).")
222
+ raise ValueError("File size exceeds the limit (10MB).")
223
+
224
+ # Check file type
225
+ if not file.filename.lower().endswith((".pdf", ".docx", ".txt")):
226
+ logger.error(f"Unsupported file format: {file.filename}")
227
+ raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
228
+
229
  if file.filename.endswith(".pdf"):
230
  try:
231
+ # Log the first few bytes of the file for debugging
232
+ logger.info(f"First 100 bytes of the file: {file_content[:100]}")
233
+
234
+ # Attempt to open the PDF
235
  doc = fitz.open(stream=file_content, filetype="pdf")
236
  text = ""
237
  for page in doc:
 
254
  except Exception as e:
255
  logger.error(f"Error reading TXT file: {e}")
256
  raise ValueError("Failed to read TXT file. It might be corrupted or not a valid TXT.")
 
 
257
  except Exception as e:
258
  logger.error(f"Error extracting text from file: {e}")
259
  raise HTTPException(status_code=400, detail=str(e))