Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -213,10 +213,25 @@ async def extract_text_from_file(file: UploadFile):
|
|
| 213 |
try:
|
| 214 |
file_content = await file.read()
|
| 215 |
if not file_content:
|
|
|
|
| 216 |
raise ValueError("Uploaded file is empty.")
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
if file.filename.endswith(".pdf"):
|
| 219 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
doc = fitz.open(stream=file_content, filetype="pdf")
|
| 221 |
text = ""
|
| 222 |
for page in doc:
|
|
@@ -239,8 +254,6 @@ async def extract_text_from_file(file: UploadFile):
|
|
| 239 |
except Exception as e:
|
| 240 |
logger.error(f"Error reading TXT file: {e}")
|
| 241 |
raise ValueError("Failed to read TXT file. It might be corrupted or not a valid TXT.")
|
| 242 |
-
else:
|
| 243 |
-
raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
|
| 244 |
except Exception as e:
|
| 245 |
logger.error(f"Error extracting text from file: {e}")
|
| 246 |
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
| 213 |
try:
|
| 214 |
file_content = await file.read()
|
| 215 |
if not file_content:
|
| 216 |
+
logger.error("Uploaded file is empty.")
|
| 217 |
raise ValueError("Uploaded file is empty.")
|
| 218 |
|
| 219 |
+
# Check file size (e.g., limit to 10MB)
|
| 220 |
+
if len(file_content) > 10 * 1024 * 1024: # 10MB
|
| 221 |
+
logger.error("File size exceeds the limit (10MB).")
|
| 222 |
+
raise ValueError("File size exceeds the limit (10MB).")
|
| 223 |
+
|
| 224 |
+
# Check file type
|
| 225 |
+
if not file.filename.lower().endswith((".pdf", ".docx", ".txt")):
|
| 226 |
+
logger.error(f"Unsupported file format: {file.filename}")
|
| 227 |
+
raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
|
| 228 |
+
|
| 229 |
if file.filename.endswith(".pdf"):
|
| 230 |
try:
|
| 231 |
+
# Log the first few bytes of the file for debugging
|
| 232 |
+
logger.info(f"First 100 bytes of the file: {file_content[:100]}")
|
| 233 |
+
|
| 234 |
+
# Attempt to open the PDF
|
| 235 |
doc = fitz.open(stream=file_content, filetype="pdf")
|
| 236 |
text = ""
|
| 237 |
for page in doc:
|
|
|
|
| 254 |
except Exception as e:
|
| 255 |
logger.error(f"Error reading TXT file: {e}")
|
| 256 |
raise ValueError("Failed to read TXT file. It might be corrupted or not a valid TXT.")
|
|
|
|
|
|
|
| 257 |
except Exception as e:
|
| 258 |
logger.error(f"Error extracting text from file: {e}")
|
| 259 |
raise HTTPException(status_code=400, detail=str(e))
|