Update app.py
Browse files
app.py
CHANGED
|
@@ -527,7 +527,7 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
|
|
| 527 |
Handle file upload, OCR processing, and text analysis
|
| 528 |
|
| 529 |
Args:
|
| 530 |
-
file_obj: Uploaded file object from Gradio
|
| 531 |
mode: Analysis mode (quick or detailed)
|
| 532 |
classifier: The TextClassifier instance
|
| 533 |
|
|
@@ -541,11 +541,33 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
|
|
| 541 |
"No file uploaded for analysis"
|
| 542 |
)
|
| 543 |
|
| 544 |
-
# Create a temporary file
|
| 545 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
temp_file_path = temp_file.name
|
| 547 |
-
# Write uploaded file to the temporary file
|
| 548 |
-
temp_file.write(file_obj
|
| 549 |
|
| 550 |
try:
|
| 551 |
# Process the file with OCR
|
|
|
|
| 527 |
Handle file upload, OCR processing, and text analysis
|
| 528 |
|
| 529 |
Args:
|
| 530 |
+
file_obj: Uploaded file object from Gradio (bytes when using type="binary")
|
| 531 |
mode: Analysis mode (quick or detailed)
|
| 532 |
classifier: The TextClassifier instance
|
| 533 |
|
|
|
|
| 541 |
"No file uploaded for analysis"
|
| 542 |
)
|
| 543 |
|
| 544 |
+
# Create a temporary file with an appropriate extension based on content
|
| 545 |
+
# Since we don't have the original filename when using binary mode,
|
| 546 |
+
# we'll infer the extension from the file's leading magic bytes, falling back to a generic one
|
| 547 |
+
|
| 548 |
+
# Simple content type detection
|
| 549 |
+
content_start = file_obj[:20] # Look at the first few bytes
|
| 550 |
+
|
| 551 |
+
# Default to .bin extension
|
| 552 |
+
file_ext = ".bin"
|
| 553 |
+
|
| 554 |
+
# Try to detect PDF files
|
| 555 |
+
if content_start.startswith(b'%PDF'):
|
| 556 |
+
file_ext = ".pdf"
|
| 557 |
+
# For images, detect by common magic numbers
|
| 558 |
+
elif content_start.startswith(b'\xff\xd8'): # JPEG
|
| 559 |
+
file_ext = ".jpg"
|
| 560 |
+
elif content_start.startswith(b'\x89PNG'): # PNG
|
| 561 |
+
file_ext = ".png"
|
| 562 |
+
elif content_start.startswith(b'GIF'): # GIF
|
| 563 |
+
file_ext = ".gif"
|
| 564 |
+
# Add more content type detection as needed
|
| 565 |
+
|
| 566 |
+
# Create a temporary file with the detected extension
|
| 567 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file:
|
| 568 |
temp_file_path = temp_file.name
|
| 569 |
+
# Write uploaded file data to the temporary file
|
| 570 |
+
temp_file.write(file_obj)
|
| 571 |
|
| 572 |
try:
|
| 573 |
# Process the file with OCR
|