Commit ·
d91c091
1
Parent(s): 00640b0
jnjjimukm
Browse files- utils/email_parser.py +117 -31
utils/email_parser.py
CHANGED
|
@@ -493,7 +493,7 @@ def extract_name_and_email(email_string):
|
|
| 493 |
def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
|
| 494 |
"""
|
| 495 |
Safe wrapper for parsing uploaded files with comprehensive error handling.
|
| 496 |
-
|
| 497 |
"""
|
| 498 |
if filter_settings is None:
|
| 499 |
filter_settings = {}
|
|
@@ -506,43 +506,55 @@ def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
|
|
| 506 |
if not uploaded_file.name.lower().endswith('.zip'):
|
| 507 |
raise ValueError("File must be a ZIP archive (.zip)")
|
| 508 |
|
| 509 |
-
#
|
| 510 |
try:
|
| 511 |
-
|
| 512 |
-
if len(file_content) == 0:
|
| 513 |
-
raise ValueError("Uploaded file is empty")
|
| 514 |
-
|
| 515 |
-
file_size_mb = len(file_content) / (1024 * 1024)
|
| 516 |
-
if file_size_mb > 500: # 500MB limit
|
| 517 |
-
raise ValueError(f"File too large ({file_size_mb:.1f}MB). Please upload a smaller file.")
|
| 518 |
-
|
| 519 |
except Exception as e:
|
| 520 |
-
|
| 521 |
-
raise ValueError("File access denied. Try uploading the file again or use a different browser.")
|
| 522 |
-
raise ValueError(f"Cannot read uploaded file: {str(e)}")
|
| 523 |
-
|
| 524 |
-
# Reset file pointer and try parsing
|
| 525 |
-
uploaded_file.seek(0)
|
| 526 |
|
| 527 |
-
#
|
| 528 |
try:
|
| 529 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
except Exception as parse_error:
|
| 531 |
error_msg = str(parse_error)
|
| 532 |
|
| 533 |
-
# Handle specific error types
|
| 534 |
if "403" in error_msg or "AxiosError" in error_msg:
|
| 535 |
-
raise ValueError(
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
|
|
|
|
|
|
| 540 |
elif "BadZipFile" in error_msg or "zipfile" in error_msg.lower():
|
| 541 |
-
raise ValueError(
|
|
|
|
|
|
|
|
|
|
| 542 |
elif "mbox" in error_msg.lower():
|
| 543 |
-
raise ValueError(
|
|
|
|
|
|
|
|
|
|
| 544 |
else:
|
| 545 |
raise ValueError(f"Email parsing failed: {error_msg}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
|
| 547 |
except ValueError:
|
| 548 |
# Re-raise ValueError as-is (these are user-friendly messages)
|
|
@@ -551,10 +563,84 @@ def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
|
|
| 551 |
# Catch any other unexpected errors
|
| 552 |
error_msg = str(e)
|
| 553 |
if "403" in error_msg or "AxiosError" in error_msg:
|
| 554 |
-
raise ValueError(
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
|
|
|
| 559 |
else:
|
| 560 |
raise ValueError(f"Unexpected error: {error_msg}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
|
| 494 |
"""
|
| 495 |
Safe wrapper for parsing uploaded files with comprehensive error handling.
|
| 496 |
+
Optimized for Hugging Face Spaces environment.
|
| 497 |
"""
|
| 498 |
if filter_settings is None:
|
| 499 |
filter_settings = {}
|
|
|
|
| 506 |
if not uploaded_file.name.lower().endswith('.zip'):
|
| 507 |
raise ValueError("File must be a ZIP archive (.zip)")
|
| 508 |
|
| 509 |
+
# Use Hugging Face Spaces-specific file handling
|
| 510 |
try:
|
| 511 |
+
tmp_path = handle_hf_spaces_upload(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
except Exception as e:
|
| 513 |
+
raise ValueError(str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
+
# Parse using temporary file
|
| 516 |
try:
|
| 517 |
+
# Create a file-like object for the existing parsing function
|
| 518 |
+
with open(tmp_path, 'rb') as f:
|
| 519 |
+
import io
|
| 520 |
+
uploaded_file_like = io.BytesIO(f.read())
|
| 521 |
+
uploaded_file_like.name = uploaded_file.name
|
| 522 |
+
|
| 523 |
+
return parse_uploaded_file_with_filters(
|
| 524 |
+
uploaded_file_like, filter_settings
|
| 525 |
+
)
|
| 526 |
except Exception as parse_error:
|
| 527 |
error_msg = str(parse_error)
|
| 528 |
|
| 529 |
+
# Handle specific error types with HF Spaces context
|
| 530 |
if "403" in error_msg or "AxiosError" in error_msg:
|
| 531 |
+
raise ValueError(
|
| 532 |
+
"🚫 Hugging Face Spaces file access error (403). Try:\n"
|
| 533 |
+
"• Wait 30 seconds and upload again\n"
|
| 534 |
+
"• Refresh the page (F5)\n"
|
| 535 |
+
"• Use a smaller file (under 50MB)\n"
|
| 536 |
+
"• Try Chrome or Firefox browser"
|
| 537 |
+
)
|
| 538 |
elif "BadZipFile" in error_msg or "zipfile" in error_msg.lower():
|
| 539 |
+
raise ValueError(
|
| 540 |
+
"Invalid ZIP file. Please ensure you uploaded a valid "
|
| 541 |
+
"Gmail Takeout ZIP file."
|
| 542 |
+
)
|
| 543 |
elif "mbox" in error_msg.lower():
|
| 544 |
+
raise ValueError(
|
| 545 |
+
"No email data found. Please ensure you uploaded a "
|
| 546 |
+
"Gmail Takeout file that contains emails."
|
| 547 |
+
)
|
| 548 |
else:
|
| 549 |
raise ValueError(f"Email parsing failed: {error_msg}")
|
| 550 |
+
finally:
|
| 551 |
+
# Cleanup temporary file
|
| 552 |
+
try:
|
| 553 |
+
import os
|
| 554 |
+
if os.path.exists(tmp_path):
|
| 555 |
+
os.unlink(tmp_path)
|
| 556 |
+
except Exception:
|
| 557 |
+
pass # Ignore cleanup errors
|
| 558 |
|
| 559 |
except ValueError:
|
| 560 |
# Re-raise ValueError as-is (these are user-friendly messages)
|
|
|
|
| 563 |
# Catch any other unexpected errors
|
| 564 |
error_msg = str(e)
|
| 565 |
if "403" in error_msg or "AxiosError" in error_msg:
|
| 566 |
+
raise ValueError(
|
| 567 |
+
"🚫 Hugging Face Spaces error (403). Please try:\n"
|
| 568 |
+
"• Refreshing the page and uploading again\n"
|
| 569 |
+
"• Using a smaller file (under 50MB)\n"
|
| 570 |
+
"• Trying a different browser"
|
| 571 |
+
)
|
| 572 |
else:
|
| 573 |
raise ValueError(f"Unexpected error: {error_msg}")
|
| 574 |
+
|
| 575 |
+
def handle_hf_spaces_upload(uploaded_file, *, max_retries=3, retry_delay=1,
                            max_size_mb=50):
    """
    Copy an uploaded file into a temporary ``.zip`` file and return its path.

    Handle file uploads specifically for Hugging Face Spaces environment.
    Reading the upload is retried with exponential backoff; errors whose
    message mentions "403" or "AxiosError" get a dedicated user-facing
    message once the retries are exhausted.

    Args:
        uploaded_file: File-like object exposing ``seek()`` and either
            ``getvalue()`` or ``read()`` returning the raw bytes.
        max_retries: Maximum number of read attempts.
        retry_delay: Seconds to sleep before the first retry; doubled after
            every failed attempt.
        max_size_mb: Largest accepted upload size, in megabytes.

    Returns:
        Path of the temporary file holding the upload. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If the upload is empty, exceeds ``max_size_mb``, cannot
            be read after ``max_retries`` attempts, or cannot be written to
            a temporary file.
    """
    import os
    import tempfile
    import time

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            # Reset file pointer so a retry starts from the beginning
            uploaded_file.seek(0)

            # Read file content with error handling
            try:
                if hasattr(uploaded_file, "getvalue"):
                    file_content = uploaded_file.getvalue()
                else:
                    # Plain file objects have no getvalue(); read() after the
                    # seek(0) above yields the same full content.
                    file_content = uploaded_file.read()
            except Exception as e:
                if "403" in str(e) or "AxiosError" in str(e):
                    if not is_last_attempt:
                        time.sleep(retry_delay)
                        retry_delay *= 2  # Exponential backoff
                        continue
                    raise ValueError(
                        "🚫 Hugging Face Spaces upload error. Please:\n"
                        "• Try a smaller file (under 50MB)\n"
                        "• Refresh the page and try again\n"
                        "• Use Chrome or Firefox browser"
                    ) from e
                # Any other read error is handled by the generic retry below
                raise

            # Validate file content
            if len(file_content) == 0:
                raise ValueError("Uploaded file is empty")

            file_size_mb = len(file_content) / (1024 * 1024)
            if file_size_mb > max_size_mb:  # Conservative limit for HF Spaces
                raise ValueError(
                    f"File too large ({file_size_mb:.1f}MB). "
                    f"Hugging Face Spaces works best with files under "
                    f"{max_size_mb}MB."
                )

            # Create temporary file in a safer way for HF Spaces
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(
                    delete=False,
                    suffix='.zip',
                    dir=tempfile.gettempdir()
                ) as tmp_file:
                    tmp_path = tmp_file.name
                    tmp_file.write(file_content)
                return tmp_path
            except Exception as e:
                # delete=False means a failed write would otherwise leave a
                # partial file behind; remove it before reporting the error.
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError:
                        pass  # Best-effort cleanup only
                raise ValueError(
                    f"Cannot create temporary file in Hugging Face Spaces: {e}"
                ) from e

        except ValueError:
            raise  # Re-raise user-friendly errors
        except Exception as e:
            if not is_last_attempt:
                time.sleep(retry_delay)
                retry_delay *= 2
                continue
            raise ValueError(
                f"Hugging Face Spaces file processing error: {e}"
            ) from e

    # Only reachable when max_retries is zero or negative
    raise ValueError("Failed to process file after multiple attempts")
|