rsm-wew068 committed on
Commit
d91c091
·
1 Parent(s): 00640b0

jnjjimukm

Browse files
Files changed (1) hide show
  1. utils/email_parser.py +117 -31
utils/email_parser.py CHANGED
@@ -493,7 +493,7 @@ def extract_name_and_email(email_string):
493
  def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
494
  """
495
  Safe wrapper for parsing uploaded files with comprehensive error handling.
496
- This function prevents 403 and other file access errors.
497
  """
498
  if filter_settings is None:
499
  filter_settings = {}
@@ -506,43 +506,55 @@ def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
506
  if not uploaded_file.name.lower().endswith('.zip'):
507
  raise ValueError("File must be a ZIP archive (.zip)")
508
 
509
- # Check file size
510
  try:
511
- file_content = uploaded_file.getvalue()
512
- if len(file_content) == 0:
513
- raise ValueError("Uploaded file is empty")
514
-
515
- file_size_mb = len(file_content) / (1024 * 1024)
516
- if file_size_mb > 500: # 500MB limit
517
- raise ValueError(f"File too large ({file_size_mb:.1f}MB). Please upload a smaller file.")
518
-
519
  except Exception as e:
520
- if "403" in str(e) or "AxiosError" in str(e):
521
- raise ValueError("File access denied. Try uploading the file again or use a different browser.")
522
- raise ValueError(f"Cannot read uploaded file: {str(e)}")
523
-
524
- # Reset file pointer and try parsing
525
- uploaded_file.seek(0)
526
 
527
- # Call the original function with added error handling
528
  try:
529
- return parse_uploaded_file_with_filters(uploaded_file, filter_settings)
 
 
 
 
 
 
 
 
530
  except Exception as parse_error:
531
  error_msg = str(parse_error)
532
 
533
- # Handle specific error types
534
  if "403" in error_msg or "AxiosError" in error_msg:
535
- raise ValueError("File access error (403). This may be due to:\n"
536
- "- Browser security restrictions\n"
537
- "- File permissions\n"
538
- "- Network connectivity issues\n"
539
- "Please try refreshing the page and uploading again.")
 
 
540
  elif "BadZipFile" in error_msg or "zipfile" in error_msg.lower():
541
- raise ValueError("Invalid ZIP file. Please ensure you uploaded a valid Gmail Takeout ZIP file.")
 
 
 
542
  elif "mbox" in error_msg.lower():
543
- raise ValueError("No email data found. Please ensure you uploaded a Gmail Takeout file that contains emails.")
 
 
 
544
  else:
545
  raise ValueError(f"Email parsing failed: {error_msg}")
 
 
 
 
 
 
 
 
546
 
547
  except ValueError:
548
  # Re-raise ValueError as-is (these are user-friendly messages)
@@ -551,10 +563,84 @@ def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
551
  # Catch any other unexpected errors
552
  error_msg = str(e)
553
  if "403" in error_msg or "AxiosError" in error_msg:
554
- raise ValueError("File access denied (403 error). Please try:\n"
555
- "1. Refreshing the page\n"
556
- "2. Using a different browser\n"
557
- "3. Uploading the file again\n"
558
- "4. Checking your internet connection")
 
559
  else:
560
  raise ValueError(f"Unexpected error: {error_msg}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  def parse_uploaded_file_with_filters_safe(uploaded_file, filter_settings=None):
494
  """
495
  Safe wrapper for parsing uploaded files with comprehensive error handling.
496
+ Optimized for Hugging Face Spaces environment.
497
  """
498
  if filter_settings is None:
499
  filter_settings = {}
 
506
  if not uploaded_file.name.lower().endswith('.zip'):
507
  raise ValueError("File must be a ZIP archive (.zip)")
508
 
509
+ # Use Hugging Face Spaces-specific file handling
510
  try:
511
+ tmp_path = handle_hf_spaces_upload(uploaded_file)
 
 
 
 
 
 
 
512
  except Exception as e:
513
+ raise ValueError(str(e))
 
 
 
 
 
514
 
515
+ # Parse using temporary file
516
  try:
517
+ # Create a file-like object for the existing parsing function
518
+ with open(tmp_path, 'rb') as f:
519
+ import io
520
+ uploaded_file_like = io.BytesIO(f.read())
521
+ uploaded_file_like.name = uploaded_file.name
522
+
523
+ return parse_uploaded_file_with_filters(
524
+ uploaded_file_like, filter_settings
525
+ )
526
  except Exception as parse_error:
527
  error_msg = str(parse_error)
528
 
529
+ # Handle specific error types with HF Spaces context
530
  if "403" in error_msg or "AxiosError" in error_msg:
531
+ raise ValueError(
532
+ "🚫 Hugging Face Spaces file access error (403). Try:\n"
533
+ " Wait 30 seconds and upload again\n"
534
+ " Refresh the page (F5)\n"
535
+ " Use a smaller file (under 50MB)\n"
536
+ "• Try Chrome or Firefox browser"
537
+ )
538
  elif "BadZipFile" in error_msg or "zipfile" in error_msg.lower():
539
+ raise ValueError(
540
+ "Invalid ZIP file. Please ensure you uploaded a valid "
541
+ "Gmail Takeout ZIP file."
542
+ )
543
  elif "mbox" in error_msg.lower():
544
+ raise ValueError(
545
+ "No email data found. Please ensure you uploaded a "
546
+ "Gmail Takeout file that contains emails."
547
+ )
548
  else:
549
  raise ValueError(f"Email parsing failed: {error_msg}")
550
+ finally:
551
+ # Cleanup temporary file
552
+ try:
553
+ import os
554
+ if os.path.exists(tmp_path):
555
+ os.unlink(tmp_path)
556
+ except Exception:
557
+ pass # Ignore cleanup errors
558
 
559
  except ValueError:
560
  # Re-raise ValueError as-is (these are user-friendly messages)
 
563
  # Catch any other unexpected errors
564
  error_msg = str(e)
565
  if "403" in error_msg or "AxiosError" in error_msg:
566
+ raise ValueError(
567
+ "🚫 Hugging Face Spaces error (403). Please try:\n"
568
+ " Refreshing the page and uploading again\n"
569
+ " Using a smaller file (under 50MB)\n"
570
+ " Trying a different browser"
571
+ )
572
  else:
573
  raise ValueError(f"Unexpected error: {error_msg}")
574
+
575
def handle_hf_spaces_upload(uploaded_file, *, max_retries=3, retry_delay=1,
                            max_size_mb=50):
    """
    Copy an uploaded file's bytes into a temporary ``.zip`` file on disk.

    The upload is read with retries and exponential backoff, validated
    (non-empty, under the size limit) and then written to a named temporary
    file that is NOT deleted automatically.

    Args:
        uploaded_file: Object exposing ``seek(pos)`` and ``getvalue()``;
            ``getvalue()`` is expected to return the full content as bytes
            (e.g. an ``io.BytesIO``-like upload object).
        max_retries: Maximum number of attempts to read the upload.
        retry_delay: Seconds to wait before the second attempt; the wait
            doubles after every failed attempt.
        max_size_mb: Largest accepted upload size, in megabytes.

    Returns:
        Path of the temporary file holding the uploaded bytes. The caller
        owns this file and must remove it when finished.

    Raises:
        ValueError: If the upload cannot be read after all attempts, is
            empty, exceeds ``max_size_mb``, or the temporary file cannot
            be written. Messages are written to be shown to the end user.
    """
    import os
    import tempfile
    import time

    delay = retry_delay
    for attempt in range(max_retries):
        try:
            # Rewind first in case an earlier reader moved the file pointer.
            uploaded_file.seek(0)
            file_content = uploaded_file.getvalue()
            break
        except ValueError:
            # ValueErrors are treated as final, user-facing errors.
            raise
        except Exception as e:
            if attempt < max_retries - 1:
                time.sleep(delay)
                delay *= 2  # Exponential backoff
                continue
            if "403" in str(e) or "AxiosError" in str(e):
                raise ValueError(
                    "🚫 Hugging Face Spaces upload error. Please:\n"
                    "• Try a smaller file (under 50MB)\n"
                    "• Refresh the page and try again\n"
                    "• Use Chrome or Firefox browser"
                ) from e
            raise ValueError(
                f"Hugging Face Spaces file processing error: {e}"
            ) from e
    else:
        # Only reached when no attempt was made at all (max_retries <= 0).
        raise ValueError("Failed to process file after multiple attempts")

    # Validate file content
    if not file_content:
        raise ValueError("Uploaded file is empty")

    file_size_mb = len(file_content) / (1024 * 1024)
    if file_size_mb > max_size_mb:
        raise ValueError(
            f"File too large ({file_size_mb:.1f}MB). "
            f"Hugging Face Spaces works best with files under "
            f"{max_size_mb:g}MB."
        )

    tmp_path = None
    try:
        # delete=False: the file must outlive this function so the caller
        # can reopen it by path.
        with tempfile.NamedTemporaryFile(
            delete=False,
            suffix='.zip',
        ) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(file_content)
        return tmp_path
    except Exception as e:
        # Do not leave a partially written file behind on failure.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # Best-effort cleanup; report the original failure.
        raise ValueError(
            f"Cannot create temporary file in Hugging Face Spaces: {e}"
        ) from e