romybeaute committed on
Commit
f6ee2b8
·
verified ·
1 Parent(s): 42d0184

Handle Mac-formatted CSV uploads by trying several encodings in turn (UTF-8, Mac Roman, cp1252, ISO-8859-1)

Browse files
Files changed (1) hide show
  1. app.py +37 -7
app.py CHANGED
@@ -465,18 +465,48 @@ else:
465
  # st.error("Uploaded CSV is empty.")
466
  # st.stop()
467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  if up is not None:
469
- try:
470
- # Try loading as standard UTF-8
471
- tmp_df = pd.read_csv(up)
472
- except UnicodeDecodeError:
473
- # If that fails (e.g., Excel/Windows CSV), try ISO-8859-1 (Latin-1)
474
- up.seek(0) # Reset file pointer to the beginning
475
- tmp_df = pd.read_csv(up, encoding='ISO-8859-1')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
 
477
  if tmp_df.empty:
478
  st.error("Uploaded CSV is empty.")
479
  st.stop()
 
 
 
480
 
481
  # Just save; we’ll choose the text column later
482
  uploaded_csv_path = str((PROC_DIR / "uploaded.csv").resolve())
 
465
  # st.error("Uploaded CSV is empty.")
466
  # st.stop()
467
 
468
+ # if up is not None:
469
+ # try:
470
+ # # Try loading as standard UTF-8
471
+ # tmp_df = pd.read_csv(up)
472
+ # except UnicodeDecodeError:
473
+ # # If that fails (e.g., Excel/Windows CSV), try ISO-8859-1 (Latin-1)
474
+ # up.seek(0) # Reset file pointer to the beginning
475
+ # tmp_df = pd.read_csv(up, encoding='ISO-8859-1')
476
+
477
+ # if tmp_df.empty:
478
+ # st.error("Uploaded CSV is empty.")
479
+ # st.stop()
480
+
481
  if up is not None:
482
+ # Encodings to try, in order (the final entry, ISO-8859-1, is a last-resort fallback):
483
+ # 1. utf-8 (Standard)
484
+ # 2. mac_roman (Fixes the Õ and É issues from Mac Excel)
485
+ # 3. cp1252 (Standard Windows Excel)
486
+ encodings_to_try = ['utf-8', 'mac_roman', 'cp1252', 'ISO-8859-1']
487
+
488
+ tmp_df = None
489
+ success_encoding = None
490
+
491
+ for encoding in encodings_to_try:
492
+ try:
493
+ up.seek(0) # Always reset to start of file before trying
494
+ tmp_df = pd.read_csv(up, encoding=encoding)
495
+ success_encoding = encoding
496
+ break # If we get here, it worked, so stop the loop
497
+ except UnicodeDecodeError:
498
+ continue # If it fails, try the next one
499
+
500
+ if tmp_df is None:
501
+ st.error("Could not decode file. Please save your CSV as 'CSV UTF-8' in Excel.")
502
+ st.stop()
503
 
504
  if tmp_df.empty:
505
  st.error("Uploaded CSV is empty.")
506
  st.stop()
507
+
508
+ # Optional: Print which encoding worked to the logs (for your info)
509
+ print(f"Successfully loaded CSV using {success_encoding} encoding.")
510
 
511
  # Just save; we’ll choose the text column later
512
  uploaded_csv_path = str((PROC_DIR / "uploaded.csv").resolve())