Update ui/ui_core.py
Browse files - ui/ui_core.py +7 -4
ui/ui_core.py
CHANGED
|
@@ -42,21 +42,24 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
|
|
| 42 |
|
| 43 |
df = None
|
| 44 |
if file_path.endswith(".csv"):
|
| 45 |
-
df = pd.read_csv(file_path, encoding_errors="replace", dtype=str,
|
| 46 |
elif file_path.endswith((".xls", ".xlsx")):
|
| 47 |
try:
|
| 48 |
-
df = pd.read_excel(file_path, engine="openpyxl",
|
| 49 |
except:
|
| 50 |
-
df = pd.read_excel(file_path, engine="xlrd",
|
| 51 |
|
| 52 |
if df is None or df.empty:
|
| 53 |
return f"[Warning] No data extracted from: {file_path}"
|
| 54 |
|
|
|
|
|
|
|
| 55 |
lines = []
|
| 56 |
for _, row in df.iterrows():
|
| 57 |
-
line = " | ".join(str(cell) for cell in row if
|
| 58 |
if line:
|
| 59 |
lines.append(line)
|
|
|
|
| 60 |
return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
|
| 61 |
|
| 62 |
except Exception as e:
|
|
|
|
| 42 |
|
| 43 |
df = None
|
| 44 |
if file_path.endswith(".csv"):
|
| 45 |
+
df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
|
| 46 |
elif file_path.endswith((".xls", ".xlsx")):
|
| 47 |
try:
|
| 48 |
+
df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
|
| 49 |
except:
|
| 50 |
+
df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
|
| 51 |
|
| 52 |
if df is None or df.empty:
|
| 53 |
return f"[Warning] No data extracted from: {file_path}"
|
| 54 |
|
| 55 |
+
df = df.fillna("") # Handle missing data gracefully
|
| 56 |
+
|
| 57 |
lines = []
|
| 58 |
for _, row in df.iterrows():
|
| 59 |
+
line = " | ".join(str(cell) for cell in row if str(cell).strip())
|
| 60 |
if line:
|
| 61 |
lines.append(line)
|
| 62 |
+
|
| 63 |
return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
|
| 64 |
|
| 65 |
except Exception as e:
|