Spaces:
Runtime error
Runtime error
Laurine Sottani
committed on
Commit
·
cf99660
1
Parent(s):
fa0c299
add docx support
Browse files- file_cleaning_ui.py +12 -0
file_cleaning_ui.py
CHANGED
|
@@ -53,6 +53,18 @@ def extract_and_clean_txt(txt_path: str) -> str:
|
|
| 53 |
]
|
| 54 |
return "\n".join(cleaned)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
def process_file(input_file: gr.File, output_name: str) -> str:
|
| 57 |
"""
|
| 58 |
- Detecte le type (PDF ou texte)
|
|
|
|
| 53 |
]
|
| 54 |
return "\n".join(cleaned)
|
| 55 |
|
| 56 |
+
def extract_and_clean_docx(docx_path: str) -> str:
    """Read a Word (.docx) file and return its cleaned text.

    A .docx file is a ZIP archive whose main body lives in
    ``word/document.xml``; it cannot be read as UTF-8 text directly.
    The text of every paragraph is extracted, split into lines, stripped,
    passed through ``clean_text_for_rag`` and joined with newlines.
    Blank lines are dropped.

    If ``docx_path`` is not a ZIP archive, the file is read as plain
    UTF-8 text, line by line, as a best-effort fallback.

    Args:
        docx_path: Path of the file to read.

    Returns:
        The cleaned text, one non-empty line per row.

    Raises:
        KeyError: If the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: If the document XML is malformed.
    """
    # Local imports keep this block self-contained; both modules are stdlib.
    import xml.etree.ElementTree as ET
    import zipfile

    # Namespace of WordprocessingML elements (w:p, w:t, w:tab, w:br, ...).
    w_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    paragraph_tag = f"{w_ns}p"
    text_tag = f"{w_ns}t"
    tab_tag = f"{w_ns}tab"
    break_tags = (f"{w_ns}br", f"{w_ns}cr")

    print(f"[+] Lecture du fichier docx : {docx_path}")

    if zipfile.is_zipfile(docx_path):
        with zipfile.ZipFile(docx_path) as archive:
            # NOTE(review): the XML comes from a user-supplied file; the
            # stdlib parser relies on the underlying expat version for
            # protection against entity-expansion attacks.
            root = ET.fromstring(archive.read("word/document.xml"))

        raw_lines = []
        for paragraph in root.iter(paragraph_tag):
            pieces = []
            for node in paragraph.iter():
                if node.tag == text_tag:
                    pieces.append(node.text or "")
                elif node.tag == tab_tag:
                    pieces.append("\t")
                elif node.tag in break_tags:
                    # Explicit line breaks inside a paragraph become
                    # separate lines, like in a plain-text file.
                    pieces.append("\n")
            raw_lines.extend("".join(pieces).splitlines())
    else:
        # Not an OOXML container: fall back to reading plain text.
        with open(docx_path, "r", encoding="utf-8") as f:
            raw_lines = f.readlines()

    cleaned = [
        clean_text_for_rag(line.strip())
        for line in raw_lines
        if line.strip()
    ]
    return "\n".join(cleaned)
|
| 67 |
+
|
| 68 |
def process_file(input_file: gr.File, output_name: str) -> str:
|
| 69 |
"""
|
| 70 |
- Detecte le type (PDF ou texte)
|