Spaces:
Sleeping
Sleeping
| # %%writefile file_loader.py | |
| from PyPDF2 import PdfReader | |
| from docx import Document | |
def load_text_from_file(uploaded_file):
    """Extract the text content of an uploaded PDF, Word, or plain-text file.

    The file type is taken from the extension of ``uploaded_file.name``
    (case-insensitive).

    Args:
        uploaded_file: A file-like object exposing a ``name`` attribute. For
            ``.txt`` files it must also provide ``read()`` returning bytes;
            for the other types it is handed directly to ``PdfReader`` /
            ``Document``.

    Returns:
        The extracted text. PDF pages and Word paragraphs are joined with
        newlines; text files are returned decoded.

    Raises:
        ValueError: If the name has no extension, the extension is not
            supported, or reading/parsing the file fails.
        ImportError: If an ImportError surfaces while processing the file.
    """
    file_name = uploaded_file.name
    if "." not in file_name:
        raise ValueError("File has no extension")
    file_type = file_name.rsplit(".", 1)[-1].lower()

    try:
        if file_type == "pdf":
            reader = PdfReader(uploaded_file)
            # Coalesce to "" so a page without extractable text (extract_text
            # may return None in some PyPDF2 releases) cannot break the join.
            return "\n".join(page.extract_text() or "" for page in reader.pages)

        if file_type in ("docx", "doc"):
            # NOTE(review): python-docx targets the .docx format; a legacy
            # binary .doc will presumably fail here and be reported as
            # "Error processing file" -- confirm whether .doc should be
            # rejected up front instead.
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)

        if file_type == "txt":
            content = uploaded_file.read()
            # Try the strict encodings first. cp1252 must come before latin-1:
            # latin-1 accepts every byte sequence, so anything listed after it
            # would never be attempted.
            for encoding in ("utf-8", "cp1252"):
                try:
                    return content.decode(encoding)
                except UnicodeDecodeError:
                    continue
            # Last resort: latin-1 maps each byte to a code point and cannot
            # raise UnicodeDecodeError.
            return content.decode("latin-1")
    except ImportError as e:
        raise ImportError(f"Required library not installed: {e}") from e
    except Exception as e:
        raise ValueError(f"Error processing file: {e}") from e

    # Raised outside the try so the message is not re-wrapped as
    # "Error processing file: ...".
    raise ValueError(f"Unsupported file type: {file_type}")