Spaces:
Sleeping
Sleeping
File size: 1,231 Bytes
1595f22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# %%writefile file_loader.py
from PyPDF2 import PdfReader
from docx import Document
def _decode_text(content):
    """Decode raw file content to ``str`` using a best-effort encoding chain.

    Strict encodings are tried first. ``latin-1`` maps every possible byte to
    a code point and therefore never fails, so it must be the last resort;
    placing it earlier would make any later encoding unreachable.
    """
    if isinstance(content, str):
        # The file-like object was opened in text mode; nothing to decode.
        return content
    for encoding in ("utf-8", "cp1252"):
        try:
            return content.decode(encoding)
        except UnicodeDecodeError:
            continue
    return content.decode("latin-1")


def load_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF, Word, or text file.

    The file type is taken from the extension of ``uploaded_file.name``
    (case-insensitive).

    Args:
        uploaded_file: A file-like object with a ``name`` attribute. It is
            passed directly to ``PdfReader`` / ``Document`` for pdf and
            docx/doc files, and its ``read()`` method is called for txt files.

    Returns:
        The extracted text. PDF pages and Word paragraphs are joined with
        newlines; text files are decoded as UTF-8, then cp1252, then latin-1.

    Raises:
        ValueError: If the name has no extension, the extension is not
            supported, or anything goes wrong while reading the file (the
            original error is chained as ``__cause__``).
        ImportError: If an ImportError surfaces while processing the file.
    """
    if "." not in uploaded_file.name:
        raise ValueError("File has no extension")

    file_type = uploaded_file.name.split(".")[-1].lower()

    try:
        if file_type == "pdf":
            reader = PdfReader(uploaded_file)
            # extract_text() can yield None/empty for pages without a text
            # layer (e.g. scanned images); coerce so join() does not fail.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        elif file_type in ("docx", "doc"):
            # NOTE(review): python-docx reads the .docx (OOXML) format; a
            # legacy binary .doc will presumably fail inside Document() and be
            # reported through the ValueError below - confirm and consider
            # rejecting ".doc" explicitly.
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)
        elif file_type == "txt":
            return _decode_text(uploaded_file.read())
        else:
            raise ValueError(f"Unsupported file type: {file_type}")
    except ImportError as e:
        raise ImportError(f"Required library not installed: {e}") from e
    except Exception as e:
        # Every failure (including the unsupported-type error raised above) is
        # reported as ValueError with this prefix; callers may match on it.
        raise ValueError(f"Error processing file: {e}") from e
|