# Ai-Resume-Ranking / utils / file_loader.py
# Uploaded by yhng2525 ("Upload 15 files"), commit 1595f22 (verified).
# %%writefile file_loader.py
from PyPDF2 import PdfReader
from docx import Document
def _decode_text_bytes(content):
    """Decode raw bytes from a text file into ``str``.

    Encodings are tried from strictest to most permissive:

    1. ``utf-8-sig`` - plain UTF-8, with a leading byte-order mark removed.
    2. ``cp1252``    - Windows-1252; fails only on its few undefined bytes.
    3. ``latin-1``   - maps every byte to a code point, so it cannot fail
       and must come last (anything listed after it would never be tried).
    """
    for encoding in ("utf-8-sig", "cp1252"):
        try:
            return content.decode(encoding)
        except UnicodeDecodeError:
            continue
    return content.decode("latin-1")


def load_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF, Word, or text file.

    The file type is taken from the extension of ``uploaded_file.name``
    (case-insensitive).

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute. For
            ``.txt`` files its ``read()`` method is called; for ``.pdf`` and
            ``.docx``/``.doc`` the object is handed directly to
            ``PdfReader`` / ``Document``.

    Returns:
        The extracted text. PDF pages and Word paragraphs are joined with
        newlines.

    Raises:
        ValueError: If the name has no extension, the extension is not
            supported, or the underlying parser fails on the file.
        ImportError: If a required library turns out to be missing while
            the file is being processed.
    """
    filename = uploaded_file.name
    if "." not in filename:
        raise ValueError("File has no extension")

    file_type = filename.rsplit(".", 1)[-1].lower()

    # Reject unknown types before entering the try block so this error is
    # not re-wrapped as a misleading "Error processing file: ..." message.
    if file_type not in ("pdf", "docx", "doc", "txt"):
        raise ValueError(f"Unsupported file type: {file_type}")

    try:
        if file_type == "pdf":
            reader = PdfReader(uploaded_file)
            # Defensive: treat a falsy extraction result (e.g. None) as an
            # empty page so that str.join cannot raise TypeError.
            return "\n".join((page.extract_text() or "") for page in reader.pages)

        if file_type in ("docx", "doc"):
            # NOTE(review): "doc" is routed to the same parser as "docx";
            # legacy binary .doc files will presumably be rejected by it and
            # surface below as "Error processing file" - confirm.
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)

        # Only "txt" remains.
        content = uploaded_file.read()
        if isinstance(content, str):
            # Stream was opened in text mode; nothing left to decode.
            return content
        return _decode_text_bytes(content)
    except ImportError as e:
        raise ImportError(f"Required library not installed: {e}") from e
    except Exception as e:
        # Normalise parser-specific failures to ValueError for callers,
        # keeping the original exception chained for debugging.
        raise ValueError(f"Error processing file: {e}") from e