Spaces:
Sleeping
Sleeping
File size: 624 Bytes
021dc25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
import os
import pdfplumber
from docx import Document
def parse_file(file_path) -> str:
    """Return the plain-text content of a .txt, .docx, or .pdf file.

    The format is chosen from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text. For ``.txt`` the file content decoded as UTF-8;
        for ``.docx`` the paragraph texts joined with newlines; for ``.pdf``
        the per-page texts joined with newlines, with leading and trailing
        whitespace stripped. For any other extension the literal string
        ``"Unsupported file format."`` is returned (no exception is raised).
    """
    ext = os.path.splitext(file_path)[-1].lower()

    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if ext == ".docx":
        doc = Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)

    if ext == ".pdf":
        with pdfplumber.open(file_path) as pdf:
            # extract_text() can yield None for a page with no extractable
            # text (e.g. image-only pages); treat that as an empty page
            # instead of failing on `None + "\n"`.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        return "\n".join(page_texts).strip()

    # Callers receive this sentinel string rather than an exception.
    return "Unsupported file format."
|