Spaces:

HF-Pawan
/

Document-Summarization

Running

anyonehomep1mane

Modular Changes, UI changes

901814c 30 days ago

911 Bytes

	import os
	import PyPDF2
	from docx import Document

	# Message returned by extract_text() when the file extension is not .txt, .pdf, or .docx.
	SUPPORTED_ERROR = "❌ Supported formats: .txt, .pdf, .docx"

	def extract_text(file_path: str) -> str:
	    """Extract plain text from a .txt, .pdf, or .docx file.

	    The reader is selected from the extension of ``file_path``, compared
	    case-insensitively.

	    Args:
	        file_path: Path of the document to read.

	    Returns:
	        The extracted text with leading/trailing whitespace stripped.
	        For any other extension, ``SUPPORTED_ERROR`` is returned. If
	        opening or parsing the file raises, the exception is caught and a
	        string of the form "❌ Error reading file: <message>" is returned
	        instead, so callers must inspect the returned text for failures.
	    """
	    ext = os.path.splitext(file_path)[1].lower()

	    try:
	        if ext == ".txt":
	            # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM;
	            # with plain "utf-8" the BOM survives as "\ufeff", which
	            # str.strip() does not treat as whitespace.
	            with open(file_path, "r", encoding="utf-8-sig") as f:
	                return f.read().strip()

	        if ext == ".pdf":
	            with open(file_path, "rb") as f:
	                reader = PyPDF2.PdfReader(f)
	                # Extract while the file is still open. A page whose
	                # extract_text() result is falsy contributes only a newline.
	                page_texts = [
	                    (page.extract_text() or "") + "\n"
	                    for page in reader.pages
	                ]
	            return "".join(page_texts).strip()

	        if ext == ".docx":
	            doc = Document(file_path)
	            # Skip paragraphs that are empty or whitespace-only.
	            return "\n".join(
	                p.text for p in doc.paragraphs if p.text.strip()
	            ).strip()

	        return SUPPORTED_ERROR

	    except Exception as e:
	        # Deliberate catch-all: failures are reported as text, not raised.
	        return f"❌ Error reading file: {e}"