Iskabore's picture
create summarizer module
9057a10
import PyPDF2
import os
from langdetect import detect
def read_txt_file(filepath: str) -> str:
    """Return the entire contents of a UTF-8 encoded text file.

    Any exception raised while opening or reading the file is reported on
    stdout and an empty string is returned instead of propagating it.
    """
    try:
        with open(filepath, encoding="utf-8") as handle:
            return handle.read()
    except Exception as e:
        print(f"❌ Error reading TXT file: {e}")
    # Only reached when the read failed.
    return ""
def read_pdf_file(filepath: str) -> str:
    """Extract the text of every page of a PDF file using PyPDF2.

    Pages are read in document order and joined with a newline so that the
    last word of one page does not fuse with the first word of the next.

    Args:
        filepath: Path to the PDF file.

    Returns:
        The extracted text with leading and trailing whitespace stripped,
        or an empty string if the file cannot be opened or parsed. In that
        case the error is printed to stdout rather than raised.
    """
    try:
        with open(filepath, "rb") as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Extract inside the `with` block, while the file handle the
            # reader was built on is still open.
            # `or ""` is defensive: a page that yields nothing usable must
            # not make the join fail and discard the whole document.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
        return "\n".join(page_texts).strip()
    except Exception as e:
        print(f"❌ Error reading PDF file: {e}")
        return ""
def read_file(filepath: str) -> str:
    """Return the text of a ``.txt`` or ``.pdf`` file, chosen by extension.

    The extension comparison is case-insensitive. ``.txt`` paths are handed
    to ``read_txt_file`` and ``.pdf`` paths to ``read_pdf_file``; any other
    extension prints an error message and yields an empty string.
    """
    _, extension = os.path.splitext(filepath)
    suffix = extension.lower()

    if suffix == ".txt":
        return read_txt_file(filepath)
    if suffix == ".pdf":
        return read_pdf_file(filepath)

    print(f"❌ Unsupported file type: {filepath}")
    return ""
def detect_language(text: str) -> str:
    """Return the language identified by ``langdetect.detect`` for *text*.

    If detection raises for any reason, the string ``"unknown"`` is
    returned instead of propagating the exception.
    """
    try:
        language = detect(text)
    except Exception:
        language = "unknown"
    return language