Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import PyPDF2 | |
# Build the shared summarization pipeline once, at import time, so every
# call to the summarizer reuses the same loaded model.
summarizer_pipeline = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def _read_pdf_pages(stream):
    """Return the newline-joined text of every page readable from *stream*."""
    reader = PyPDF2.PdfReader(stream)
    # The `or ""` guard covers pages for which extraction returns nothing.
    page_texts = (page.extract_text() or "" for page in reader.pages)
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; pages that produced no text are skipped.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()


def extract_text_from_pdf(file):
    """Extract the text of all pages of a PDF.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            already-open binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages, separated by newlines, with leading and
        trailing whitespace removed. An empty string when no page yields
        any text.
    """
    import os  # local import: only needed for the path-type check

    if isinstance(file, (str, os.PathLike)):
        # Extraction must finish while the handle is still open.
        with open(file, "rb") as handle:
            return _read_pdf_pages(handle)
    return _read_pdf_pages(file)
def summarize_text(text, max_length=130, min_length=30, max_input_chars=3000):
    """Summarize *text* with the module-level summarization pipeline.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only
            input is treated as "no text".
        max_length: Upper bound on the generated summary length, forwarded
            to the pipeline.
        min_length: Lower bound on the generated summary length, forwarded
            to the pipeline.
        max_input_chars: Number of leading characters of the stripped text
            that are passed to the model. ``None`` disables the character
            cap.

    Returns:
        The generated summary string, or the fixed message
        ``"No valid text found in the PDF."`` when there is nothing to
        summarize.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return "No valid text found in the PDF."

    summary = summarizer_pipeline(
        cleaned[:max_input_chars],
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # A character cap does not bound the token count; ask the tokenizer
        # to truncate so over-long inputs do not overflow the model.
        truncation=True,
    )
    return summary[0]['summary_text']