Spaces:
Sleeping
Sleeping
| import base64 | |
| import pdfplumber | |
| from transformers import pipeline | |
# Function to extract the raw text content of a PDF, page by page
def get_pdf_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Parameters:
        pdf_file: a path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: all extractable page text joined together; pages with no
        extractable text (e.g. scanned images) contribute an empty string.
    """
    pages_text = []
    # Open the PDF file and extract text from each page.
    # extract_text() returns None for pages without extractable text,
    # so guard with `or ""` to avoid a TypeError on concatenation.
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            pages_text.append(page.extract_text() or "")
    return "".join(pages_text)
def display_pdf(file_path):
    """Build an HTML <iframe> that renders the PDF at *file_path* inline.

    Parameters:
        file_path: filesystem path of the PDF to embed.

    Returns:
        str: an iframe tag whose src is a base64 data URI of the PDF bytes.
    """
    # Slurp the raw PDF bytes from disk.
    with open(file_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    # Encode as base64 so the document can travel inside a data: URI.
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    # Wrap the data URI in an iframe sized to fill the page width.
    return f'<iframe src="data:application/pdf;base64,{encoded}" width="100%" height="600px"></iframe>'
def split_text(text, max_length):
    """Break *text* into chunks of at most *max_length* whitespace-separated words."""
    tokens = text.split()
    pieces = []
    # Walk the token list in strides of max_length, re-joining each window.
    for start in range(0, len(tokens), max_length):
        pieces.append(" ".join(tokens[start:start + max_length]))
    return pieces
def summarize(text, max_length):
    """Summarize *text* with facebook/bart-large-cnn, chunk by chunk.

    Parameters:
        text (str): the full document text to summarize.
        max_length (int): maximum number of words per chunk fed to the model.

    Returns:
        str: the per-chunk summaries joined with spaces ("" for empty input).
    """
    # Short-circuit empty/whitespace-only input before touching the model.
    if not text or not text.split():
        return ""
    # Build the summarization pipeline once and cache it on the function:
    # constructing it reloads the large model weights on every call.
    if getattr(summarize, "_summarizer", None) is None:
        summarize._summarizer = pipeline(task="summarization", model='facebook/bart-large-cnn')
    summarizer = summarize._summarizer
    # Split into chunks of max_length words so each fits the model input.
    text_chunks = split_text(text, max_length=max_length)
    # Summarize each chunk and combine the results into a final summary.
    summaries = [summarizer(chunk)[0]['summary_text'] for chunk in text_chunks]
    return ' '.join(summaries)