"""Gradio app that summarizes pasted text or uploaded PDF/TXT documents.

Two tabs are exposed: one summarizes text typed into a textbox, the other
extracts text from an uploaded file and summarizes it. Both use the same
Hugging Face summarization pipeline.
"""

import os

import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline

# Load a summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Character cap applied to document text before it is summarized. This is a
# cheap pre-filter only; the token-level limit is enforced by the pipeline's
# own truncation in summarize_text().
MAX_DOCUMENT_CHARS = 1024


# Function to summarize text
def summarize_text(text, max_length, min_length):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound on the generated summary length; coerced to
            ``int`` because Gradio sliders may deliver floats.
        min_length: Lower bound on the generated summary length; coerced to
            ``int`` for the same reason.

    Returns:
        The generated summary string.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("No text to summarize.")

    summary = summarizer(
        text,
        max_length=int(max_length),
        min_length=int(min_length),
        do_sample=False,
        # Let the tokenizer cut over-long input instead of failing inside the
        # model when the input exceeds its maximum sequence length.
        truncation=True,
    )
    return summary[0]["summary_text"]


def _extract_text(path):
    """Return the text content of the PDF or plain-text file at *path*."""
    if path.lower().endswith(".pdf"):
        reader = PdfReader(path)
        # extract_text() may yield nothing for image-only pages; treat that
        # as an empty string rather than failing the whole document.
        return "".join(page.extract_text() or "" for page in reader.pages)

    with open(path, "rb") as handle:
        # Replace undecodable bytes so one stray byte in a non-UTF-8 file
        # does not abort the request.
        return handle.read().decode("utf-8", errors="replace")


# Function to summarize uploaded documents
def summarize_document(file, max_length, min_length):
    """Extract the text of an uploaded document and summarize it.

    Args:
        file: The value delivered by the ``gr.File`` component: either an
            object exposing the temp-file path as ``.name`` or a plain path
            string, depending on the Gradio version. ``None`` when nothing
            was uploaded.
        max_length: Upper bound on the generated summary length.
        min_length: Lower bound on the generated summary length.

    Returns:
        The generated summary string.

    Raises:
        gr.Error: If no file was uploaded or no text could be extracted.
    """
    if file is None:
        raise gr.Error("Please upload a PDF or TXT document first.")

    # Accept both a file-like wrapper (path in .name) and a bare path string.
    path = os.fspath(getattr(file, "name", file))
    text = _extract_text(path)
    if not text.strip():
        raise gr.Error("No extractable text was found in the document.")

    # Summarizer has input size limitations
    text = text[:MAX_DOCUMENT_CHARS]
    return summarize_text(text, max_length, min_length)


# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Document Summarizer")

    with gr.Tab("Text Summarization"):
        input_text = gr.Textbox(
            lines=10,
            placeholder="Enter the text to summarize...",
            label="Input Text",
        )
        max_len = gr.Slider(50, 300, value=130, label="Max Summary Length")
        min_len = gr.Slider(10, 50, value=30, label="Min Summary Length")
        output_text = gr.Textbox(lines=5, label="Summary")
        summarize_button = gr.Button("Summarize")
        summarize_button.click(
            summarize_text,
            [input_text, max_len, min_len],
            output_text,
        )

    with gr.Tab("Document Summarization"):
        input_file = gr.File(label="Upload a Document (PDF or TXT)")
        max_len_doc = gr.Slider(50, 300, value=130, label="Max Summary Length")
        min_len_doc = gr.Slider(10, 50, value=30, label="Min Summary Length")
        output_doc = gr.Textbox(lines=5, label="Summary")
        summarize_doc_button = gr.Button("Summarize Document")
        summarize_doc_button.click(
            summarize_document,
            [input_file, max_len_doc, min_len_doc],
            output_doc,
        )

# Launch the app
if __name__ == "__main__":
    interface.launch()