"""Gradio app that extracts the text of an uploaded PDF and summarizes it."""

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline

# Size, in characters, of each slice of PDF text handed to the model.
MAX_CHUNK_CHARS = 1024
# Length bounds passed to the summarizer for every chunk.
SUMMARY_MAX_LENGTH = 130
SUMMARY_MIN_LENGTH = 30

# Initialize the summarizer pipeline once at import time so that every
# request reuses the same loaded model.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Path of the PDF on disk. The UI below configures
            ``gr.File(type="filepath")``, so the callback receives a path
            rather than an open file object.

    Returns:
        The text of all pages joined in page order, with no separator
        added between pages.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(file) as doc:
        return "".join(page.get_text() for page in doc)


def summarize_pdf(file):
    """Summarize the text of a PDF chunk by chunk.

    The extracted text is cut into fixed-size character slices, each slice
    is summarized independently, and the partial summaries are joined with
    single spaces.

    Args:
        file: Path of the uploaded PDF, or ``None`` when nothing was
            uploaded.

    Returns:
        The combined summary, or an empty string when there is no file or
        the PDF contains no extractable text.
    """
    # Gradio may invoke the callback with None when no file was uploaded.
    if file is None:
        return ""

    raw_text = extract_text_from_pdf(file)

    # Slice by characters to keep each model input small.
    chunks = (
        raw_text[start:start + MAX_CHUNK_CHARS]
        for start in range(0, len(raw_text), MAX_CHUNK_CHARS)
    )

    partial_summaries = []
    for chunk in chunks:
        # Whitespace-only slices (e.g. from image-only pages) carry nothing
        # to summarize, so they are not sent to the model.
        if not chunk.strip():
            continue
        result = summarizer(
            chunk,
            max_length=SUMMARY_MAX_LENGTH,
            min_length=SUMMARY_MIN_LENGTH,
            do_sample=False,
            # The chunk limit counts characters, not tokens; truncation
            # guards against a slice that still tokenizes past the model's
            # input window.
            truncation=True,
        )
        partial_summaries.append(result[0]["summary_text"])

    return " ".join(partial_summaries).strip()


# Gradio UI
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(type="filepath", label="Upload a PDF"),
    outputs=gr.Textbox(label="Summary"),
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get an AI-generated summary using Hugging Face Transformers.",
)

if __name__ == "__main__":
    demo.launch(ssr_mode=False)