exampletwo / app.py
tejovanth's picture
Update app.py
bfd9f8e verified
raw
history blame
1.28 kB
import gradio as gr
from transformers import pipeline
import fitz # PyMuPDF
# Build the summarization pipeline once at module load; summarize_pdf reuses
# this single instance for every call.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_file: The PDF to read. Accepted forms:
            * a binary file-like object exposing ``read()``,
            * a filesystem path (``str`` or ``os.PathLike``),
            * the raw PDF content as ``bytes`` / ``bytearray``.

    Returns:
        The text of all pages joined in page order. An empty string is
        returned when no page yields any text.

    Raises:
        Whatever ``fitz.open`` raises for unreadable or malformed input;
        callers are expected to handle it.
    """
    import os  # local import: only needed for path detection here

    if isinstance(pdf_file, (bytes, bytearray)):
        open_kwargs = {"stream": bytes(pdf_file), "filetype": "pdf"}
    elif isinstance(pdf_file, (str, os.PathLike)):
        # Uploads may arrive as a path on disk rather than an open file.
        open_kwargs = {"filename": os.fspath(pdf_file), "filetype": "pdf"}
    else:
        open_kwargs = {"stream": pdf_file.read(), "filetype": "pdf"}

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(**open_kwargs) as doc:
        return "".join(page.get_text() for page in doc)
# Combine everything into one function
def summarize_pdf(pdf_file):
    """Extract the text of an uploaded PDF and return a short summary.

    Args:
        pdf_file: The uploaded PDF, handed unchanged to
            ``extract_text_from_pdf``. ``None`` means nothing was uploaded.

    Returns:
        The generated summary text, or a human-readable message when no file
        was supplied, when the PDF has no extractable text, or when any step
        fails (``"Error: <details>"``). This function never raises, so the
        UI always receives a string to display.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    try:
        text = extract_text_from_pdf(pdf_file)
        if not text.strip():
            return "The PDF seems empty or text is not extractable."
        # Cheap character cap to bound the work on long documents (the BART
        # model has a ~1024 token limit). Characters are not tokens, so
        # truncation=True below lets the tokenizer enforce the real limit.
        text = text[:3000]
        summary = summarizer(
            text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of crashing.
        return f"Error: {e}"
# Web UI: a single file-upload input wired to summarize_pdf, with the result
# shown in a text box.
pdf_upload = gr.File(label="Upload PDF of Academic Notes")
summary_box = gr.Textbox(label="Summarized Notes")

demo = gr.Interface(
    fn=summarize_pdf,
    inputs=pdf_upload,
    outputs=summary_box,
    title="📄 Academic Note Summarizer (PDF)",
    description=(
        "Upload your academic notes in PDF format. "
        "The app will extract and summarize the content using a Hugging Face model."
    ),
)

# Start the Gradio server when this file is executed.
demo.launch()