|
|
import fitz |
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
# Build the summarization pipeline once at import time so that every call to
# summarize_pdf reuses the same loaded model instead of re-creating it.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
|
|
|
|
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Location of the PDF to read; passed unchanged to ``fitz.open``.

    Returns:
        One string containing the text of all pages. It is empty when no
        page yields any text.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if extraction fails part-way through the pages.
    with fitz.open(file) as doc:
        # join() avoids repeatedly re-allocating a growing string.
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
def summarize_pdf(file):
    """Summarize a PDF by summarizing fixed-size slices of its text.

    The extracted text is cut into consecutive slices of at most 1024
    characters, each slice is summarized independently, and the partial
    summaries are joined with single spaces.

    Args:
        file: Location of the PDF; forwarded to ``extract_text_from_pdf``.

    Returns:
        The combined summary with surrounding whitespace removed, or an
        empty string when the PDF yields no non-blank text.
    """
    raw_text = extract_text_from_pdf(file)

    # The limit is counted in characters, not tokens; slices may therefore
    # cut through the middle of a word or sentence.
    max_chunk = 1024
    chunks = [
        raw_text[start:start + max_chunk]
        for start in range(0, len(raw_text), max_chunk)
    ]

    partial_summaries = []
    for chunk in chunks:
        # A slice holding only whitespace has nothing to summarize; sending
        # it to the model would just produce invented text.
        if not chunk.strip():
            continue
        res = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # Guard against a slice that tokenizes to more tokens than the
            # model accepts (possible when characters map to several tokens).
            truncation=True,
        )
        partial_summaries.append(res[0]["summary_text"])

    return " ".join(partial_summaries).strip()
|
|
|
|
|
|
|
|
# Web UI: a single file-upload input wired to summarize_pdf, with the result
# shown in a text box. type="filepath" makes Gradio hand the function a path
# rather than a file object.
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(type="filepath", label="Upload a PDF"),
    outputs=gr.Textbox(label="Summary"),
    # Plain-text title: the previous leading character was a mis-decoded
    # emoji that rendered as a stray Thai digit.
    title="PDF Summarizer",
    description="Upload a PDF file and get an AI-generated summary using Hugging Face Transformers.",
)
|
|
|
|
|
# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the UI. Server-side rendering is
# explicitly turned off.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|
|
|