# Hugging Face Space (status: Sleeping)
| import fitz # PyMuPDF | |
| import gradio as gr | |
| from transformers import pipeline | |
# Build the summarization pipeline once at import time; summarize_pdf reuses
# this single instance for every chunk it processes.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def extract_text_from_pdf(file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file: Path of the PDF to read; passed straight to ``fitz.open``.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when no page yields any text.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if text extraction raises part-way through.
    with fitz.open(file) as doc:
        return "".join(page.get_text() for page in doc)
def summarize_pdf(file):
    """Summarize a PDF by summarizing fixed-size slices of its text.

    The extracted text is cut into consecutive slices of ``max_chunk``
    characters, each slice is summarized independently, and the partial
    summaries are joined with single spaces.

    Args:
        file: Path of the uploaded PDF, as handed over by the Gradio
            ``File`` input.

    Returns:
        The combined summary with surrounding whitespace removed, or an
        empty string when the PDF contains no text to summarize.
    """
    raw_text = extract_text_from_pdf(file)

    # Slice by characters to keep each model input small. This is only a
    # proxy for the model's token limit, hence truncation=True below.
    max_chunk = 1024
    chunks = (
        raw_text[start:start + max_chunk]
        for start in range(0, len(raw_text), max_chunk)
    )

    partial_summaries = []
    for chunk in chunks:
        # A slice made only of whitespace (blank pages, padding) has nothing
        # to summarize; sending it to the model would just produce noise.
        if not chunk.strip():
            continue
        res = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # A 1024-character slice can still tokenize to more tokens than
            # the model accepts (e.g. dense non-Latin text); let the
            # tokenizer clip it instead of failing the whole request.
            truncation=True,
        )
        partial_summaries.append(res[0]["summary_text"])

    return " ".join(partial_summaries).strip()
# Gradio UI: one PDF upload in, one text box with the summary out.
# type="filepath" makes Gradio hand summarize_pdf a path string.
_pdf_input = gr.File(type="filepath", label="Upload a PDF")
_summary_output = gr.Textbox(label="Summary")

demo = gr.Interface(
    fn=summarize_pdf,
    inputs=_pdf_input,
    outputs=_summary_output,
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get an AI-generated summary using Hugging Face Transformers.",
)

if __name__ == "__main__":
    # Start the app with Gradio's server-side rendering switched off.
    demo.launch(ssr_mode=False)