Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import fitz # PyMuPDF | |
# Build the summarization pipeline once at import time so every request
# reuses the same loaded model instead of reloading it per call.
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_file: The uploaded PDF. May be a filesystem path (``str``), the
            raw PDF content (``bytes``/``bytearray``), or a binary file-like
            object exposing ``read()``.

    Returns:
        The text of all pages joined in page order; an empty string when no
        page yields any text.

    Raises:
        Whatever ``fitz.open`` raises when the input is not a readable PDF.
    """
    if isinstance(pdf_file, str):
        # NOTE(review): depending on the Gradio version / ``type`` setting the
        # File component may pass a path string rather than a file object.
        doc = fitz.open(pdf_file, filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray)):
        doc = fitz.open(stream=bytes(pdf_file), filetype="pdf")
    else:
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)
def summarize_pdf(pdf_file):
    """Extract the text of an uploaded PDF and return a short summary.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio ``File``
            input, or ``None`` when nothing was uploaded.

    Returns:
        The generated summary on success; otherwise a human-readable message
        describing why no summary could be produced. Errors are returned as
        text (not raised) because the result is shown directly in the UI.
    """
    if pdf_file is None:
        # Submitting without an upload would otherwise surface as a cryptic
        # attribute error from the extraction step.
        return "Please upload a PDF file first."

    # Cheap character-level cut that keeps very long documents from being
    # tokenized in full; the model itself accepts only ~1024 tokens.
    max_chars = 3000

    try:
        text = extract_text_from_pdf(pdf_file)
        if not text.strip():
            return "The PDF seems empty or text is not extractable."

        summary = summarizer(
            text[:max_chars],
            max_length=150,
            min_length=40,
            do_sample=False,
            # The character cut does not guarantee the token limit is met;
            # let the tokenizer truncate as a backstop.
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error: {e}"
# Gradio interface: one PDF upload in, one text box with the summary out.
demo = gr.Interface(
    fn=summarize_pdf,
    # Restrict the picker to PDFs so other file types are rejected up front.
    inputs=gr.File(label="Upload PDF of Academic Notes", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Summarized Notes"),
    title="📄 Academic Note Summarizer (PDF)",
    description=(
        "Upload your academic notes in PDF format. The app will extract and "
        "summarize the content using a Hugging Face model."
    ),
)

# Only start the server when run as a script, so importing this module
# (e.g. from tests or tooling) does not launch the web app.
if __name__ == "__main__":
    demo.launch()