Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from summarize import summarize_text | |
| from pdf2text import convert_pdf_to_text | |
| from PyPDF2 import PdfReader | |
def is_scanned_pdf(pdf_path):
    """Return True when the PDF at *pdf_path* yields no extractable text.

    Every page is probed with ``extract_text()``; the first page that
    produces non-whitespace text makes the function return False. A PDF
    with zero pages, or with only empty/whitespace-only pages, is reported
    as scanned.

    Args:
        pdf_path: Path (or any object accepted by ``PdfReader``) of the PDF.

    Returns:
        bool: False if any page has real text, True otherwise. Also True
        when reading or parsing fails, so the caller falls back to the
        scanned-document path.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            page_text = page.extract_text()
            # Whitespace-only output is not a usable text layer, so it must
            # not short-circuit the "scanned" verdict.
            if page_text and page_text.strip():
                return False
        return True
    except Exception:
        # Best-effort fallback: an unreadable PDF is assumed to be scanned.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return True
def summarize_from_text(text):
    """Summarize pasted text; click handler for the "Summarize Text" tab.

    The input is handed to ``summarize_text`` unchanged and that function's
    result is returned as-is.
    """
    summary = summarize_text(text)
    return summary
def summarize_from_pdf(pdf_file):
    """Summarize an uploaded PDF; click handler for the "Summarize PDF" tab.

    The PDF is first checked with ``is_scanned_pdf`` and the result is
    passed as ``ocr_model`` to ``convert_pdf_to_text``. The text stored
    under the ``"converted_text"`` key of the returned mapping is then
    summarized with ``summarize_text``.

    Args:
        pdf_file: The value Gradio passes for the file input. Either a
            file-like object exposing the path as ``.name``, a plain path
            string, or None when nothing was uploaded.

    Returns:
        The summary produced by ``summarize_text``, or a short prompt
        message when no file was provided.
    """
    # Clicking the button with no upload passes None; answer with a message
    # instead of failing on `None.name`.
    if pdf_file is None:
        return "Please upload a PDF file to summarize."

    # Depending on the Gradio version/`type` setting the upload arrives as a
    # path string or as a temp-file wrapper whose `.name` is the path.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    scanned = is_scanned_pdf(pdf_path)
    text_dict = convert_pdf_to_text(pdf_path, ocr_model=scanned)
    text = text_dict["converted_text"]
    return summarize_text(text)
# Two-tab interface: one tab summarizes pasted text, the other an uploaded PDF.
with gr.Blocks() as demo:
    gr.Markdown("# DocSummarizer 📄✨\nUpload a PDF or paste text to summarize")

    # Tab 1: free-text input -> summary.
    with gr.Tab("Summarize Text"):
        input_text = gr.Textbox(label="Text to Summarize", lines=20)
        output_text = gr.Textbox(label="Summarized Text", lines=10)
        summarize_button = gr.Button("Summarize")
        summarize_button.click(
            fn=summarize_from_text,
            inputs=input_text,
            outputs=output_text,
        )

    # Tab 2: PDF upload (restricted to .pdf) -> summary.
    with gr.Tab("Summarize PDF"):
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        output_summary = gr.Textbox(label="Summarized Text", lines=10)
        pdf_button = gr.Button("Summarize PDF")
        pdf_button.click(
            fn=summarize_from_pdf,
            inputs=pdf_file,
            outputs=output_summary,
        )

# Start the Gradio server for the app defined above.
demo.launch()