Spaces:
Sleeping
Sleeping
import hmac
import os
import re

import fitz
import gradio as gr
from summarizer import Summarizer
def preprocess(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are each replaced by one ASCII space.
    Leading and trailing whitespace is collapsed as well, but not stripped.

    Args:
        text: The raw string to normalise.

    Returns:
        The string with each whitespace run replaced by a single space.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    # The pattern already matches newlines, so no separate newline
    # replacement pass is needed.
    return re.sub(r'\s+', ' ', text)
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the text of a PDF page range as one normalised string.

    Args:
        path: Location of the PDF, passed straight to ``fitz.open``.
        start_page: 1-based number of the first page to read. Values below 1
            are treated as 1.
        end_page: 1-based number of the last page to read (inclusive).
            ``None``, or a value past the end of the document, means the
            last page.

    Returns:
        The text of the selected pages, each passed through ``preprocess``
        and joined with single spaces. An empty string when the range
        selects no pages.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        # Clamp both ends: start_page < 1 would otherwise produce a negative
        # page index, and end_page > total_pages would index past the end.
        first_index = max(start_page, 1) - 1
        stop_index = total_pages if end_page is None else min(end_page, total_pages)
        page_texts = [
            preprocess(doc.load_page(index).get_text("text"))
            for index in range(first_index, stop_index)
        ]
    finally:
        # Release the document even when a page fails to load or parse.
        doc.close()
    return ' '.join(page_texts)
def generate_summary(text, model='bert-base-uncased', ratio=0.2):
    """Produce an extractive summary of *text*.

    Args:
        text: The document text to summarise.
        model: Name of the pretrained model handed to ``Summarizer``.
        ratio: Value forwarded as the summarizer's ``ratio`` argument.

    Returns:
        Whatever the summarizer returns for ``text`` at the given ratio.
    """
    # Forward the requested model name; previously the parameter was
    # overwritten by a default-constructed Summarizer and had no effect.
    summarizer = Summarizer(model=model)
    return summarizer(text, ratio=ratio)
def pdf_summary(file, secret):
    """Validate the request and return a summary of the uploaded PDF.

    Args:
        file: The uploaded file object; its ``name`` attribute is used as
            the path of the PDF on disk. ``None`` when nothing was uploaded.
        secret: The secret typed by the user, compared against the
            ``Secret`` environment variable.

    Returns:
        The summary text on success, otherwise a bracketed error message
        describing why no summary was produced.
    """
    expected = os.environ.get('Secret')
    # Fail closed: an unset environment variable or a missing secret must
    # never authenticate. compare_digest avoids leaking how much of the
    # secret matched through comparison timing.
    if (
        expected is None
        or secret is None
        or not hmac.compare_digest(
            str(secret).encode('utf-8'), expected.encode('utf-8')
        )
    ):
        return '[Error]: Please provide the correct secret'

    if file is None:
        return '[ERROR]: Please upload a PDF file.'

    # Read the upload where it already is. Renaming it by slicing fixed
    # character counts out of the path depends on one particular temp-name
    # layout and can collide with, or overwrite, another request's file.
    text = pdf_to_text(file.name)
    if text.strip() == '':
        return '[ERROR]: The content of PDF is empty.'
    return generate_summary(text)
# Text shown at the top of the web page.
title = 'PDF Summarizer'
description = "A platform for generating summary for a PDF using BERT model"

# Build the widgets first so the Interface call below stays short.
pdf_input = gr.File(label='PDF', file_types=['.pdf'])
secret_input = gr.Textbox(label='Secret')
summary_output = gr.Textbox(label='Summary')

# Wire pdf_summary to the widgets and start serving the app.
with gr.Interface(
    fn=pdf_summary,
    inputs=[pdf_input, secret_input],
    outputs=summary_output,
    title=title,
    description=description,
) as iface:
    iface.launch()