Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from pdf_exctraction import read_pdf | |
| from transformers import pipeline | |
| # Extract the Abstract from the content of the document | |
def extract_abstract(pdf_path):
    r"""Return the entry that follows the ``"Abstract\n"`` heading in a PDF.

    Every page returned by ``read_pdf`` is inspected. When more than one page
    has an ``"Abstract\n"`` entry followed by another entry, the value taken
    from the last such page is the one returned.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``read_pdf``.

    Returns:
        The element immediately after ``"Abstract\n"`` in the first item of a
        page's content.

    Raises:
        gr.Error: If no abstract was found, or the one found is an empty
            string.
    """
    heading = "Abstract\n"
    found = ""
    for page in read_pdf(pdf_path).values():
        # NOTE(review): presumably page[0] is the list of text blocks on the
        # page -- confirm against read_pdf.
        blocks = page[0]
        if heading not in blocks:
            continue
        following = blocks.index(heading) + 1
        # A heading that is the very last entry has nothing after it to take.
        if following < len(blocks):
            found = blocks[following]
    if found != "":
        return found
    raise gr.Error("The article does not contains an Abstract or it is not in the expected format")
| # Summarize the abstract extracted from the PDF | |
# Summarization pipeline shared by all calls; built lazily on first use so the
# model is not reloaded for every request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize_abstract(pdf_path):
    """Summarize the abstract of the PDF at ``pdf_path``.

    The abstract is obtained from ``extract_abstract``, flattened to a single
    line of text and passed to the ``facebook/bart-large-cnn`` summarization
    pipeline.

    Args:
        pdf_path: Location of the PDF; handed unchanged to
            ``extract_abstract``.

    Returns:
        The ``'summary_text'`` field of the first result produced by the
        pipeline.

    Raises:
        gr.Error: Propagated from ``extract_abstract`` when no abstract can be
            extracted.
    """
    abstract = extract_abstract(pdf_path)
    # Flatten line breaks, then drop "- " so that words hyphenated across a
    # line break ("exam-\nple") are joined back together.
    abstract = abstract.replace("\n", " ").replace("- ", "")
    # Collapse the whitespace runs left behind by the replacements above.
    abstract = " ".join(abstract.split())
    result = _get_summarizer()(
        abstract,
        max_length=50,
        min_length=10,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return result[0]['summary_text']