Spaces:
Runtime error
Runtime error
| # https://huggingface.co/spaces/azsalihu/AbstractSummary_To_Audio | |
| # Here are the imports | |
| import torch | |
| import PyPDF2 | |
| import gradio as gr | |
| from IPython.display import Audio, display | |
| from transformers import pipeline | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import numpy as np | |
| import scipy | |
| from gtts import gTTS | |
| from io import BytesIO | |
| # Extracting Text function | |
def extract_text(article):
    """Return the text of the first page of a PDF.

    Args:
        article: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (this value is passed straight through to it).

    Returns:
        The result of ``extract_text()`` on page index 0 only; later pages
        are never read.
    """
    first_page = PyPDF2.PdfReader(article).pages[0]
    return first_page.extract_text()
| # Summarization Function | |
# Checkpoint used for summarization.
_SUMMARY_MODEL = "pszemraj/led-base-book-summary"
# How many sentences after the "Abstract" marker are fed to the model.
_ABSTRACT_SENTENCES = 7
# (tokenizer, model) pairs keyed by checkpoint name, so the weights are
# loaded once per process instead of on every request.
_SUMMARIZER_CACHE = {}


def _load_summarizer(model_name=_SUMMARY_MODEL):
    """Return the (tokenizer, model) pair for *model_name*, loading it once."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSeq2SeqLM.from_pretrained(model_name),
        )
    return _SUMMARIZER_CACHE[model_name]


def _select_abstract(text):
    """Return the sentences that follow the first one mentioning "Abstract".

    Falls back to the beginning of *text* when no sentence contains the
    marker, or when the marker sits in the last sentence (which would
    otherwise leave nothing to summarize).
    """
    sentences = text.split(". ")
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if "Abstract" in sentence),
        0,
    )
    if start >= len(sentences):
        start = 0
    return ". ".join(sentences[start:start + _ABSTRACT_SENTENCES])


def _trim_to_last_sentence(summary):
    """Cut *summary* after its final period; return it unchanged if none."""
    last_period = summary.rfind(".")
    if last_period == -1:
        return summary
    return summary[:last_period + 1]


def summarize_abstract(text):
    """Summarize the abstract found in *text*.

    The text is split on ". "; the sentences after the first one containing
    "Abstract" (up to ``_ABSTRACT_SENTENCES`` of them) are passed to the
    summarization model. If the marker is missing, the leading sentences of
    the text are summarized instead of failing with an unbound variable.

    Args:
        text: Raw text of the article page.

    Returns:
        The generated summary, trimmed so that it ends at its last period
        when it contains one.

    Raises:
        ValueError: If *text* is empty, missing, or whitespace only.
    """
    if not text or not text.strip():
        raise ValueError("No text to summarize: the extracted text is empty.")

    abstract = _select_abstract(text)
    tokenizer, model = _load_summarizer()

    # Tokenize abstract
    inputs = tokenizer(abstract, max_length=1024, return_tensors="pt", truncation=True)

    # Generate summary. Sampling is enabled, so the output may differ between
    # runs on the same input.
    summary_ids = model.generate(
        inputs['input_ids'],
        max_length=50,
        min_length=30,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=True,
        early_stopping=False,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # max_length can stop generation mid-sentence; drop the dangling tail.
    return _trim_to_last_sentence(summary)
| # Abstract to Audio Function | |
def abstract_to_audio(text):
    """Convert *text* to English speech with gTTS and return it as bytes.

    Args:
        text: The text to be spoken.

    Returns:
        Everything gTTS wrote into an in-memory buffer, as a ``bytes`` object.
    """
    speech = gTTS(text, lang='en')
    with BytesIO() as stream:
        speech.write_to_fp(stream)
        # getvalue() yields the full buffer contents regardless of position.
        return stream.getvalue()
| # Combining Extracting text, Summarization, Abstract to Audio functions | |
def abstract_audio(article):
    """Run the full pipeline: extract text, summarize it, synthesize audio.

    Args:
        article: The uploaded PDF, forwarded unchanged to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple, where ``summary`` is the result of
        ``summarize_abstract`` and ``audio`` is the result of
        ``abstract_to_audio`` for that summary.
    """
    summary = summarize_abstract(extract_text(article))
    return summary, abstract_to_audio(summary)
# Building Gradio Interface
# The components are created once here and handed to the Interface below,
# rather than being declared and then shadowed by inline duplicates.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

myApp = gr.Interface(
    fn=abstract_audio,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="Summary of Abstract to Audio ",
    # The backslash is escaped so the displayed text is unchanged while the
    # literal no longer contains the invalid escape sequence "\J".
    description="An App that helps you summarises the abstract of an Article\\Journal and gives the audio of the summary",
    examples=["NIPS-2015-hidden-technical-debt-in-machine-learning-systems-Paper.pdf"],
)
myApp.launch()