Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import pipeline, PegasusForConditionalGeneration | |
# Initialize the space: every model is loaded once at import time so the UI
# callbacks below can reuse them instead of reloading on each request.

# Summarization pipeline applied to YouTube transcripts.
summarizeryt = pipeline("summarization", model="facebook/bart-large-cnn")
# Summarization pipeline applied to blog posts. The checkpoint is named
# explicitly instead of relying on the library's implicit task default, so the
# model in use cannot change silently (and no "no model was supplied" warning
# is emitted at start-up).
summarizerbg = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Bare Pegasus model (not a pipeline): the caller tokenizes the input and
# decodes the generated ids itself.
summarizertx = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
def _extract_video_id(video_link):
    """Return the video id contained in a YouTube link, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    link = (video_link or "").strip()
    # Scheme-less links ("youtu.be/<id>") only expose a host to urlparse when
    # they start with "//".
    parsed = urlparse(link if "://" in link else "//" + link)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        # https://www.youtube.com/watch?v=<id>&t=42s
        video_id = query_ids[0]
    elif host.endswith("youtu.be") and segments:
        # https://youtu.be/<id>
        video_id = segments[0]
    elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        # https://www.youtube.com/embed/<id> and similar path-based forms
        video_id = segments[1]

    if not video_id:
        raise ValueError(f"Could not find a YouTube video id in {video_link!r}")
    return video_id


# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Fetch the transcript of a YouTube video and summarize it.

    The transcript is cut into consecutive 1000-character slices; each slice
    is summarized with the module-level ``summarizeryt`` pipeline and the
    partial summaries are joined with single spaces.

    Args:
        video_link: URL of the video (``watch?v=``, ``youtu.be/``,
            ``/embed/`` and ``/shorts/`` forms are recognized).

    Returns:
        A two-item list ``[transcript, summary]``, both strings. The summary
        is empty when the transcript contains no text.

    Raises:
        ValueError: if no video id can be extracted from ``video_link``.
    """
    # Validate the link first so a bad URL fails fast, before any heavy work.
    video_id = _extract_video_id(video_link)

    from youtube_transcript_api import YouTubeTranscriptApi

    # fetching video transcript
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    result = " ".join(entry["text"] for entry in transcript)

    # summarize text, one fixed-size slice at a time
    chunk_size = 1000
    summarized_text = []
    for start in range(0, len(result), chunk_size):
        chunk = result[start:start + chunk_size]
        # Stepping by chunk_size never yields an empty slice; whitespace-only
        # slices are skipped because there is nothing to summarize in them.
        if not chunk.strip():
            continue
        out = summarizeryt(chunk, max_new_tokens=130, min_length=30, do_sample=False)
        summarized_text.append(out[0]["summary_text"])

    # returning transcript and summary
    return [result, " ".join(summarized_text)]
def _chunk_sentences(sentences, max_words):
    """Greedily pack sentences into text chunks of at most ``max_words`` words.

    A single sentence longer than ``max_words`` is kept whole in its own chunk.
    """
    chunks = []
    current = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        if current and len(current) + len(words) > max_words:
            chunks.append(" ".join(current))
            current = []
        current.extend(words)
    if current:
        chunks.append(" ".join(current))
    return chunks


# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Download a web page and summarize the text of its ``<h1>``/``<p>`` tags.

    The page text is split into sentences, the sentences are packed into
    chunks of at most 500 words, and the chunks are summarized with the
    module-level ``summarizerbg`` pipeline.

    Args:
        blog_link: URL of the page to summarize.

    Returns:
        A two-item list ``[sentences, summary]`` where ``sentences`` is the
        list of sentence strings extracted from the page and ``summary`` is
        the chunk summaries joined with single spaces (empty string when the
        page has no text).

    Raises:
        ValueError: if ``blog_link`` is empty.
        requests.HTTPError: if the server answers with an error status.
    """
    if not blog_link or not blog_link.strip():
        raise ValueError("blog_link must be a non-empty URL")

    # importing libraries
    from bs4 import BeautifulSoup
    import requests
    import re

    # getting our blog post; the timeout keeps a stalled server from blocking
    # the UI forever, and error pages are rejected instead of summarized
    r = requests.get(blog_link.strip(), timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    article = ' '.join(node.text for node in soup.find_all(['h1', 'p']))

    # Split after sentence-ending punctuation, keeping the punctuation so the
    # summarizer still sees sentence boundaries.
    sentences = [s for s in re.split(r'(?<=[.?!])\s+', article) if s.strip()]

    # chunking text
    chunks = _chunk_sentences(sentences, 500)
    if not chunks:
        # Nothing to summarize; do not call the pipeline with an empty batch.
        return [sentences, ""]

    # summarizing text
    res = summarizerbg(chunks, max_new_tokens=1024, min_length=30, do_sample=False)
    text = ' '.join(summ['summary_text'].strip() for summ in res)

    # returning sentences and summary
    return [sentences, text]
# Tokenizer shared by all calls; loaded on first use (see below).
_PEGASUS_TOKENIZER = None


def _get_pegasus_tokenizer():
    """Return the Pegasus tokenizer, loading it only on the first call."""
    global _PEGASUS_TOKENIZER
    if _PEGASUS_TOKENIZER is None:
        from transformers import PegasusTokenizer
        _PEGASUS_TOKENIZER = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    return _PEGASUS_TOKENIZER


# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Summarize ``words`` with the module-level Pegasus model ``summarizertx``.

    Args:
        words: Text to summarize. It is truncated by the tokenizer
            (``truncation=True``) if it is too long for the model.

    Returns:
        The generated summary as a string, or ``""`` when ``words`` is empty
        or contains only whitespace.
    """
    if not words or not words.strip():
        return ""

    tokenizer = _get_pegasus_tokenizer()

    # perform summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary = summarizertx.generate(**tokens)
    # skip_special_tokens drops markers such as <pad> and </s> that would
    # otherwise appear verbatim in the text shown to the user.
    actual_summ = tokenizer.decode(summary[0], skip_special_tokens=True)

    # returning summary
    return actual_summ
# Main UI: one column with the URL / transcript / summary text boxes and one
# column with a button per summarization backend.
with gr.Blocks() as ui:
    gr.Markdown("""
    ## Permet de faire le résumé d'une video youtube ou d'un article de blog
    """)
    with gr.Row():
        with gr.Column():
            # NOTE: gr.Textbox has no `api_name` parameter; endpoint names are
            # set on the event listeners below, so none is passed here.
            URI = gr.Textbox(
                label="URI à résumer",
                max_lines=1,
                placeholder="https://youtube|website.ext",
            )
            TRANSCRIPT = gr.Textbox(
                label="Transcript à résumer",
                lines=10,
                placeholder="https://youtube|website.ext",
            )
            RESUME = gr.Textbox(
                label="Résumé",
                lines=10,
                interactive=False,
                placeholder="https://youtube|website.ext",
            )
        with gr.Column():
            # Each listener gets its own api_name so the endpoints do not
            # collide.
            gr.Button("Process Youtube").click(
                fn=YTVideoToText,
                inputs=[URI],
                outputs=[TRANSCRIPT, RESUME],
                api_name="process_uri",
            )
            gr.Button("Process HTML").click(
                fn=postSummaryWithBart,
                inputs=[URI],
                outputs=[TRANSCRIPT, RESUME],
                api_name="process_html",
            )
            gr.Button("Process TEXT").click(
                fn=abstractiveSummaryWithPegasus,
                inputs=[TRANSCRIPT],
                outputs=[RESUME],
                api_name="process_text",
            )
    #translator_fr = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-fr-en")
    #summarizer = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-12-6")

# Start the app once the whole layout has been declared.
ui.launch()