Spaces:
Runtime error
Runtime error
import functools
import re

import gradio as gr
from gradio.mix import Parallel
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
)
# Text-cleaning helper shared by all headline generators.
# The patterns are compiled once at import time instead of on every call.
_URL_RE = re.compile(r"http\S+")
_WHITESPACE_RUN_RE = re.compile(r"[ \t\r\n]+")


def clean_text(text):
    """Normalize pasted article text before it is tokenized.

    Steps, in order:

    1. Drop every non-ASCII character (accents, CJK text, emoji, ...).
    2. Remove anything that starts with ``http`` up to the next whitespace.
    3. Replace the literal marker ``ADVERTISEMENT`` with a space.
    4. Collapse each run of spaces, tabs, newlines and carriage returns
       into a single space and trim the ends.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, single-line string (``""`` for empty input).
    """
    # Encoding with errors="ignore" silently discards non-ASCII characters.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    text = _URL_RE.sub("", text)
    text = text.replace("ADVERTISEMENT", " ")
    # One pass replaces the former chain of "\n", "\n\n" (which could never
    # match once "\n" was gone), "\t" and " +" substitutions. "\r" is included
    # so CRLF line endings do not leave stray carriage returns in the text.
    return _WHITESPACE_RUN_RE.sub(" ", text).strip()
# Headline generator 1: checkpoint name passed to ``from_pretrained``.
modchoice_1 = "chinhon/pegasus-large-commentaries_hd"


@functools.lru_cache(maxsize=None)
def _load_headline_model_1():
    """Return ``(tokenizer, model)`` for ``modchoice_1``, loading them once.

    The result is memoized so that repeated requests reuse the same objects
    instead of calling ``from_pretrained`` again on every request.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_1)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)
    return tokenizer, model


def commentaries_headline1(text):
    """Generate a headline for *text* with the ``modchoice_1`` checkpoint.

    Args:
        text: Raw commentary text; it is passed through ``clean_text`` first.

    Returns:
        The first decoded sequence produced by the model, or ``""`` when
        nothing is left of the input after cleaning.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip loading/running the model entirely.
        return ""

    tokenizer_1, model_1 = _load_headline_model_1()
    # The source text is encoded directly. The previous
    # ``as_target_tokenizer()`` wrapper is intended for encoding labels and is
    # deprecated in recent transformers releases.
    batch = tokenizer_1(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    translated = model_1.generate(**batch)
    summary_1 = tokenizer_1.batch_decode(translated, skip_special_tokens=True)
    return summary_1[0]
# Gradio interface for generator 1: plain text box in, labelled text box out.
headline1 = gr.Interface(
    commentaries_headline1,
    gr.inputs.Textbox(),
    gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-large"),
)
# Headline generator 2: checkpoint name passed to ``from_pretrained``.
modchoice_2 = "chinhon/pegasus-multi_news-commentaries_hdwriter"


@functools.lru_cache(maxsize=None)
def _load_headline_model_2():
    """Return ``(tokenizer, model)`` for ``modchoice_2``, loading them once.

    The result is memoized so that repeated requests reuse the same objects
    instead of calling ``from_pretrained`` again on every request.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_2)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)
    return tokenizer, model


def commentaries_headline2(text):
    """Generate a headline for *text* with the ``modchoice_2`` checkpoint.

    Args:
        text: Raw commentary text; it is passed through ``clean_text`` first.

    Returns:
        The first decoded sequence produced by the model, or ``""`` when
        nothing is left of the input after cleaning.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip loading/running the model entirely.
        return ""

    tokenizer_2, model_2 = _load_headline_model_2()
    # The source text is encoded directly. The previous
    # ``as_target_tokenizer()`` wrapper is intended for encoding labels and is
    # deprecated in recent transformers releases.
    batch = tokenizer_2(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    translated = model_2.generate(**batch)
    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)
    return summary_2[0]
# Gradio interface for generator 2: plain text box in, labelled text box out.
headline2 = gr.Interface(
    commentaries_headline2,
    gr.inputs.Textbox(),
    gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-multi_news"),
)
# Headline generator 3: checkpoint name passed to ``from_pretrained``.
modchoice_3 = "chinhon/bart-large-commentaries_hdwriter"


@functools.lru_cache(maxsize=None)
def _load_headline_model_3():
    """Return ``(tokenizer, model)`` for ``modchoice_3``, loading them once.

    The result is memoized so that repeated requests reuse the same objects
    instead of calling ``from_pretrained`` again on every request.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_3)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_3)
    return tokenizer, model


def commentaries_headline3(text):
    """Generate a headline for *text* with the ``modchoice_3`` checkpoint.

    Args:
        text: Raw commentary text; it is passed through ``clean_text`` first.

    Returns:
        The first decoded sequence produced by the model, or ``""`` when
        nothing is left of the input after cleaning.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip loading/running the model entirely.
        return ""

    tokenizer_3, model_3 = _load_headline_model_3()
    # The source text is encoded directly. The previous
    # ``as_target_tokenizer()`` wrapper is intended for encoding labels and is
    # deprecated in recent transformers releases.
    batch = tokenizer_3(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    translated = model_3.generate(**batch)
    # NOTE(review): the original passed max_length=100 to batch_decode, but
    # max_length is a generation option, not a decoding one, so it is dropped
    # here. If a 100-token cap is wanted, pass max_length=100 to generate()
    # above - that would change the generated output, so confirm first.
    summary_3 = tokenizer_3.batch_decode(translated, skip_special_tokens=True)
    return summary_3[0]
# Gradio interface for generator 3: plain text box in, labelled text box out.
headline3 = gr.Interface(
    commentaries_headline3,
    gr.inputs.Textbox(),
    gr.outputs.Textbox(label=" | Model: Fine tuned bart-large"),
)
# Combine the three headline interfaces into one app and start it.
# The shared 20-line text box replaces the per-interface inputs.
commentary_box = gr.inputs.Textbox(
    lines=20,
    label="Paste parts of your commentary here, and choose from 3 suggested headlines",
)
parallel_app = Parallel(
    headline1,
    headline2,
    headline3,
    title="Commentaries Headlines Generator",
    inputs=commentary_box,
    theme="huggingface",
)
parallel_app.launch(enable_queue=True)