Spaces:
Runtime error
Runtime error
| # 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset. | |
| # It’s been trained on question-answer pairs, including unanswerable questions, for the task of question and answering. | |
| # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline | |
| # import gradio as grad | |
| # import ast | |
| # mdl_name = "deepset/roberta-base-squad2" | |
| # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name) | |
| # def answer_question(question,context): | |
| # text= "{"+"'question': '"+question+"','context': '"+context+"'}" | |
| # di=ast.literal_eval(text) | |
| # response = my_pipeline(di) | |
| # return response | |
| # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch() | |
| #--------------------------------------------------------------------------------- | |
| # 2. Same task, different model. | |
| # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline | |
| # import gradio as grad | |
| # import ast | |
| # mdl_name = "distilbert-base-cased-distilled-squad" | |
| # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name) | |
| # def answer_question(question,context): | |
| # text= "{"+"'question': '"+question+"','context': '"+context+"'}" | |
| # di=ast.literal_eval(text) | |
| # response = my_pipeline(di) | |
| # return response | |
| # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch() | |
| #--------------------------------------------------------------------------------- | |
| # 3. Different task: language translation. | |
| # from transformers import pipeline | |
| # import gradio as grad | |
| # First model translates English to German. | |
| # mdl_name = "Helsinki-NLP/opus-mt-en-de" | |
| # opus_translator = pipeline("translation", model=mdl_name) | |
| # def translate(text): | |
| # response = opus_translator(text) | |
| # return response | |
| # grad.Interface(translate, inputs=["text",], outputs="text").launch() | |
| #---------------------------------------------------------------------------------- | |
| # 4. Language translation without pipeline API. | |
| # Second model translates English to French. | |
| # from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
| # import gradio as grad | |
| # mdl_name = "Helsinki-NLP/opus-mt-en-fr" | |
| # mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name) | |
| # my_tkn = AutoTokenizer.from_pretrained(mdl_name) | |
| # def translate(text): | |
| # inputs = my_tkn(text, return_tensors="pt") | |
| # trans_output = mdl.generate(**inputs) | |
| # response = my_tkn.decode(trans_output[0], skip_special_tokens=True) | |
| # return response | |
| # txt = grad.Textbox(lines=1, label="English", placeholder="English Text here") | |
| # out = grad.Textbox(lines=1, label="French") | |
| # grad.Interface(translate, inputs=txt, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 5. Different task: abstractive summarization | |
| # Abstractive summarization is more difficult than extractive summarization, | |
| # which pulls key sentences from a document and combines them to form a “summary.” | |
| # Because abstractive summarization involves paraphrasing words, it is also more time-consuming; | |
| # however, it has the potential to produce a more polished and coherent summary. | |
| # from transformers import PegasusForConditionalGeneration, PegasusTokenizer | |
| # import gradio as grad | |
| # mdl_name = "google/pegasus-xsum" | |
| # pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name) | |
| # mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name) | |
| # def summarize(text): | |
| # tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt") | |
| # txt_summary = mdl.generate(**tokens) | |
| # response = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True) | |
| # return response | |
| # txt = grad.Textbox(lines=10, label="English", placeholder="English Text here") | |
| # out = grad.Textbox(lines=10, label="Summary") | |
| # grad.Interface(summarize, inputs=txt, outputs=out).launch() | |
| #------------------------------------------------------------------------------------------ | |
| # 6. Same model, tuned with some generation parameters: num_return_sequences=5, max_length=200, temperature=1.5, num_beams=10 | |
| # from transformers import PegasusForConditionalGeneration, PegasusTokenizer | |
| # import gradio as grad | |
| # mdl_name = "google/pegasus-xsum" | |
| # pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name) | |
| # mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name) | |
| # def summarize(text): | |
| # tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt") | |
| # translated_txt = mdl.generate(**tokens, num_return_sequences=5, max_length=200, temperature=1.5, num_beams=10) | |
| # response = pegasus_tkn.batch_decode(translated_txt, skip_special_tokens=True) | |
| # return response | |
| # txt = grad.Textbox(lines=10, label="English", placeholder="English Text here") | |
| # out = grad.Textbox(lines=10, label="Summary") | |
| # grad.Interface(summarize, inputs=txt, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 7. Zero-Shot Learning: | |
| # Zero-shot learning, as the name implies, means using a pretrained model, trained on a certain set of data, | |
| # on a different set of data, which it has not seen during training. This would mean, as an example, to take | |
| # some model from huggingface that is trained on a certain dataset and use it for inference on examples it has never seen before. | |
| # Zero-shot classification implementations are most commonly found among transformer models. | |
| # There are more than 60 transformer models that function based on the zero-shot classification that are found in the huggingface library. | |
| # When we discuss zero-shot text classification , there is one additional thing that springs to mind. | |
| # In the same vein as zero-shot classification is few-shot classification, which is very similar to zero-shot classification. | |
| # However, in contrast with zero-shot classification, few-shot classification makes use of very few labeled samples during the training process. | |
| # The implementation of the few-shot classification methods can be found in OpenAI, where the GPT3 classifier is a well-known example of a few-shot classifier. | |
| # Deploying the following code works but comes with a warning: "No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli). | |
| # Using a pipeline without specifying a model name and revision in production is not recommended." | |
| # from transformers import pipeline | |
| # import gradio as grad | |
| # zero_shot_classifier = pipeline("zero-shot-classification") | |
| # def classify(text,labels): | |
| # classifer_labels = labels.split(",") | |
| # #["software", "politics", "love", "movies", "emergency", "advertisment","sports"] | |
| # response = zero_shot_classifier(text,classifer_labels) | |
| # return response | |
| # txt=grad.Textbox(lines=1, label="English", placeholder="text to be classified") | |
| # labels=grad.Textbox(lines=1, label="Labels", placeholder="comma separated labels") | |
| # out=grad.Textbox(lines=1, label="Classification") | |
| # grad.Interface(classify, inputs=[txt,labels], outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 8. Text Generation Task/Models with GPT2 model | |
| # The earliest text generation models were based on Markov chains . Markov chains are like a state machine wherein | |
| # using only the previous state, the next state is predicted. This is similar also to what we studied in bigrams. | |
| # After Markov chains, recurrent neural networks (RNNs), which were capable of retaining a greater context of the text, were introduced. | |
| # They are based on neural network architectures that are recurrent in nature. RNNs are able to retain a greater context of the text that was introduced. | |
| # Nevertheless, the amount of information that these kinds of networks are able to remember is constrained, and it is also difficult to train them, | |
| # which means that they are not effective at generating lengthy texts. To counter this issue with RNNs, LSTM architectures were evolved, | |
| # which could capture long-term dependencies in text. Finally, we came to transformers, whose decoder architecture became popular for generative models | |
| # used for generating text as an example. | |
| # from transformers import GPT2LMHeadModel,GPT2Tokenizer | |
| # import gradio as grad | |
| # mdl = GPT2LMHeadModel.from_pretrained('gpt2') | |
| # gpt2_tkn=GPT2Tokenizer.from_pretrained('gpt2') | |
| # def generate(starting_text): | |
| # tkn_ids = gpt2_tkn.encode(starting_text, return_tensors = 'pt') | |
| # # When no specific parameter is specified, the model performs a greedy search to find the next word, which entails selecting the word from all of the | |
| # # alternatives that has the highest probability of being correct. This process is deterministic in nature, which means that resultant text is the same | |
| # # as before if we use the same parameters. | |
| # # The num_beams parameter does a beam search: it returns the sequences that have the highest probability, and then, when it comes time to | |
| # # choose, it picks the one that has the highest probability. | |
| # # The do_sample parameter selects the next word at random from the probability distribution. | |
| # # The temperature parameter controls the level of greed that the generative model exhibits. | |
| # # If the temperature is low, the probabilities of sample classes other than the one with the highest log probability will be low. | |
| # # As a result, the model will probably output the text that is most correct, but it will be rather monotonous and contain only a small amount of variation. | |
| # # If the temperature is high, the model has a greater chance of outputting different words than those with the highest probability. | |
| # # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense text and | |
| # # contain grammatical errors. | |
| # # With a lower temperature (1.5 --> 0.1), the output becomes less varied. | |
| # gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=True, num_beams=3, do_sample=True, temperature=0.1) | |
| # response="" | |
| # #response = gpt2_tensors | |
| # for i, x in enumerate(gpt2_tensors): | |
| # response=response+f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}" # Decode tensors into text | |
| # return gpt2_tensors, response | |
| # txt=grad.Textbox(lines=1, label="English", placeholder="English Text here") | |
| # out_tensors=grad.Textbox(lines=1, label="Generated Tensors") | |
| # out_text=grad.Textbox(lines=1, label="Generated Text") | |
| # grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 9. Text Generation: different model "distilgpt2" | |
| # from transformers import pipeline, set_seed | |
| # import gradio as grad | |
| # gpt2_pipe = pipeline('text-generation', model='distilgpt2') | |
| # set_seed(42) | |
| # def generate(starting_text): | |
| # response= gpt2_pipe(starting_text, max_length=20, num_return_sequences=5) | |
| # return response | |
| # txt=grad.Textbox(lines=1, label="English", placeholder="English Text here") | |
| # out=grad.Textbox(lines=1, label="Generated Text") | |
| # grad.Interface(generate, inputs=txt, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 10. Text-to-Text Generation using the T5 model - Task 1 generates a question given some context. | |
| # A transformer-based architecture that takes a text-to-text approach is referred to as T5, which stands for Text-to-Text Transfer Transformer. | |
| # In the text-to-text approach, we take a task like Q&A, classification, summarization, code generation, etc. and turn it into a problem, | |
| # which provides the model with some form of input and then teaches it to generate some form of target text. This makes it possible to apply | |
| # the same model, loss function, hyperparameters, and other settings to all of our varied sets of responsibilities. | |
| # from transformers import AutoModelWithLMHead, AutoTokenizer | |
| # import gradio as grad | |
| # text2text_tkn = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap") | |
| # mdl = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap") | |
| # def text2text(context,answer): | |
| # input_text = "answer: %s context: %s </s>" % (answer, context) | |
| # features = text2text_tkn ([input_text], return_tensors='pt') | |
| # output = mdl.generate(input_ids=features['input_ids'], | |
| # attention_mask=features['attention_mask'], | |
| # max_length=64) | |
| # response=text2text_tkn.decode(output[0]) | |
| # return response | |
| # context=grad.Textbox(lines=10, label="English", placeholder="Context") | |
| # ans=grad.Textbox(lines=1, label="Answer") | |
| # out=grad.Textbox(lines=1, label="Generated Question") | |
| # grad.Interface(text2text, inputs=[context,ans], outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 11. Text-to-Text Generation using the T5 model - Task 2 summarizes a paragraph of text. | |
| # from transformers import AutoTokenizer, AutoModelWithLMHead | |
| # import gradio as grad | |
| # text2text_tkn = AutoTokenizer.from_pretrained("deep-learning-analytics/wikihow-t5-small") | |
| # mdl = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/wikihow-t5-small") | |
| # def text2text_summary(para): | |
| # initial_txt = para.strip().replace("\n","") | |
| # tkn_text = text2text_tkn.encode(initial_txt, return_tensors="pt") | |
| # tkn_ids = mdl.generate( | |
| # tkn_text, | |
| # max_length=250, | |
| # num_beams=5, | |
| # repetition_penalty=2.5, | |
| # early_stopping=True | |
| # ) | |
| # response = text2text_tkn.decode(tkn_ids[0], skip_special_tokens=True) | |
| # return response | |
| # para=grad.Textbox(lines=10, label="Paragraph", placeholder="Copy paragraph") | |
| # out=grad.Textbox(lines=1, label="Summary") | |
| # grad.Interface(text2text_summary, inputs=para, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 12. Text-to-Text Generation using the T5 model - Task 3 Translation. | |
| # from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| # import gradio as grad | |
| # text2text_tkn= T5Tokenizer.from_pretrained("t5-small") | |
| # mdl = T5ForConditionalGeneration.from_pretrained("t5-small") | |
| # def text2text_translation(text): | |
| # # English to German | |
| # # inp = "translate English to German:: "+text | |
| # # English to French | |
| # inp = "translate English to French:: " +text | |
| # enc = text2text_tkn(inp, return_tensors="pt") | |
| # tokens = mdl.generate(**enc) | |
| # response=text2text_tkn.batch_decode(tokens) | |
| # return response | |
| # para=grad.Textbox(lines=1, label="English Text", placeholder="Text in English") | |
| # out=grad.Textbox(lines=1, label="French Translation") | |
| # grad.Interface(text2text_translation, inputs=para, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 13. Text-to-Text Generation using the T5 model - Task 4 sentiment analysis. | |
| # from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| # import gradio as grad | |
| # text2text_tkn= T5Tokenizer.from_pretrained("t5-small") | |
| # mdl = T5ForConditionalGeneration.from_pretrained("t5-small") | |
| # def text2text_sentiment(text): | |
| # inp = "sst2 sentence: "+text | |
| # enc = text2text_tkn(inp, return_tensors="pt") | |
| # tokens = mdl.generate(**enc) | |
| # response=text2text_tkn.batch_decode(tokens) | |
| # return response | |
| # para=grad.Textbox(lines=1, label="English Text", placeholder="Text in English") | |
| # out=grad.Textbox(lines=1, label="Sentiment") | |
| # grad.Interface(text2text_sentiment, inputs=para, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 14. Text-to-Text Generation using the T5 model - Task 5 grammar check - this doesn't work great unfortunately. | |
| # from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| # import gradio as grad | |
| # text2text_tkn= T5Tokenizer.from_pretrained("t5-small") | |
| # mdl = T5ForConditionalGeneration.from_pretrained("t5-small") | |
| # def text2text_acceptable_sentence(text): | |
| # inp = "cola sentence: "+text | |
| # enc = text2text_tkn(inp, return_tensors="pt") | |
| # tokens = mdl.generate(**enc) | |
| # response=text2text_tkn.batch_decode(tokens) | |
| # return response | |
| # para=grad.Textbox(lines=1, label="English Text", placeholder="Text in English") | |
| # out=grad.Textbox(lines=1, label="Whether the sentence is acceptable or not") | |
| # grad.Interface(text2text_acceptable_sentence, inputs=para, outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 15. Text-to-Text Generation using the T5 model - Task 6 sentence paraphrasing | |
| # from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| # import gradio as grad | |
| # text2text_tkn= T5Tokenizer.from_pretrained("t5-small") | |
| # mdl = T5ForConditionalGeneration.from_pretrained("t5-small") | |
| # def text2text_paraphrase(sentence1,sentence2): | |
| # inp1 = "mrpc sentence1: "+sentence1 | |
| # inp2 = "sentence2: "+sentence2 | |
| # combined_inp=inp1+" "+inp2 | |
| # enc = text2text_tkn(combined_inp, return_tensors="pt") | |
| # tokens = mdl.generate(**enc) | |
| # response=text2text_tkn.batch_decode(tokens) | |
| # return response | |
| # sent1=grad.Textbox(lines=1, label="Sentence1", placeholder="Text in English") | |
| # sent2=grad.Textbox(lines=1, label="Sentence2", placeholder="Text in English") | |
| # out=grad.Textbox(lines=1, label="Whether the sentence is acceptable or not") | |
| # grad.Interface(text2text_paraphrase, inputs=[sent1,sent2], outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 16. Text-to-Text Generation using the T5 model - Task 7 check whether a statement deduced from a text is correct or not. | |
| # from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| # import gradio as grad | |
| # text2text_tkn= T5Tokenizer.from_pretrained("t5-small") | |
| # mdl = T5ForConditionalGeneration.from_pretrained("t5-small") | |
| # def text2text_deductible(sentence1,sentence2): | |
| # inp1 = "rte sentence1: "+sentence1 | |
| # inp2 = "sentence2: "+sentence2 | |
| # combined_inp=inp1+" "+inp2 | |
| # enc = text2text_tkn(combined_inp, return_tensors="pt") | |
| # tokens = mdl.generate(**enc) | |
| # response=text2text_tkn.batch_decode(tokens) | |
| # return response | |
| # sent1=grad.Textbox(lines=1, label="Sentence1", placeholder="Text in English") | |
| # sent2=grad.Textbox(lines=1, label="Sentence2", placeholder="Text in English") | |
| # out=grad.Textbox(lines=1, label="Whether sentence2 is deductible from sentence1") | |
| # grad.Interface(text2text_deductible, inputs=[sent1,sent2], outputs=out).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 17. Chatbot/Dialog Bot: a simple bot named Alicia that is based on the Microsoft DialoGPT model. | |
| # from transformers import AutoModelForCausalLM, AutoTokenizer,BlenderbotForConditionalGeneration | |
| # import torch | |
| # chat_tkn = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium") | |
| # mdl = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium") | |
| # #chat_tkn = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill") | |
| # #mdl = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill") | |
| # def converse(user_input, chat_history=[]): | |
| # user_input_ids = chat_tkn(user_input + chat_tkn.eos_token, return_tensors='pt').input_ids | |
| # # keep history in the tensor | |
| # bot_input_ids = torch.cat([torch.LongTensor(chat_history), user_input_ids], dim=-1) | |
| # # get response | |
| # chat_history = mdl.generate(bot_input_ids, max_length=1000, pad_token_id=chat_tkn.eos_token_id).tolist() | |
| # print (chat_history) | |
| # response = chat_tkn.decode(chat_history[0]).split("<|endoftext|>") | |
| # print("starting to print response") | |
| # print(response) | |
| # # html for display | |
| # html = "<div class='mybot'>" | |
| # for x, mesg in enumerate(response): | |
| # if x%2!=0 : | |
| # mesg="Alicia:"+mesg | |
| # clazz="alicia" | |
| # else : | |
| # clazz="user" | |
| # print("value of x") | |
| # print(x) | |
| # print("message") | |
| # print (mesg) | |
| # html += "<div class='mesg {}'> {}</div>".format(clazz, mesg) | |
| # html += "</div>" | |
| # print(html) | |
| # return html, chat_history | |
| # import gradio as grad | |
| # css = """ | |
| # .mychat {display:flex;flex-direction:column} | |
| # .mesg {padding:5px;margin-bottom:5px;border-radius:5px;width:75%} | |
| # .mesg.user {background-color:lightblue;color:white} | |
| # .mesg.alicia {background-color:orange;color:white,align-self:self-end} | |
| # .footer {display:none !important} | |
| # """ | |
| # text=grad.Textbox(placeholder="Lets chat") | |
| # grad.Interface(fn=converse, | |
| # theme="default", | |
| # inputs=[text, "state"], | |
| # outputs=["html", "state"], | |
| # css=css).launch() | |
| #----------------------------------------------------------------------------------- | |
| # 18. Code and Code Comment Generation | |
| # CodeGen is a language model that converts basic English prompts into code that can be executed. | |
| # Instead of writing code yourself, you describe what the code should do using natural language, and | |
| # the machine writes the code for you based on what you’ve described. | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import gradio as grad | |
# Single source of truth for the checkpoint name so the tokenizer and the
# model are always loaded from the same place.
_CODEGEN_CHECKPOINT = "Salesforce/codegen-350M-mono"

# Both objects are created once at module load and reused by codegen().
codegen_tkn = AutoTokenizer.from_pretrained(_CODEGEN_CHECKPOINT)
mdl = AutoModelForCausalLM.from_pretrained(_CODEGEN_CHECKPOINT)
def codegen(intent):
    """Generate code text from a natural-language description.

    Args:
        intent: Plain-English description of what the program should do,
            e.g. "write a function which takes 2 numbers as input and
            returns the larger of the two".

    Returns:
        The decoded model output with special tokens stripped, or an empty
        string when ``intent`` is missing, empty, or whitespace-only.
    """
    # A blank prompt yields no tokens for generate() to continue from;
    # return an empty result instead of raising inside the UI callback.
    if not intent or not intent.strip():
        return ""

    encoded = codegen_tkn(intent, return_tensors="pt")
    gen_ids = mdl.generate(
        input_ids=encoded.input_ids,
        # Pass the mask explicitly rather than letting generate() infer it.
        attention_mask=encoded.attention_mask,
        # max_length counts the prompt tokens as well as the generated ones.
        max_length=128,
        # Name the pad id explicitly (EOS) instead of relying on generate()'s
        # implicit fallback, which logs a warning on every call.
        pad_token_id=codegen_tkn.eos_token_id,
    )
    return codegen_tkn.decode(gen_ids[0], skip_special_tokens=True)
# Text box that displays whatever codegen() returns.
output = grad.Textbox(
    label="Generated Python Code",
    lines=1,
    placeholder="",
)
# Text box where the user describes the program they want.
inp = grad.Textbox(label="Place your intent here", lines=1)
# Wire the input box through codegen() into the output box and start the app.
grad.Interface(
    fn=codegen,
    inputs=inp,
    outputs=output,
).launch()