Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import lftk | |
| import spacy | |
| import time | |
| import os | |
| import openai | |
| import json | |
# Hugging Face Hub checkpoints for the two locally hosted chat models.
VICUNA_MODEL_ID = "lmsys/vicuna-7b-v1.3"
LLAMA_MODEL_ID = "daryl149/llama-2-7b-chat-hf"

# Load the Vicuna 7B model and tokenizer (load_in_4bit requests 4-bit weights).
vicuna_tokenizer = AutoTokenizer.from_pretrained(VICUNA_MODEL_ID)
vicuna_model = AutoModelForCausalLM.from_pretrained(VICUNA_MODEL_ID, load_in_4bit=True)

# Load the LLaMA 7B chat model and tokenizer the same way.
llama_tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_ID)
llama_model = AutoModelForCausalLM.from_pretrained(LLAMA_MODEL_ID, load_in_4bit=True)

# The OpenAI key is read from the environment. A missing variable raises
# KeyError here, at import time, instead of on the first GPT request.
openai.api_key = os.environ['OPENAI_API_KEY']
# spaCy pipelines already loaded by this process, keyed by pipeline name.
_SPACY_PIPELINES = {}


def _get_spacy_pipeline(name="en_core_web_sm"):
    """Return the spaCy pipeline called *name*, loading it only on first use."""
    if name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[name] = spacy.load(name)
    return _SPACY_PIPELINES[name]


def linguistic_features_fn(message):
    """Extract LFTK linguistic features from *message* and return them as JSON text.

    The text is parsed with the ``en_core_web_sm`` spaCy pipeline and handed to
    an LFTK extractor, which is asked for the ``a_word_ps``, ``a_kup_pw`` and
    ``n_noun`` features (see the LFTK feature table for their definitions).

    Args:
        message: The sentence(s) typed by the user.

    Returns:
        The extracted features serialized with ``json.dumps(..., indent=2)``.
        The same text is also printed to stdout.
    """
    # Loading the pipeline is slow, so reuse one instance across button clicks
    # instead of calling spacy.load() on every request.
    nlp = _get_spacy_pipeline()
    doc = nlp(message)

    extractor = lftk.Extractor(docs=doc)
    # Extractor options exactly as the app configures them; their precise
    # effect is defined by LFTK - confirm against its documentation.
    extractor.customize(stop_words=True, punctuations=False, round_decimal=3)

    extracted_features = extractor.extract(features=["a_word_ps", "a_kup_pw", "n_noun"])
    formatted_output = json.dumps(extracted_features, indent=2)
    print(formatted_output)
    return formatted_output
def chat(user_prompt, model = 'gpt-3.5-turbo', temperature = 0, verbose = False):
    """Send one user message to the OpenAI chat-completion API and return the reply.

    Args:
        user_prompt: Text sent as the only message, with role ``"user"``.
        model: Name of the OpenAI chat model to call.
        temperature: Sampling temperature forwarded to the API.
        verbose: When true, print the prompt and the reply.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    conversation = [
        {"role": "user", "content": user_prompt}
    ]
    completion = openai.ChatCompletion.create(
        temperature=temperature,
        model=model,
        messages=conversation,
    )
    reply = completion['choices'][0]['message']['content']

    if not verbose:
        return reply

    print('User prompt:', user_prompt)
    print('GPT response:', reply)
    return reply
def format_chat_prompt(message, chat_history, max_convo_length):
    """Render the tail of a conversation plus a new message as a text prompt.

    Each kept turn becomes ``"\\nUser: <user>\\nAssistant: <bot>"``; the new
    message is appended as a final ``User:`` line followed by an open
    ``Assistant:`` cue.

    Args:
        message: The new user message.
        chat_history: Sequence of ``(user_message, bot_message)`` pairs, in the
            order they should appear in the prompt. ``None`` is treated as empty.
        max_convo_length: Number of trailing turns to keep. Zero or a negative
            number keeps no history.

    Returns:
        The assembled prompt string.
    """
    # Guard the slice: ``history[-0:]`` would return the *whole* history rather
    # than nothing, so a limit of zero has to be handled explicitly.
    if chat_history and max_convo_length > 0:
        recent_turns = chat_history[-max_convo_length:]
    else:
        recent_turns = []

    transcript = "".join(
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in recent_turns
    )
    return f"{transcript}\nUser: {message}\nAssistant:"
def gpt_respond(tab_name, message, chat_history, max_convo_length = 10):
    """Ask GPT to list the selected linguistic entity and record the exchange.

    Args:
        tab_name: The linguistic entity chosen in the UI (e.g. "Noun").
        message: The sentence typed by the user.
        chat_history: The chatbot's ``(user, bot)`` pairs, newest first.
            ``None`` is treated as an empty history.
        max_convo_length: Maximum number of previous turns placed in the prompt.

    Returns:
        ``("", chat_history)``: an empty string to clear the input box and the
        history with the new exchange inserted at the front.
    """
    if chat_history is None:
        chat_history = []

    # The history is stored newest-first (see the insert below), whereas
    # format_chat_prompt renders turns in the order given and keeps the tail of
    # the list. Take the newest turns from the front and flip them so the
    # prompt holds the most recent context in chronological order.
    limit = max(max_convo_length, 0)
    recent_turns = list(reversed(chat_history[:limit]))
    formatted_prompt = format_chat_prompt(message, recent_turns, limit)

    print('GPT ling ents Prompt + Context:')
    print(formatted_prompt)

    bot_message = chat(user_prompt = f'''Output any <{tab_name}> in the following sentence one per line: "{formatted_prompt}"''')
    chat_history.insert(0, (message, bot_message))
    return "", chat_history
def vicuna_respond(tab_name, message, chat_history):
    """Ask the local Vicuna model to list the selected linguistic entity.

    Args:
        tab_name: The linguistic entity chosen in the UI (e.g. "Noun").
        message: The sentence typed by the user.
        chat_history: The chatbot's ``(prompt, reply)`` pairs, newest first.
            ``None`` is treated as an empty history.

    Returns:
        ``(tab_name, "", chat_history)``: the unchanged entity selection, an
        empty string to clear the input box, and the history with the new
        exchange inserted at the front.
    """
    if chat_history is None:
        chat_history = []

    formatted_prompt = f'''Output any {tab_name} in the following sentence one per line: "{message}"'''
    print('Vicuna Ling Ents Fn - Prompt + Context:')
    print(formatted_prompt)

    input_ids = vicuna_tokenizer.encode(formatted_prompt, return_tensors="pt")
    # The tokenizer returns a CPU tensor; generate() expects the inputs on the
    # same device as the model, so move them explicitly.
    input_ids = input_ids.to(vicuna_model.device)
    output_ids = vicuna_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)
    bot_message = vicuna_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    print(bot_message)

    # The decoded sequence starts with the prompt; strip it so only the
    # model's continuation is shown.
    bot_message = bot_message.replace(formatted_prompt, '')
    print(bot_message)

    chat_history.insert(0, (formatted_prompt, bot_message))
    # Fixed pause kept from the original implementation; its purpose is not
    # evident from this file.
    time.sleep(2)
    return tab_name, "", chat_history
def llama_respond(tab_name, message, chat_history):
    """Ask the local LLaMA model to list the selected linguistic entity.

    Args:
        tab_name: The linguistic entity chosen in the UI (e.g. "Noun").
        message: The sentence typed by the user.
        chat_history: The chatbot's ``(prompt, reply)`` pairs, newest first.
            ``None`` is treated as an empty history.

    Returns:
        ``(tab_name, "", chat_history)``: the unchanged entity selection, an
        empty string to clear the input box, and the history with the new
        exchange inserted at the front.
    """
    if chat_history is None:
        chat_history = []

    formatted_prompt = f'''Output any {tab_name} in the following sentence one per line: "{message}"'''

    input_ids = llama_tokenizer.encode(formatted_prompt, return_tensors="pt")
    # The tokenizer returns a CPU tensor; generate() expects the inputs on the
    # same device as the model, so move them explicitly.
    input_ids = input_ids.to(llama_model.device)
    output_ids = llama_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)
    bot_message = llama_tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # The decoded sequence starts with the prompt; strip it so only the
    # model's continuation is shown.
    bot_message = bot_message.replace(formatted_prompt, '')

    chat_history.insert(0, (formatted_prompt, bot_message))
    # Fixed pause kept from the original implementation; its purpose is not
    # evident from this file.
    time.sleep(2)
    return tab_name, "", chat_history
# Demonstration files read by Strategy 3 (structured prompting), keyed by task.
# Paths are relative to the process working directory.
_DEMONSTRATION_FILES = {
    "POS Tagging": "demonstration_3_42_pos.txt",
    "Chunking": "demonstration_3_42_chunk.txt",
}


def _build_strategy_prompt(strategy, task_name, task_ling_ent, message):
    """Return the prompt for one (task, strategy) pair, or "" if unrecognized."""
    if task_name not in _DEMONSTRATION_FILES:
        return ""
    is_pos_task = task_name == "POS Tagging"

    if strategy == "S1":
        return f'''Output any {task_ling_ent} in the following sentence one per line: "{message}"'''

    if strategy == "S2":
        if is_pos_task:
            return f'''POS tag the following sentence using Universal POS tag set: "{message}"'''
        return f'''Chunk the following sentence in CoNLL 2000 format with BIO tags: "{message}"'''

    if strategy == "S3":
        with open(_DEMONSTRATION_FILES[task_name], 'r', encoding="utf-8") as f:
            demonstration = f.read()
        if is_pos_task:
            return f'''"{demonstration}". Using the POS tag structure above, POS tag the following sentence: "{message}"'''
        return f'''"{demonstration}". Using the Chunking structure above, Chunk the following sentence: "{message}"'''

    return ""


def _generate_strategy_reply(tokenizer, model, prompt):
    """Generate a continuation of *prompt* with a local model and return it without the prompt."""
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # The tokenizer returns a CPU tensor; generate() expects the inputs on the
    # same device as the model, so move them explicitly.
    input_ids = input_ids.to(model.device)
    # NOTE(review): max_length counts the prompt tokens as well, so long S3
    # demonstrations leave less room for the answer - confirm 1024 is enough.
    output_ids = model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)
    decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    print(decoded)
    # The decoded sequence starts with the prompt; strip it so only the
    # model's continuation is kept.
    reply = decoded.replace(prompt, '')
    print(reply)
    return reply


def gpt_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history, max_convo_length = 10):
    """Run one prompting strategy for a task through GPT and record the exchange.

    Args:
        strategy: Strategy key: "S1" (QA-based), "S2" (instruction-based) or
            "S3" (structured, using a demonstration file).
        task_name: "POS Tagging" or "Chunking".
        task_ling_ent: Linguistic entity used by strategy S1.
        message: The sentence typed by the user.
        chat_history: The chatbot's ``(message, reply)`` pairs, newest first.
            ``None`` is treated as an empty history.
        max_convo_length: Accepted for backward compatibility; earlier turns
            are not part of the prompt sent to the model.

    Returns:
        ``("", chat_history)``: an empty string to clear the input box and the
        history with the new exchange inserted at the front.

    Raises:
        OSError: If the S3 demonstration file cannot be read.
    """
    if chat_history is None:
        chat_history = []

    # An unrecognized task/strategy combination yields an empty prompt, which
    # is still sent to the model, as before.
    formatted_system_prompt = _build_strategy_prompt(strategy, task_name, task_ling_ent, message)

    # Log the prompt that is actually sent to the API.
    print('GPT coreNLP Prompt + Context:')
    print(formatted_system_prompt)

    bot_message = chat(user_prompt = formatted_system_prompt)
    chat_history.insert(0, (message, bot_message))
    return "", chat_history


def vicuna_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for a task through the local Vicuna model.

    Args:
        strategy: Strategy key: "S1", "S2" or "S3".
        task_name: "POS Tagging" or "Chunking".
        task_ling_ent: Linguistic entity used by strategy S1.
        message: The sentence typed by the user.
        chat_history: The chatbot's ``(prompt, reply)`` pairs, newest first.
            ``None`` is treated as an empty history.

    Returns:
        ``(task_name, "", chat_history)``: the unchanged task selection, an
        empty string to clear the input box, and the updated history.

    Raises:
        OSError: If the S3 demonstration file cannot be read.
    """
    if chat_history is None:
        chat_history = []

    formatted_prompt = _build_strategy_prompt(strategy, task_name, task_ling_ent, message)
    print('Vicuna Strategy Fn - Prompt + Context:')
    print(formatted_prompt)

    bot_message = _generate_strategy_reply(vicuna_tokenizer, vicuna_model, formatted_prompt)
    chat_history.insert(0, (formatted_prompt, bot_message))
    # Fixed pause kept from the original implementation; its purpose is not
    # evident from this file.
    time.sleep(2)
    return task_name, "", chat_history


def llama_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for a task through the local LLaMA model.

    Args:
        strategy: Strategy key: "S1", "S2" or "S3".
        task_name: "POS Tagging" or "Chunking".
        task_ling_ent: Linguistic entity used by strategy S1.
        message: The sentence typed by the user.
        chat_history: The chatbot's ``(prompt, reply)`` pairs, newest first.
            ``None`` is treated as an empty history.

    Returns:
        ``(task_name, "", chat_history)``: the unchanged task selection, an
        empty string to clear the input box, and the updated history.

    Raises:
        OSError: If the S3 demonstration file cannot be read.
    """
    if chat_history is None:
        chat_history = []

    formatted_prompt = _build_strategy_prompt(strategy, task_name, task_ling_ent, message)
    print('Llama Strategies - Prompt + Context:')
    print(formatted_prompt)

    bot_message = _generate_strategy_reply(llama_tokenizer, llama_model, formatted_prompt)
    chat_history.insert(0, (formatted_prompt, bot_message))
    # Fixed pause kept from the original implementation; its purpose is not
    # evident from this file.
    time.sleep(2)
    return task_name, "", chat_history
# Choices offered by both "Linguistic Entity" dropdowns.
_LINGUISTIC_ENTITY_CHOICES = ("Noun", "Determiner", "Noun phrase", "Verb phrase", "Dependent clause", "T-units")
_CUSTOM_ENTITY_HINT = "If your choice is not included in the options, please type your own."

# Markdown shared by both tabs.
_LINGUISTIC_COMPLEXITY_MD = """
### 📊 Linguistic Complexity
- We use an existing tool, [LFTK](https://github.com/brucewlee/lftk?tab=readme-ov-file), to estimate the linguistic complexity of input sentences.
- For more information regarding the meanings of each feature keyword, please reference their documentation [here](https://docs.google.com/spreadsheets/d/1uXtQ1ah0OL9cmHp2Hey0QcHb4bifJcQFLvYlVIAWWwQ/edit#gid=693915416).
"""
_COMPLEXITY_DEFINITIONS_MD = " Definitions for the complexity indices can be found [here](https://docs.google.com/spreadsheets/d/1uXtQ1ah0OL9cmHp2Hey0QcHb4bifJcQFLvYlVIAWWwQ/edit#gid=693915416)."

# (strategy key passed to the handlers, section heading shown in the UI)
_STRATEGY_SECTIONS = (
    ("S1", "### Strategy 1 - QA-Based Prompting"),
    ("S2", "### Strategy 2 - Instruction-Based Prompting"),
    ("S3", "### Strategy 3 - Structured Prompting"),
)


def _echo_prompt(prompt):
    """Return *prompt* unchanged; used to copy the input into the 'Original prompt' box."""
    return prompt


def _build_linguistic_entities_tab():
    """Create the "Linguistic Entities" tab and wire its Submit button."""
    # NOTE(review): container nesting was reconstructed from flattened source;
    # confirm the Row/Tab layout against the deployed app.
    with gr.Tab("Linguistic Entities"):
        with gr.Row():
            gr.Markdown("""
            ## 📜 Step-By-Step Instructions
            - Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5).
            - Select a Linguistic Entity from the Dropdown or enter a custom one.
            - Click 'Submit' to send your inputs to the models.
            - To submit a new prompt, repeat all the steps above and click 'Submit' again. Your new prompt should appear on the top of previous ones.
            ### ⏳ After you click 'Submit', the models will take a couple seconds to process your inputs.
            ### 🤖 Then, the models will output the linguistic entity found in your prompt based on your selection!
            """)
            gr.Markdown(_LINGUISTIC_COMPLEXITY_MD)

        # Inputs. The OpenAI key is taken from the environment, so there is no
        # per-user key field.
        ling_ents_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt here")
        linguistic_entities = gr.Dropdown(list(_LINGUISTIC_ENTITY_CHOICES), label="Linguistic Entity", allow_custom_value=True, info=_CUSTOM_ENTITY_HINT)
        ling_ents_btn = gr.Button(value="Submit")

        # Outputs
        user_prompt_1 = gr.Textbox(label="Original prompt")
        linguistic_features_textbox = gr.Textbox(label="Linguistic Complexity")
        gr.Markdown(_COMPLEXITY_DEFINITIONS_MD)
        with gr.Row():
            gpt_ling_ents_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_ling_ents_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_ling_ents_chatbot = gr.Chatbot(label="vicuna-7b")

        # Show the user's original prompt
        ling_ents_btn.click(fn=_echo_prompt, inputs=ling_ents_prompt, outputs=user_prompt_1, api_name="ling_ents_btn")
        # Show features from LFTK
        ling_ents_btn.click(linguistic_features_fn, inputs=[ling_ents_prompt], outputs=[linguistic_features_textbox])
        # Event Handler for GPT 3.5 Chatbot
        ling_ents_btn.click(gpt_respond, inputs=[linguistic_entities, ling_ents_prompt, gpt_ling_ents_chatbot],
                            outputs=[ling_ents_prompt, gpt_ling_ents_chatbot])
        # Event Handler for LLaMA Chatbot
        ling_ents_btn.click(llama_respond, inputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot],
                            outputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot])
        # Event Handler for Vicuna Chatbot
        ling_ents_btn.click(vicuna_respond, inputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot],
                            outputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot])


def _build_core_nlp_tab():
    """Create the "CoreNLP" tab (POS tagging / chunking with three strategies) and wire its Submit button."""
    # NOTE(review): container nesting was reconstructed from flattened source;
    # confirm the Row/Column layout against the deployed app.
    with gr.Tab("CoreNLP"):
        with gr.Row():
            gr.Markdown("""
            ## 📜 Step-By-Step Instructions
            - Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5).
            - Select a Task from the Dropdown.
            - Select a Linguistic Entity from the Dropdown or enter a custom one.
            - Click 'Submit' to send your inputs to the models.
            - To submit a new prompt, repeat all the steps above and click 'Submit' again. Your new prompt should appear on the top of previous ones.
            ### ⏳ After you click 'Submit', the models will take a couple seconds to process your inputs.
            ### 🤖 Then, the models will output the POS Tagging or Chunking in your prompt with three different strategies based on your selections!
            """)
            with gr.Column():
                gr.Markdown(_LINGUISTIC_COMPLEXITY_MD)
                gr.Markdown("""
                ### 🛠️ How each Strategy works
                - Strategy 1 - QA-Based Prompting
                - The model is prompted with a question-answer format. The input consists of a question, and the model generates a response based on the understanding of the question and its knowledge.
                - Strategy 2 - Instruction-Based Prompting
                - Involves providing the model with explicit instructions on how to generate a response. Instead of relying solely on context or previous knowledge, the instructions guide the model in generating content that aligns with specific criteria.
                - Strategy 3 - Structured Prompting
                - Involves presenting information to the model in a structured format, often with defined sections or categories. The model then generates responses following the given structure.
                """)

        # Inputs. The OpenAI key is taken from the environment, so there is no
        # per-user key field.
        task_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt here")
        task = gr.Dropdown(["POS Tagging", "Chunking"], label="Task")
        task_linguistic_entities = gr.Dropdown(list(_LINGUISTIC_ENTITY_CHOICES), label="Linguistic Entity For Strategy 1", allow_custom_value=True, info=_CUSTOM_ENTITY_HINT)
        task_btn = gr.Button(value="Submit")

        # Outputs
        user_prompt_2 = gr.Textbox(label="Original prompt", )
        linguistic_features_textbox_2 = gr.Textbox(label="Linguistic Complexity")
        gr.Markdown(_COMPLEXITY_DEFINITIONS_MD)

        # One section per strategy: a heading, a hidden Markdown whose value is
        # the strategy key fed to the handlers, and a row with one chatbot per
        # model.
        strategy_rows = []
        for strategy_key, heading in _STRATEGY_SECTIONS:
            gr.Markdown(heading)
            strategy_marker = gr.Markdown(strategy_key, visible=False)
            with gr.Row():
                gpt_chatbot = gr.Chatbot(label="gpt-3.5")
                llama_chatbot = gr.Chatbot(label="llama-7b")
                vicuna_chatbot = gr.Chatbot(label="vicuna-7b")
            strategy_rows.append((strategy_marker, gpt_chatbot, llama_chatbot, vicuna_chatbot))

        # Show the user's original prompt
        task_btn.click(fn=_echo_prompt, inputs=task_prompt, outputs=user_prompt_2, api_name="task_btn")
        # Show features from LFTK
        task_btn.click(linguistic_features_fn, inputs=[task_prompt], outputs=[linguistic_features_textbox_2])

        # Handlers are registered model by model (GPT, then LLaMA, then
        # Vicuna), each across S1-S3, matching the original registration order.
        # Event Handlers for GPT 3.5 Chatbot POS/Chunk
        for strategy_marker, gpt_chatbot, _, _ in strategy_rows:
            task_btn.click(gpt_strategies_respond, inputs=[strategy_marker, task, task_linguistic_entities, task_prompt, gpt_chatbot],
                           outputs=[task_prompt, gpt_chatbot])
        # Event Handlers for LLaMA Chatbot POS/Chunk
        for strategy_marker, _, llama_chatbot, _ in strategy_rows:
            task_btn.click(llama_strategies_respond, inputs=[strategy_marker, task, task_linguistic_entities, task_prompt, llama_chatbot],
                           outputs=[task, task_prompt, llama_chatbot])
        # Event Handlers for Vicuna Chatbot POS/Chunk
        for strategy_marker, _, _, vicuna_chatbot in strategy_rows:
            task_btn.click(vicuna_strategies_respond, inputs=[strategy_marker, task, task_linguistic_entities, task_prompt, vicuna_chatbot],
                           outputs=[task, task_prompt, vicuna_chatbot])


def interface():
    """Build the two application tabs inside the currently open ``gr.Blocks`` context.

    Creates the "Linguistic Entities" tab followed by the "CoreNLP" tab and
    registers every Submit-button handler. Returns nothing; the components
    attach themselves to the surrounding Blocks layout.
    """
    _build_linguistic_entities_tab()
    _build_core_nlp_tab()
# Assemble the application: page title first, then the tabs from interface().
demo = gr.Blocks(theme=gr.themes.Soft())
with demo:
    gr.Markdown("""
    # LingEval
    ## A Comparative Analysis of the Core Linguistic Knowledge in Large Language Models
    """)
    # load interface
    interface()

# Start the Gradio server.
demo.launch()