Spaces:
Runtime error
Runtime error
| import os | |
| import sys | |
| import json | |
| import time | |
| import openai | |
| import pickle | |
| import argparse | |
| import requests | |
| from tqdm import tqdm | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizer | |
| from fastchat.model import load_model, get_conversation_template, add_model_args | |
| from nltk.tag.mapping import _UNIVERSAL_TAGS | |
| import gradio as gr | |
| from transformers import pipeline | |
# Universal POS tag names taken from NLTK, with the final entry replaced by
# 'PUNC' (presumably the punctuation tag -- confirm against NLTK's ordering).
uni_tags = list(_UNIVERSAL_TAGS)[:-1] + ['PUNC']
# Begin / Inside / Outside markers used for BIO-style chunk labelling.
bio_tags = list('BIO')

# Chunk labels.
chunk_tags = 'ADJP ADVP CONJP INTJ LST NP O PP PRT SBAR UCP VP'.split()

# Constituency labels; the order of the entries is kept exactly as authored.
syntags = [
    'NP', 'S', 'VP', 'ADJP', 'ADVP', 'SBAR', 'TOP', 'PP', 'POS', 'NAC',
    "''", 'SINV', 'PRN', 'QP', 'WHNP', 'RB', 'FRAG',
    'WHADVP', 'NX', 'PRT', 'VBZ', 'VBP', 'MD', 'NN', 'WHPP', 'SQ',
    'SBARQ', 'LST', 'INTJ', 'X', 'UCP', 'CONJP', 'NNP', 'CD', 'JJ',
    'VBD', 'WHADJP', 'PRP', 'RRC', 'NNS', 'SYM', 'CC',
]
# Take the OpenAI key from the environment so that no secret has to live in
# the source; when OPENAI_API_KEY is unset, keep the original blank placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", " ")
# determinant vs. determiner
# https://wikidiff.com/determiner/determinant

# Number of leading word-level entries that are skipped in both lists below;
# only the phrase- and clause-level categories after them are used.
_WORD_LEVEL_COUNT = 7

# Human-readable category names, parallel to _category_tags.
_category_names = [
    'Noun', 'Verb', 'Adjective', 'Adverb', 'Preposition/Subord',
    'Coordinating Conjunction',  # 'Cardinal Number',
    'Determiner',
    'Noun Phrase', 'Verb Phrase', 'Adjective Phrase', 'Adverb Phrase',
    'Preposition Phrase', 'Conjunction Phrase', 'Coordinate Phrase',
    'Quantitave Phrase', 'Complex Nominal',
    'Clause', 'Dependent Clause', 'Fragment Clause', 'T-unit',
    'Complex T-unit',  # 'Fragment T-unit',
]

# Short tags, parallel to _category_names.
_category_tags = [
    'NN', 'VB', 'JJ', 'RB', 'IN', 'CC', 'DT',
    'NP', 'VP', 'ADJP', 'ADVP', 'PP', 'CONJP', 'CP', 'QP', 'CN',
    'C', 'DC', 'FC', 'T', 'CT',
]

# Descriptions for the entries of uni_tags, in the same order.
ents_prompt_uni_tags = [
    'Verb', 'Noun', 'Pronoun', 'Adjective', 'Adverb',
    'Preposition and Postposition', 'Coordinating Conjunction',
    'Determiner', 'Cardinal Number', 'Particles or other function words',
    'Words that cannot be assigned a POS tag', 'Punctuation',
]

# Final inventories: the universal POS tags first, then the phrase/clause tags.
ents = uni_tags + _category_tags[_WORD_LEVEL_COUNT:]
ents_prompt = ents_prompt_uni_tags + _category_names[_WORD_LEVEL_COUNT:]

# Print every tag next to its description when the module is loaded.
for tag, description in zip(ents, ents_prompt):
    print(tag, description)
# Maps the model label shown in the UI to the model identifier it stands for.
# Only one entry is active; the label 'gpt3.5' currently points at 'gpt2'.
model_mapping = {'gpt3.5': 'gpt2'}
# Disabled entries:
#   'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
#   'llama-7b': './llama/hf/7B',
# Sentence indices, one integer per line. Blank lines (for example a trailing
# newline at the end of the file) are skipped instead of crashing int().
with open('sample_uniform_1k_2.txt', 'r', encoding='utf-8') as f:
    selected_idx = [int(line.strip()) for line in f if line.strip()]

# Corpus records in JSON Lines form: one JSON document per non-empty line.
# The encoding is given explicitly so decoding does not depend on the locale.
with open('ptb.jsonl', 'r', encoding='utf-8') as f:
    ptb = [json.loads(line) for line in f if line.strip()]
## Prompt 1
# template_all takes the sentence; template_single takes (category, sentence).
template_all = '''Please output the <Noun, Verb, Adjective, Adverb, Preposition/Subord, Coordinating Conjunction, Cardinal Number, Determiner, Noun Phrase, Verb Phrase, Adjective Phrase, Adverb Phrase, Preposition Phrase, Conjunction Phrase, Coordinate Phrase, Quantitave Phrase, Complex Nominal, Clause, Dependent Clause, Fragment Clause, T-unit, Complex T-unit, Fragment T-unit> in the following sentence without any additional text in json format: "{}"'''
template_single = '''Please output any <{}> in the following sentence one per line without any additional text: "{}"'''
## Prompt 2
# Each instruction template takes the sentence as its single placeholder.
prompt2_pos = '''Please pos tag the following sentence using Universal POS tag set without generating any additional text: {}'''
prompt2_parse = '''Generate textual representation of the constituency parse tree of the following sentence using Penn TreeBank tag set without outputing any additional text: {}'''
# prompt2_chunk used to be assigned twice in a row, so the first wording
# ("... as in CoNLL 2000 shared task ...") was overwritten before any use.
# Only the effective definition is kept, so the runtime value is unchanged.
prompt2_chunk = '''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {}'''
## Prompt 3
# Read the three demonstration files in full; the texts are bound to
# demon_pos, demon_chunk and demon_parse in that order.
_demo_texts = []
for _demo_path in (
    'demonstration_3_42_pos.txt',
    'demonstration_3_42_chunk.txt',
    'demonstration_3_42_parse.txt',
):
    with open(_demo_path, 'r') as f:
        _demo_texts.append(f.read())
demon_pos, demon_chunk, demon_parse = _demo_texts
# Visual theme for the Gradio app.
theme = gr.themes.Soft()

# The only generator loaded is GPT-2, the model id that model_mapping lists
# for 'gpt3.5'; it runs locally through transformers, no remote request is made.
# GPT-2 is a decoder-only (causal) language model, so it is loaded under the
# "text-generation" task. "text2text-generation" targets encoder-decoder
# models and does not list GPT-2 as supported. Both tasks return a list of
# dicts with a 'generated_text' key, so callers are unaffected.
gpt_pipeline = pipeline(task="text-generation", model="gpt2")
# Disabled generators (kept for reference):
#vicuna7b_pipeline = pipeline(task="text2text-generation", model="lmsys/vicuna-7b-v1.3")
#llama7b_pipeline = pipeline(task="text2text-generation", model="./llama/hf/7B")

# Dropdown options for model and task
model_options = list(model_mapping)
task_options = ['POS', 'Chunking']  # remove parsing
| # Function to process text based on model and task | |
def process_text(model_name, task, text):
    """Run the three prompting strategies on ``text`` for the selected task.

    The sentence typed by the user is what gets analysed. Earlier, a leftover
    batch loop replaced ``text`` with a corpus sentence, and the body only ran
    for the model name 'vicuna-7b', which is not among the offered models, so
    every call from the UI returned ``None``.

    Args:
        model_name: Model label chosen in the UI; must be a key of
            ``model_mapping``.
        task: ``'POS'`` or ``'Chunking'``.
        text: The sentence to analyse.

    Returns:
        A 3-tuple of generated strings, in the order: strategy 1 (``template_all``
        prompt), strategy 2 (task instruction prompt), strategy 3 (demonstration
        prompt).

    Raises:
        ValueError: If ``task`` or ``model_name`` is not supported. Previously
            these cases fell through and returned ``None``.
    """
    # Pick the task-specific instruction template and demonstration text.
    if task == 'POS':
        instruction, demonstration = prompt2_pos, demon_pos
    elif task == 'Chunking':
        instruction, demonstration = prompt2_chunk, demon_chunk
    else:
        raise ValueError('Unsupported task: {!r}'.format(task))

    if model_name not in model_mapping:
        raise ValueError('Unsupported model: {!r}'.format(model_name))

    prompts = (
        template_all.format(text),
        instruction.format(text),
        # NOTE(review): as in the original, the demonstration text is sent
        # as-is and does not include the input sentence -- confirm whether
        # the sentence should be appended to it.
        demonstration,
    )
    # Every offered model is currently served by the single loaded pipeline.
    # NOTE(review): generation length is left at the pipeline defaults --
    # confirm they are long enough for these prompts.
    return tuple(gpt_pipeline(prompt)[0]['generated_text'] for prompt in prompts)
# Gradio interface
# Input widgets, in the positional order of process_text's parameters:
# model, task, text.
_input_widgets = [
    gr.Dropdown(model_options, label="Select Model"),
    gr.Dropdown(task_options, label="Select Task"),
    gr.Textbox(label="Input Text", placeholder="Enter the text to process..."),
]

# One output box per element of the 3-tuple returned by process_text.
_output_widgets = [
    gr.Textbox(label="Strategy 1 QA Result"),
    gr.Textbox(label="Strategy 2 Instruction Result"),
    gr.Textbox(label="Strategy 3 Structured Prompting Result"),
]

iface = gr.Interface(
    fn=process_text,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title="LLM Evaluator For Linguistic Scrutiny",
    theme=theme,
    live=False,
)

# Start the app as soon as the module is executed.
iface.launch()