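# Streamlit demo app: inspect and compare belief-state and response generations
# from several task-oriented dialogue models (pptod, T5, BART, DialoGPT variants)
# on MultiWoZ development examples.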
import json
from typing import Any, Dict, List, Tuple, Union

import streamlit as st
from annotated_text import annotated_text

sidebar = st.sidebar
def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
    # Label each generated unigram by whether it also appears in the gold or input text.
    gold_set = set(belief_state_example['gold'].split(' '))
    input_set = set(" ".join(belief_state_example['input']).split(' '))
    generated = belief_state_example['generated']
    result = []
    for word in generated.split(' '):
        if word in gold_set:
            result.append((word, 'gold', '#dfd'))  # gold overlap => green label
        elif word in input_set:
            result.append((word, 'in', '#eea'))  # input-only overlap => yellow label
        else:
            result.append(word + ' ')  # no overlap => unlabeled (restore the space consumed by split)
    return result
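# Illustrative example (hypothetical strings): for
#   {'gold': 'a b', 'input': ['c d'], 'generated': 'a c e'}
# the function returns
#   [('a', 'gold', '#dfd'), ('c', 'in', '#eea'), 'e ']
# which annotated_text renders as a green chip, a yellow chip, and plain text.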
# Load in data: model definitions and the JSONL files holding their generated examples.
pptod_examples: List[Dict] = []
models: Dict[str, Dict[str, Any]] = {
    'pptod-small': {
        'name': 'pptod-small',
        'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** '
                       'As such, it is familiar with the meaning of these special separator tokens. However, '
                       'it does not have MultiWoZ training experience, so while it has adapted to the belief '
                       'state grammar generally, it is unaware of the particular slot name conventions of '
                       'MultiWoZ.',
        'output_file': './output/pptod-small-10-percent.jsonl'
    },
    't5-small': {
        'name': 't5-small',
        'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude '
                       'special tokens that the model could not be familiar with.',
        'output_file': './output/t5-small-10-percent.jsonl'
    },
    'bart': {
        'name': 'bart',
        'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude '
                       'special tokens that the model could not be familiar with.',
        'output_file': './output/bart-100ish-examples.jsonl'
    },
    'dialogpt': {
        'name': 'dialogpt',
        'description': 'DialoGPT is a dialogue response generation model (a fine-tuned GPT-2) for multi-turn '
                       'conversations, trained on 147M Reddit conversation chains',
        'output_file': './output/dialogpt-100ish-examples.jsonl'
    },
    'my-t5-pptod-checkpoint': {
        'name': 'my-t5-pptod-checkpoint',
        'description': 'My re-implementation of the pptod approach, in re-structured code',
        'output_file': './output/my-pre-train-t5-100ish-examples.jsonl'
    },
    'my-t5-fine-tune-checkpoint': {
        'name': 'my-t5-fine-tune-checkpoint',
        'description': 'My re-implementation of the pptod approach, in re-structured code, fine-tuned on MultiWoZ',
        'output_file': './output/my-fine-tune-t5-100ish-examples.jsonl'
    }
}
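# Read each model's generated examples from its JSONL output file (one JSON record per line).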
for model_def in models.values():
    model_def['examples'] = []
    with open(model_def['output_file'], 'r') as f:
        for line in f:
            model_def['examples'].append(json.loads(line.strip()))
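# Note: each JSONL record is expected to decode to a list whose first element is a dict of
# example fields; the keys consumed below ('turn_domain', 'turn_num', 'bspn_input', 'bspn_gen',
# 'bspn', 'resp_input', 'resp_gen', 'resp') are inferred from their usage in this script
# rather than from a documented schema.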
model_names = list(models.keys())
model_name = sidebar.selectbox('Model', model_names)
active_model = models[model_name]
| st.write(f""" | |
| #### Inputs | |
| **Selected Model:** `{active_model['name']}` | |
| {active_model['description']} | |
| """) | |
| """ | |
| ### Belief State Prediction | |
| Below is the predicted belief state as a sequence. | |
| - `input` denotes the input, which has been transformed into a list for | |
| human readability but is presented to the model as a sequence. | |
| - `gold` is the target belief state in sequence form (slot-name slot-value pairs) | |
| - `generated` is the model generated belief state sequence | |
| """ | |
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})" for i, e in enumerate(active_model['examples'])]
title = sidebar.selectbox("Development Example", titles)
# Parse the example index back out of the selected title; splitting on ':' (rather than
# taking title[0]) keeps this correct for indices of more than one digit.
active_example = active_model['examples'][int(title.split(':')[0])][0]
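# 'bspn_input' is one long token sequence; splitting on "> <" breaks it wherever one
# angle-bracketed special token is immediately followed by another, and the comprehension
# below restores the brackets consumed by the split so the pieces read as whole tokens.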
active_belief_spans = active_example['bspn_input'].split("> <")
active_example_bs = {
    'input': [
        ('<' if i > 0 else '')
        + string
        + ('>' if not string.endswith('>') and len(active_belief_spans) > 1 else '')
        for i, string in enumerate(active_belief_spans)
    ],
    'generated': active_example['bspn_gen'],
    'gold': active_example['bspn'],
}
st.write(active_example_bs)

"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))
| """ | |
| --- | |
| ### Response Generation | |
| Below is the predicted response as a sequence. | |
| - `input` denotes the input, which has been transformed into a list for | |
| human readability but is presented to the model as a sequence. | |
| - `gold` is the target response sequence | |
| - `generated` is the model generated response | |
| """ | |
active_example_resp = {
    'input': [
        ('<' if i > 0 else '')
        + string
        + ('>' if not string.endswith('>') else '')
        for i, string in enumerate(active_example['resp_input'].split("> <"))
    ],
    'generated': active_example['resp_gen'],
    'gold': active_example['resp'],
}
st.write(active_example_resp)
| """ | |
| ##### Generated Overlap | |
| """ | |
| annotated_text(*bs_unigram_match_annotated_text(active_example_resp)) | |