Spaces:
Runtime error
Runtime error
Brendan King
committed on
Commit
·
ce78cc4
1
Parent(s):
07aa55e
Initial run: example explorer in huggingface
Browse files- app.py +127 -0
- output/bart-100ish-examples.jsonl +0 -0
- output/dialogpt-100ish-examples.jsonl +0 -0
- output/pptod-small-10-percent.jsonl +0 -0
- output/t5-small-10-percent.jsonl +0 -0
app.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from typing import List, Dict, Tuple, Union, Any
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from annotated_text import annotated_text
|
| 6 |
+
sidebar = st.sidebar
|
| 7 |
+
def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
|
| 8 |
+
gold_set = set(belief_state_example['gold'].split(' '))
|
| 9 |
+
input_set = set(" ".join(belief_state_example['input']).split(' '))
|
| 10 |
+
generated = belief_state_example['generated']
|
| 11 |
+
result = []
|
| 12 |
+
for word in generated.split(' '):
|
| 13 |
+
if word in gold_set:
|
| 14 |
+
result.append((word, 'gold', '#dfd')) # gold overlap => label green
|
| 15 |
+
elif word in input_set:
|
| 16 |
+
result.append((word, 'in', '#eea')) # input overlap => label yellow
|
| 17 |
+
else:
|
| 18 |
+
result.append(word + ' ') # no overlap => no label (replace space)
|
| 19 |
+
return result
|
| 20 |
+
|
| 21 |
+
# load in data
|
| 22 |
+
pptod_examples: List[Dict] = []
|
| 23 |
+
|
| 24 |
+
models: Dict[str, Dict[str, Any]] = {
|
| 25 |
+
'pptod-small': {
|
| 26 |
+
'name': 'pptod-small',
|
| 27 |
+
'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** As such, '
|
| 28 |
+
'it is familiar with the meaning of these special separator tokens. However, it does not have'
|
| 29 |
+
'MultiWoZ training experience, so while it has adapted to the belief state grammar generally, it '
|
| 30 |
+
'is unaware of the particular slot name conventions of MultiWoZ.',
|
| 31 |
+
'output_file': './output/pptod-small-10-percent.jsonl'
|
| 32 |
+
},
|
| 33 |
+
't5-small': {
|
| 34 |
+
'name': 't5-small',
|
| 35 |
+
'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude special tokens'
|
| 36 |
+
'that the model could not be familiar with.',
|
| 37 |
+
'output_file': './output/t5-small-10-percent.jsonl'
|
| 38 |
+
},
|
| 39 |
+
'bart': {
|
| 40 |
+
'name': 'bart',
|
| 41 |
+
'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude special tokens'
|
| 42 |
+
'that the model could not be familiar with.',
|
| 43 |
+
'output_file': './output/bart-100ish-examples.jsonl'
|
| 44 |
+
},
|
| 45 |
+
'dialogpt': {
|
| 46 |
+
'name': 'dialogpt',
|
| 47 |
+
'description': 'DialoGPT is a (fine-tuned GPT-2) dialogue response generation model for multiturn conversations from 147M Reddit Conversation chains',
|
| 48 |
+
'output_file': './output/dialogpt-100ish-examples.jsonl'
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
for model_def in models.values():
|
| 52 |
+
model_def['examples'] = []
|
| 53 |
+
with open(model_def['output_file'], 'r') as f:
|
| 54 |
+
for line in f.readlines():
|
| 55 |
+
model_def['examples'].append(json.loads(line.strip()))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
model_names = list(models.keys())
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
model_name = sidebar.selectbox('Model', model_names)
|
| 62 |
+
active_model = models[model_name]
|
| 63 |
+
|
| 64 |
+
st.write(f"""
|
| 65 |
+
#### Inputs
|
| 66 |
+
|
| 67 |
+
**Selected Model:** `{active_model['name']}`
|
| 68 |
+
|
| 69 |
+
{active_model['description']}
|
| 70 |
+
|
| 71 |
+
""")
|
| 72 |
+
"""
|
| 73 |
+
### Belief State Prediction
|
| 74 |
+
|
| 75 |
+
Below is the predicted belief state as a sequence.
|
| 76 |
+
|
| 77 |
+
- `input` denotes the input, which has been transformed into a list for
|
| 78 |
+
human readability but is presented to the model as a sequence.
|
| 79 |
+
- `gold` is the target belief state in sequence form (slot-name slot-value pairs)
|
| 80 |
+
- `generated` is the model generated belief state sequence
|
| 81 |
+
"""
|
| 82 |
+
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})" for i, e in enumerate(active_model['examples'])]
|
| 83 |
+
title = sidebar.selectbox("Development Example", titles)
|
| 84 |
+
active_example = active_model['examples'][int(title[0])][0]
|
| 85 |
+
|
| 86 |
+
active_belief_spans = active_example['bspn_input'].split("> <")
|
| 87 |
+
active_example_bs = {'input':
|
| 88 |
+
[ ('<' if i > 0 else '') +
|
| 89 |
+
string +
|
| 90 |
+
('>' if string[-1] is not '>' and len(active_belief_spans) > 1 else '')
|
| 91 |
+
for i, string in enumerate(active_belief_spans)],
|
| 92 |
+
'generated': active_example['bspn_gen'],
|
| 93 |
+
'gold': active_example['bspn']}
|
| 94 |
+
|
| 95 |
+
st.write(active_example_bs)
|
| 96 |
+
"""
|
| 97 |
+
##### Generated Overlap
|
| 98 |
+
"""
|
| 99 |
+
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))
|
| 100 |
+
|
| 101 |
+
"""
|
| 102 |
+
---
|
| 103 |
+
|
| 104 |
+
### Response Generation
|
| 105 |
+
|
| 106 |
+
Below is the predicted response as a sequence.
|
| 107 |
+
|
| 108 |
+
- `input` denotes the input, which has been transformed into a list for
|
| 109 |
+
human readability but is presented to the model as a sequence.
|
| 110 |
+
- `gold` is the target response sequence
|
| 111 |
+
- `generated` is the model generated response
|
| 112 |
+
"""
|
| 113 |
+
#title = st.selectbox("Development Example", titles)
|
| 114 |
+
|
| 115 |
+
active_example_resp = {'input':
|
| 116 |
+
[ ('<' if i > 0 else '') +
|
| 117 |
+
string +
|
| 118 |
+
('>' if string[-1] is not '>' else '')
|
| 119 |
+
for i, string in enumerate(active_example['resp_input'].split("> <"))],
|
| 120 |
+
'generated': active_example['resp_gen'],
|
| 121 |
+
'gold': active_example['resp']}
|
| 122 |
+
|
| 123 |
+
st.write(active_example_resp)
|
| 124 |
+
"""
|
| 125 |
+
##### Generated Overlap
|
| 126 |
+
"""
|
| 127 |
+
annotated_text(*bs_unigram_match_annotated_text(active_example_resp))
|
output/bart-100ish-examples.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
output/dialogpt-100ish-examples.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
output/pptod-small-10-percent.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
output/t5-small-10-percent.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|