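# Streamlit demo app: inspect and compare belief-state and response generations
# from several task-oriented dialogue models (pptod, T5, BART, DialoGPT variants)
# on MultiWoZ development examples.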
import json
from typing import Any, Dict, List, Tuple, Union

import streamlit as st
from annotated_text import annotated_text

sidebar = st.sidebar
def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
    # Label each generated unigram by whether it also appears in the gold or input text.
    gold_set = set(belief_state_example['gold'].split(' '))
    input_set = set(" ".join(belief_state_example['input']).split(' '))
    generated = belief_state_example['generated']
    result = []
    for word in generated.split(' '):
        if word in gold_set:
            result.append((word, 'gold', '#dfd'))  # gold overlap => green label
        elif word in input_set:
            result.append((word, 'in', '#eea'))  # input-only overlap => yellow label
        else:
            result.append(word + ' ')  # no overlap => unlabeled (restore the space consumed by split)
    return result
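# Illustrative example (hypothetical strings): for
#   {'gold': 'a b', 'input': ['c d'], 'generated': 'a c e'}
# the function returns
#   [('a', 'gold', '#dfd'), ('c', 'in', '#eea'), 'e ']
# which annotated_text renders as a green chip, a yellow chip, and plain text.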
# Load in data: model definitions and the JSONL files holding their generated examples.
pptod_examples: List[Dict] = []
models: Dict[str, Dict[str, Any]] = {
    'pptod-small': {
        'name': 'pptod-small',
        'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** '
                       'As such, it is familiar with the meaning of these special separator tokens. However, '
                       'it does not have MultiWoZ training experience, so while it has adapted to the belief '
                       'state grammar generally, it is unaware of the particular slot name conventions of '
                       'MultiWoZ.',
        'output_file': './output/pptod-small-10-percent.jsonl'
    },
    't5-small': {
        'name': 't5-small',
        'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude '
                       'special tokens that the model could not be familiar with.',
        'output_file': './output/t5-small-10-percent.jsonl'
    },
    'bart': {
        'name': 'bart',
        'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude '
                       'special tokens that the model could not be familiar with.',
        'output_file': './output/bart-100ish-examples.jsonl'
    },
    'dialogpt': {
        'name': 'dialogpt',
        'description': 'DialoGPT is a dialogue response generation model (a fine-tuned GPT-2) for multi-turn '
                       'conversations, trained on 147M Reddit conversation chains',
        'output_file': './output/dialogpt-100ish-examples.jsonl'
    },
    'my-t5-pptod-checkpoint': {
        'name': 'my-t5-pptod-checkpoint',
        'description': 'My re-implementation of the pptod approach, in re-structured code',
        'output_file': './output/my-pre-train-t5-100ish-examples.jsonl'
    },
    'my-t5-fine-tune-checkpoint': {
        'name': 'my-t5-fine-tune-checkpoint',
        'description': 'My re-implementation of the pptod approach, in re-structured code, fine-tuned on MultiWoZ',
        'output_file': './output/my-fine-tune-t5-100ish-examples.jsonl'
    }
}
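# Read each model's generated examples from its JSONL output file (one JSON record per line).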
for model_def in models.values():
    model_def['examples'] = []
    with open(model_def['output_file'], 'r') as f:
        for line in f:
            model_def['examples'].append(json.loads(line.strip()))
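# Note: each JSONL record is expected to decode to a list whose first element is a dict of
# example fields; the keys consumed below ('turn_domain', 'turn_num', 'bspn_input', 'bspn_gen',
# 'bspn', 'resp_input', 'resp_gen', 'resp') are inferred from their usage in this script
# rather than from a documented schema.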
model_names = list(models.keys())
model_name = sidebar.selectbox('Model', model_names)
active_model = models[model_name]
| st.write(f""" | |
| #### Inputs | |
| **Selected Model:** `{active_model['name']}` | |
| {active_model['description']} | |
| """) | |
| """ | |
| ### Belief State Prediction | |
| Below is the predicted belief state as a sequence. | |
| - `input` denotes the input, which has been transformed into a list for | |
| human readability but is presented to the model as a sequence. | |
| - `gold` is the target belief state in sequence form (slot-name slot-value pairs) | |
| - `generated` is the model generated belief state sequence | |
| """ | |
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})" for i, e in enumerate(active_model['examples'])]
title = sidebar.selectbox("Development Example", titles)
# Parse the example index back out of the selected title; splitting on ':' (rather than
# taking title[0]) keeps this correct for indices of more than one digit.
active_example = active_model['examples'][int(title.split(':')[0])][0]
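# 'bspn_input' is one long token sequence; splitting on "> <" breaks it wherever one
# angle-bracketed special token is immediately followed by another, and the comprehension
# below restores the brackets consumed by the split so the pieces read as whole tokens.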
active_belief_spans = active_example['bspn_input'].split("> <")
active_example_bs = {
    'input': [
        ('<' if i > 0 else '')
        + string
        + ('>' if not string.endswith('>') and len(active_belief_spans) > 1 else '')
        for i, string in enumerate(active_belief_spans)
    ],
    'generated': active_example['bspn_gen'],
    'gold': active_example['bspn'],
}
st.write(active_example_bs)

"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))
| """ | |
| --- | |
| ### Response Generation | |
| Below is the predicted response as a sequence. | |
| - `input` denotes the input, which has been transformed into a list for | |
| human readability but is presented to the model as a sequence. | |
| - `gold` is the target response sequence | |
| - `generated` is the model generated response | |
| """ | |
active_example_resp = {
    'input': [
        ('<' if i > 0 else '')
        + string
        + ('>' if not string.endswith('>') else '')
        for i, string in enumerate(active_example['resp_input'].split("> <"))
    ],
    'generated': active_example['resp_gen'],
    'gold': active_example['resp'],
}
st.write(active_example_resp)
| """ | |
| ##### Generated Overlap | |
| """ | |
| annotated_text(*bs_unigram_match_annotated_text(active_example_resp)) | |