# Hugging Face Spaces page header (status at capture time: "Runtime error")
import streamlit as st
from datasets import load_dataset

# Page-level Streamlit configuration.
st.set_page_config(
    page_icon="π§",  # NOTE(review): icon string looks mis-encoded (mojibake) — confirm the intended emoji
    layout="wide",
)

st.write(
    "This is an application for viewing different generations for the same prompt. The generations vary depending on the checkpoint used and also the parameters used for the generation."
)

# Auth token for the gated dataset, read from Streamlit secrets.
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]

# Color used when rendering generated text as HTML.
PROMPT_COLOR = "#CA437E"
def safe_text(text):
    """Wrap *text* in a ``<pre>`` tag, rendering newlines as HTML ``<br>`` breaks."""
    escaped = "<br>".join(text.split("\n"))
    return "<pre>" + escaped + "</pre>"
def prompt_markup_format(text):
    """Return *text* wrapped in a black ``<font>`` tag for HTML rendering.

    Fixes the original markup, which emitted invalid ``<*font ...>`` /
    ``</*font>`` tags (stray asterisks) that browsers cannot parse as a
    font element, so the prompt was never actually styled.
    """
    return f'<font color="black">{text}</font>'
def generation_markup_format(text):
    """Return *text* wrapped in a ``<font>`` tag colored ``PROMPT_COLOR``.

    Fixes two markup bugs in the original string: a stray ``</pre>``
    closing tag that was never opened here (it prematurely closed the
    ``<pre>`` produced by ``safe_text``), and an unquoted ``color``
    attribute value.
    """
    return f'<font color="{PROMPT_COLOR}">{text}</font>'
# Load the generations dataset (requires an authenticated HF token) and
# keep only its train split.
ds = load_dataset("bigscience/bloom-generations", use_auth_token=HF_API_TOKEN)["train"]

left_col, right_col = st.columns(2)

with left_col:
    # Checkpoint selector ("all" disables filtering).
    checkpoints = ds.unique("checkpoint")
    st.markdown("<h1 style='text-align: center'>Prompt</h1>", unsafe_allow_html=True)
    chosen_checkpoint = st.selectbox("Choose a checkpoint", checkpoints + ["all"])
    if chosen_checkpoint == "all":
        ds_ckp = ds
    else:
        ds_ckp = ds.filter(
            lambda batch: [c == chosen_checkpoint for c in batch["checkpoint"]],
            batched=True,
        )

    # Language selector ("all" disables filtering). Note: unique langs are
    # taken from the full dataset, not the checkpoint-filtered one, matching
    # the original behavior.
    langs = ds.unique("lang")
    chosen_lang = st.selectbox("Choose a lang", langs + ["all"])
    if chosen_lang == "all":
        ds_lang = ds_ckp
    else:
        ds_lang = ds_ckp.filter(
            lambda batch: [lg == chosen_lang for lg in batch["lang"]],
            batched=True,
        )

    # Prompt selector; the chosen prompt is echoed below the widgets.
    prompts = ds_lang.unique("prompt")
    chosen_prompt = st.selectbox("Choose a prompt", prompts)
    st.markdown(safe_text(chosen_prompt), unsafe_allow_html=True)

    # All rows sharing the chosen prompt (one per generation config).
    sub_ds = ds_lang.filter(
        lambda batch: [p == chosen_prompt for p in batch["prompt"]],
        batched=True,
    )

with right_col:
    st.markdown(
        "<h1 style='text-align: center'>Generation</h1>", unsafe_allow_html=True
    )

    # Pick one generation among all rows matching the chosen prompt.
    index_sample = st.number_input(
        "Index of the chosen generation",
        min_value=0,
        max_value=len(sub_ds) - 1,
        value=0,
        step=1,
    )
    sample = sub_ds[index_sample]
    generation = sample["generation"]

    # Optional truncation of the displayed generation.
    stop_index_sample = st.number_input(
        "Stop generation at character number",
        min_value=0,
        max_value=len(generation),
        value=len(generation),
        step=1,
    )
    st.markdown(
        generation_markup_format(safe_text(generation[:stop_index_sample])),
        unsafe_allow_html=True,
    )

    st.markdown(
        "<h2 style='text-align: center'>Generation configuration</h2>",
        unsafe_allow_html=True,
    )
    # Everything except the prompt/generation text is generation config.
    config = {
        key: value
        for key, value in sample.items()
        if key not in ["prompt", "generation"]
    }
    config  # Streamlit "magic": a bare expression is rendered on the page