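"""Gradio app for the TRUEBench leaderboard Space.

Downloads the evaluation queue and results datasets from the Hugging Face Hub,
renders per-category and per-language leaderboard tabs, and exposes two model
submission flows (with and without vLLM inference).
"""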
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

from src.data_utils import get_dataframe_category, get_dataframe_language
import src.config as configs
from utils import get_profile_and_organizations, download_with_restart
from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    EVALUATION_QUEUE_TEXT_OPTION1,
    EVALUATION_QUEUE_TEXT_OPTION2,
    EVALUATION_QUEUE_TEXT_OPTION3,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import EVAL_COLS, Precision
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df
from src.submission.submit import add_new_eval_option1, add_new_eval_option2
from handlers import (
    search_leaderboard,
    update_modelselector_group,
    update_leaderboard,
)
from ui import create_leaderboard_tab
from constants import TAB_KEYS, VLLM_VERSIONS


def restart_space():
    """Restart this Hugging Face Space via the Hub API."""
    API.restart_space(repo_id=REPO_ID)
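
# `download_with_restart` (imported from utils) is expected to behave roughly
# like this sketch: attempt the download, and if the local copy cannot be
# refreshed, restart the Space so it comes back up in a clean state. This is
# an illustration only; the actual implementation lives in utils.py.
#
#     def download_with_restart(download_func, restart_func=None, **kwargs):
#         try:
#             download_func(**kwargs)
#         except Exception:
#             restart_func()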
### Space initialisation: sync the evaluation queue and results datasets from the Hub.
download_with_restart(
    snapshot_download,
    repo_id=QUEUE_REPO,
    local_dir=EVAL_REQUESTS_PATH,
    repo_type="dataset",
    token=TOKEN,
    restart_func=restart_space,
)
download_with_restart(
    snapshot_download,
    repo_id=RESULTS_REPO,
    local_dir=EVAL_RESULTS_PATH,
    repo_type="dataset",
    token=TOKEN,
    restart_func=restart_space,
)
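
# Split the downloaded evaluation queue into finished / running / pending
# entries (these dataframes are prepared here but are not rendered by the
# UI below in this version of the app).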
(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    user_state = gr.State()
    organization_state = gr.State()

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # One leaderboard tab per entry in TAB_KEYS (category- or language-grouped).
        for key in TAB_KEYS:
            if key == "Category":
                df = get_dataframe_category()
                column_selector_value = configs.ON_LOAD_COLUMNS_CATEGORY[3:]
            else:
                df = get_dataframe_language()
                column_selector_value = configs.ON_LOAD_COLUMNS_LANG[3:]
            create_leaderboard_tab(
                df,
                key,
                search_leaderboard,
                update_modelselector_group,
                update_leaderboard,
                column_selector_value,
            )
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("🚀 Submit here!", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT_OPTION1, elem_classes="markdown-text")

            # Option 1: the model can be served with vLLM, so only metadata and
            # sampling parameters are needed.
            with gr.Row():
                gr.Markdown("## ✉️✨ Submit your model here! (if vLLM inference is available)", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    benchmark_type = gr.Dropdown(
                        choices=["TRUEBench v0.1"],
                        label="The name of the benchmark to be evaluated",
                        multiselect=False,
                        value="TRUEBench v0.1",
                        interactive=True,
                    )
                    model_name_textbox = gr.Textbox(label="Model name")
                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                    precision = gr.Dropdown(
                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
                        label="Precision",
                        multiselect=False,
                        value="float16",
                        interactive=True,
                    )
                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
                    vllm_version_type = gr.Dropdown(
                        choices=VLLM_VERSIONS,
                        label="vLLM version",
                        multiselect=False,
                        value="v0.9.2",
                        interactive=True,
                    )
                with gr.Column():
                    temperature_textbox = gr.Textbox(label="Sampling temperature (default: 1.0)", placeholder="1.0")
                    top_p_textbox = gr.Textbox(label="Top-p (default: 1.0)", placeholder="1.0")
                    top_k_textbox = gr.Textbox(label="Top-k (default: -1)", placeholder="-1")
                    presence_penalty_textbox = gr.Textbox(label="Presence penalty (default: 0.0)", placeholder="0.0")
                    frequency_penalty_textbox = gr.Textbox(label="Frequency penalty (default: 0.0)", placeholder="0.0")
                    repetition_penalty_textbox = gr.Textbox(label="Repetition penalty (default: 1.0)", placeholder="1.0")

            login_button = gr.LoginButton()
            submit_button = gr.Button("Submit Eval")
            submission_result = gr.Markdown()
            # Resolve the logged-in user and their organizations first, then file the submission.
            event = submit_button.click(get_profile_and_organizations, inputs=[], outputs=[user_state, organization_state])
            event.then(
                add_new_eval_option1,
                [
                    benchmark_type,
                    model_name_textbox,
                    base_model_name_textbox,
                    revision_name_textbox,
                    precision,
                    temperature_textbox,
                    top_p_textbox,
                    top_k_textbox,
                    presence_penalty_textbox,
                    frequency_penalty_textbox,
                    repetition_penalty_textbox,
                    vllm_version_type,
                    user_state,
                    organization_state,
                ],
                submission_result,
            )
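
            # Note: all sampling-parameter fields above are plain text; they are
            # presumably parsed and validated inside add_new_eval_option1 /
            # add_new_eval_option2 (src/submission/submit.py) before being queued.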
            # Option 2: vLLM cannot serve the model, so the submitter also
            # provides code snippets for loading, inference, and termination.
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT_OPTION2, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("## ✉️✨ Submit your model here! (if vLLM inference is unavailable)", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    benchmark_type2 = gr.Dropdown(
                        choices=["TRUEBench v0.1"],
                        label="The name of the benchmark to be evaluated",
                        multiselect=False,
                        value="TRUEBench v0.1",
                        interactive=True,
                    )
                    model_name_textbox2 = gr.Textbox(label="Model name")
                    revision_name_textbox2 = gr.Textbox(label="Revision commit", placeholder="main")
                    precision2 = gr.Dropdown(
                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
                        label="Precision",
                        multiselect=False,
                        value="float16",
                        interactive=True,
                    )
                    base_model_name_textbox2 = gr.Textbox(label="Base model (for delta or adapter weights)")
                with gr.Column():
                    temperature_textbox2 = gr.Textbox(label="Sampling temperature (default: 1.0)", placeholder="1.0")
                    top_p_textbox2 = gr.Textbox(label="Top-p (default: 1.0)", placeholder="1.0")
                    top_k_textbox2 = gr.Textbox(label="Top-k (default: -1)", placeholder="-1")
                    presence_penalty_textbox2 = gr.Textbox(label="Presence penalty (default: 0.0)", placeholder="0.0")
                    frequency_penalty_textbox2 = gr.Textbox(label="Frequency penalty (default: 0.0)", placeholder="0.0")
                    repetition_penalty_textbox2 = gr.Textbox(label="Repetition penalty (default: 1.0)", placeholder="1.0")
            with gr.Row():
                with gr.Column():
                    model_load_code_snippet_textbox = gr.Textbox(
                        label="Code for model loading",
                        lines=15,
                        placeholder="model = AutoModel.from_pretrained('your model name', revision=revision)",
                    )
                with gr.Column():
                    inference_code_snippet_textbox = gr.Textbox(label="Code for inference", lines=15, placeholder="output = model(...)")
                with gr.Column():
                    terminate_code_snippet_textbox = gr.Textbox(label="Code for termination", lines=15)

            login_button2 = gr.LoginButton()
            submit_button2 = gr.Button("Submit Eval")
            submission_result2 = gr.Markdown()
            event2 = submit_button2.click(get_profile_and_organizations, inputs=[], outputs=[user_state, organization_state])
            event2.then(
                add_new_eval_option2,
                [
                    benchmark_type2,
                    model_name_textbox2,
                    base_model_name_textbox2,
                    revision_name_textbox2,
                    precision2,
                    temperature_textbox2,
                    top_p_textbox2,
                    top_k_textbox2,
                    presence_penalty_textbox2,
                    frequency_penalty_textbox2,
                    repetition_penalty_textbox2,
                    model_load_code_snippet_textbox,
                    inference_code_snippet_textbox,
                    terminate_code_snippet_textbox,
                    user_state,
                    organization_state,
                ],
                submission_result2,
            )
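
            # Option 3: a third submission path, described by
            # EVALUATION_QUEUE_TEXT_OPTION3 (defined in src/about.py).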
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT_OPTION3, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )
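
# Restart the Space every 30 minutes; the queue and results datasets are
# re-downloaded at startup, so this keeps the leaderboard data fresh.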
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch()