Spaces:
Sleeping
Sleeping
Petr Tsvetkov
Add examples to the survey guide; fix the verbosity question to make "5" a positive grade
2065aab
| import os | |
| import random | |
| import uuid | |
| from datetime import datetime | |
| from itertools import chain | |
| import gradio as gr | |
| from data_loader import load_data | |
# Hugging Face credentials and target dataset come from the environment
# (Space secrets); both may be None locally, which the saver below needs.
HF_TOKEN = os.environ.get('HF_TOKEN')
HF_DATASET = os.environ.get('HF_DATASET')
# Widgets per model form (4 radios + 1 comment box); must stay in sync with
# the `model_questions` built in the UI below (asserted there).
N_QUESTIONS = 5
data, models = load_data()
n_samples = len(data)    # number of commits to label
n_models = len(models)   # number of commit-message variants per commit
# Flagged feedback rows are appended to a private HF dataset.
saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)
def convert_diff_to_unified(diff):
    """Render a list of per-file modification records as one unified diff.

    Each record must provide 'old_path', 'new_path' and 'diff' keys; the
    per-file chunks are joined with single newlines.
    """
    chunks = []
    for mod in diff:
        chunks.append(
            f'--- {mod["old_path"]}\n'
            f'+++ {mod["new_path"]}\n'
            f'{mod["diff"]}'
        )
    return "\n".join(chunks)
def get_diff2html_view(raw_diff):
    """Wrap a raw unified diff in the HTML scaffold consumed by diff2html.

    The diff text goes into the hidden #diff-raw element; front-end JS
    (loaded via head.html) reads it and renders into #diff-view.
    NOTE(review): raw_diff is interpolated unescaped — confirm the JS reads
    it via textContent before exposing untrusted diff content.
    """
    return (
        "\n"
        "<div style='width:100%; height:1400px; overflow:auto; position: relative'>\n"
        f"<div id='diff-raw' hidden>{raw_diff}</div>\n"
        '<div class="d2h-view-wrapper">\n'
        "<div id='diff-view'></div>\n"
        "</div>\n"
        "</div>\n"
    )
def get_github_link_md(repo, commit_hash):
    """Return a Markdown link to the given commit on GitHub.

    Args:
        repo: 'owner/name' repository slug.
        commit_hash: full or abbreviated commit SHA.

    The second parameter was renamed from ``hash``, which shadowed the
    builtin; the only caller in this file passes positionally, so the
    rename is backward compatible.
    """
    return f'[See the commit on Github](https://github.com/{repo}/commit/{commit_hash})'
def update_commit_view(sample_ind):
    """Build the value tuple that refreshes the commit view for one sample.

    Returned order matches the `commit_view` component list defined below:
    [github_link, diff_view, repo, hash, loaded_ts, n_forms_submitted]
    followed by one commit message per (shuffled) model and then the
    shuffled model names. Returns None when sample_ind is out of range.
    """
    if sample_ind >= n_samples:
        return None
    record = data[sample_ind]
    diff_view = get_diff2html_view(convert_diff_to_unified(record['mods']))
    repo_val = record['repo']
    hash_val = record['hash']
    github_link_md = get_github_link_md(repo_val, hash_val)
    # Recorded so time-to-answer can be derived from the submit timestamp.
    diff_loaded_timestamp = datetime.now().isoformat()
    # Shuffle a copy so the module-level model order is untouched; the
    # shuffled names are returned so answers map back to models on submit.
    models_shuffled = models[:]
    random.shuffle(models_shuffled)
    commit_messages = tuple(record[model] for model in models_shuffled)
    # NOTE(review): n_forms_submitted here is the gr.Number component
    # defined later at module level; returning the component itself (not a
    # value) appears intended to reset its value to the default (0) —
    # confirm against the Gradio version in use.
    return (
        github_link_md, diff_view, repo_val, hash_val, diff_loaded_timestamp,
        n_forms_submitted) + commit_messages + tuple(
        models_shuffled)
def reset_answers():
    """Clear every answer widget (all questions across all model forms)."""
    total_widgets = N_QUESTIONS * n_models
    return tuple(None for _ in range(total_widgets))
def reset_submit_buttons():
    """Return a fresh, enabled 'Submit' button for each model's form."""
    buttons = []
    for _ in range(n_models):
        buttons.append(gr.Button(value="Submit", interactive=True))
    return tuple(buttons)
def reset_continue_button():
    """Return the disabled continue button shown before any form is submitted."""
    label = f"0/{n_models} forms submitted"
    return gr.Button(value=label, interactive=False)
def next_sample(current_sample_ind, shuffled_idx):
    """Advance the survey to the next sample and reset all forms.

    Args:
        current_sample_ind: samples labeled/skipped so far (slider value).
        shuffled_idx: this session's random permutation of sample indices.

    Returns:
        A tuple matching [slider] + commit_view + question widgets +
        submit buttons + [continue button], or None when there is no
        next sample to advance to.
    """
    # BUG FIX: the original guard only rejected current_sample_ind ==
    # n_samples, so advancing past the final sample incremented the index
    # to n_samples and raised IndexError on shuffled_idx[...] below.
    # Reject the advance whenever the incremented index is out of range.
    if current_sample_ind + 1 >= n_samples:
        return None
    current_sample_ind += 1
    updated_view = update_commit_view(shuffled_idx[current_sample_ind])
    return ((current_sample_ind,) + updated_view + reset_answers()
            + reset_submit_buttons() + (reset_continue_button(),))
# Extra <head> markup (diff2html assets etc.) injected into the page.
# BUG FIX: read explicitly as UTF-8 instead of the host locale's default
# encoding, so the page renders identically on any deployment host.
with open("head.html", encoding="utf-8") as head_file:
    head_html = head_file.read()
# UI definition and event wiring. Layout: progress slider + skip button on
# top, the diff on the left, and one tab of rating questions per model
# variant on the right.
with gr.Blocks(theme=gr.themes.Soft(), head=head_html, css="style_overrides.css") as application:
    # Hidden per-sample metadata mirrored into every feedback row.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    # Per-session random permutation of sample indices (set in init_session).
    shuffled_idx_val = gr.JSON(visible=False)
    with gr.Row():
        with gr.Accordion("Help"):
            # NOTE(review): read without an explicit encoding — relies on
            # the host locale being UTF-8.
            with open("survey_guide.md") as content_file:
                gr.Markdown(content_file.read())
    with gr.Row():
        # Progress indicator; advanced programmatically, not by the user.
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")
    with gr.Row():
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.HTML()
        with gr.Column(scale=1):
            # Earlier per-question accumulator design, kept for reference:
            # commit_msg = []
            # is_correct = []
            # # has_what = []
            # # has_why = []
            # is_not_verbose = []
            # # has_headline = []
            # easy_to_read = []
            # overall_rating = []
            # comments = []
            # model_name = []
            commit_msgs = []      # one read-only TextArea per model tab
            questions = []        # questions[i]: the N_QUESTIONS widgets of model i
            model_names = []      # hidden textboxes holding each tab's model id
            submit_buttons = []
            SCALE = list(range(1, 6))  # 5-point Likert scale
            for model_ind in range(n_models):
                with gr.Tab(f"Variant #{model_ind + 1}"):
                    commit_msgs.append(gr.TextArea(label="Commit message (can be scrollable)",
                                                   interactive=False,
                                                   ))
                    gr.Markdown("## Please, rate your level of agreement with each statement\n"
                                "\n"
                                "*1 - strongly disagree, 2 - disagree, 3 - not sure, 4 - agree, 5 - strongly agree*")
                    model_questions = []
                    model_questions.append(gr.Radio(
                        info='The information provided in the commit message is consistent with the code changes.',
                        label=f'is_correct',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))
                    # Disabled question variants, kept for reference:
                    # model_questions.append(gr.Radio(
                    #     info='The commit message answers the question of WHAT changes have been made.',
                    #     label=f'has_what',
                    #     show_label=False,
                    #     choices=SCALE,
                    #     interactive=True))
                    #
                    # model_questions.append(gr.Radio(
                    #     info='The commit message answers the question of WHY these changes have been made.',
                    #     label=f'has_why',
                    #     show_label=False,
                    #     choices=SCALE,
                    #     interactive=True))
                    model_questions.append(gr.Radio(
                        info='The commit message cannot be substantially shortened without loss of important '
                             'information.',
                        label=f'is_not_verbose',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))
                    # model_questions.append(gr.Radio(
                    #     info='The commit message includes a short headline that provides a good overview of the '
                    #          'changes.',
                    #     label=f'has_headline',
                    #     show_label=False,
                    #     choices=SCALE,
                    #     interactive=True))
                    model_questions.append(gr.Radio(
                        info='The commit message is easy to read and to understand.',
                        label=f'easy_to_read',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))
                    model_questions.append(gr.Radio(
                        info='Please, describe your overall impression of the commit message (1 - very bad, 5 - very '
                             'good)',
                        label=f'overall_rating',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))
                    model_questions.append(gr.Textbox(
                        info='Additional comments on the commit message',
                        label=f'comments',
                        show_label=False,
                        interactive=True))
                    # Keep N_QUESTIONS in sync with the widgets added above.
                    assert len(model_questions) == N_QUESTIONS
                    questions.append(model_questions)
                    model_names.append(gr.Textbox(interactive=False, label=f'model', visible=False))
                    submit_buttons.append(gr.Button(value="Submit"))
    # How many of this sample's n_models forms have been submitted.
    n_forms_submitted = gr.Number(visible=False, value=0, precision=0)
    continue_btn = reset_continue_button()
    session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                             label='session')
    with gr.Row(visible=False):
        sample_loaded_timestamp = gr.Textbox(info="Sample loaded", label='loaded_ts', interactive=False,
                                             container=True, show_label=False)
        # Refreshed every second so the flagged submit timestamp is current.
        sample_submitted_timestamp = gr.Textbox(info="Current time",
                                                interactive=False, container=True, show_label=False,
                                                value=lambda: datetime.now().isoformat(), every=1.0,
                                                label='submitted_ts')
    # Components refreshed by update_commit_view; order must match the
    # tuple that function returns.
    commit_view = [
        github_link,
        diff_view,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        n_forms_submitted,
        *commit_msgs,
        *model_names,
    ]
    # Metadata columns stored with every flagged feedback row.
    feedback_metadata = [
        session_val,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        sample_submitted_timestamp
    ]
    # The dataset schema is derived from model 0's widgets; all model forms
    # must therefore have the same shape.
    saver.setup([current_sample_sld] + feedback_metadata + questions[0] + [model_names[0], ], "feedback")
    questions_list = list(chain.from_iterable(questions))
    # Skip and continue both advance to the next sample and reset the forms.
    skip_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=[current_sample_sld] + commit_view + questions_list + submit_buttons + [continue_btn])
    continue_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                       outputs=[current_sample_sld] + commit_view + questions_list + submit_buttons + [continue_btn])

    def submit_for_model(current_sample, n_forms_submitted_val, *args):
        """Persist one model's form and update the submit/continue buttons.

        *args carries feedback_metadata values + that model's answers + its
        model name, in the same order as saver.setup above.
        """
        saver.flag((current_sample,) + args)
        n_forms_submitted_val += 1
        all_forms_submitted = n_forms_submitted_val == n_models
        # Disable this form's button; enable "Next sample" only once every
        # model's form has been submitted.
        return (gr.Button(value="Submitted", interactive=False),
                n_forms_submitted_val,
                gr.Button(
                    "Next sample" if all_forms_submitted else f"{n_forms_submitted_val}/{n_models} forms submitted",
                    interactive=all_forms_submitted))

    for model_ind in range(n_models):
        submit_buttons[model_ind].click(
            submit_for_model,
            inputs=[current_sample_sld, n_forms_submitted] + feedback_metadata + questions[model_ind] + [
                model_names[model_ind], ],
            outputs=[submit_buttons[model_ind], n_forms_submitted, continue_btn]
        )

    def init_session(current_sample):
        """Start a session: fresh UUID, fresh sample order, first commit view."""
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)
        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])

    application.load(init_session,
                     inputs=[current_sample_sld],
                     outputs=[session_val, shuffled_idx_val] + commit_view, )

application.launch()