Spaces:
Sleeping
Sleeping
| from datetime import datetime | |
| import os | |
| import random | |
| import uuid | |
| import gradio as gr | |
| from datasets import load_dataset | |
# Access token and target dataset repository used to store collected feedback.
# Both are read from the environment and are None when the variable is unset.
HF_TOKEN = os.getenv('HF_TOKEN')
HF_DATASET = os.getenv('HF_DATASET')

configuration = "commitchronicle-py-long"  # select a configuration

# Samples to be labeled: the "test" split of the selected configuration,
# cached under the local "data" directory.
dataset = load_dataset(
    "JetBrains-Research/lca-cmg",
    configuration,
    split="test",
    cache_dir="data",
)
n_samples = len(dataset)

# Writes submitted feedback rows to the (private) dataset named above.
saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)
def convert_diff_to_unified(diff):
    """Render per-file modifications as a single unified-diff string.

    Args:
        diff: iterable of mappings, each providing the keys ``"old_path"``,
            ``"new_path"`` and ``"diff"``.

    Returns:
        One section per file -- a ``---``/``+++`` header followed by that
        file's diff body -- joined with newlines. Empty input yields ``""``.
    """
    def _header_path(path):
        # A missing path would otherwise be formatted as the literal text
        # "None"; unified diffs name the absent side of a change "/dev/null".
        # NOTE(review): assumes a None path marks an added/deleted file --
        # confirm against the dataset schema.
        return "/dev/null" if path is None else path

    return "\n".join(
        f'--- {_header_path(modified_file["old_path"])}\n'
        f'+++ {_header_path(modified_file["new_path"])}\n'
        f'{modified_file["diff"]}'
        for modified_file in diff
    )
def get_diff2html_view(raw_diff):
    """Wrap a raw diff in the HTML scaffold used to display it.

    The diff text is placed inside a hidden ``diff-raw`` element next to an
    empty ``diff-view`` element.
    NOTE(review): presumably a client-side script loaded elsewhere reads
    ``diff-raw`` and renders into ``diff-view`` -- not visible in this file.

    Args:
        raw_diff: diff text to embed.

    Returns:
        The HTML fragment as a string.
    """
    # Imported locally under a distinct name because this function already
    # uses `html` as a local variable.
    from html import escape

    # Diffs routinely contain '<', '>' and '&' (and may contain '</div>' or
    # '<script>'); embedding them verbatim would corrupt the markup or inject
    # active content. Escaped text is decoded by the browser, so the element's
    # text content is still exactly `raw_diff`.
    escaped_diff = escape(raw_diff, quote=False)
    html = f"""
<div style='width:100%; height:720px; overflow:auto; position: relative'>
<div id='diff-raw' hidden>{escaped_diff}</div>
<div class="d2h-view-wrapper">
<div id='diff-view'></div>
</div>
</div>
"""
    return html
def update_commit_view(sample_ind):
    """Build the values displayed for one dataset sample.

    Args:
        sample_ind: index into the module-level ``dataset``.

    Returns:
        A 5-tuple ``(diff_view, commit_msg, repo_val, hash_val,
        diff_loaded_timestamp)``. When ``sample_ind`` is past the end of the
        dataset, every element is ``None`` so the tuple shape is preserved.
    """
    if sample_ind >= n_samples:
        # Callers concatenate this result onto another tuple; a bare None
        # here would make that concatenation raise TypeError.
        return (None,) * 5

    record = dataset[sample_ind]
    return (
        get_diff2html_view(convert_diff_to_unified(record['mods'])),
        record['message'],
        record['repo'],
        record['hash'],
        datetime.now().isoformat(),  # when this sample was prepared for display
    )
def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample in the session's shuffled order.

    Args:
        current_sample_ind: position (within ``shuffled_idx``) of the sample
            currently shown.
        shuffled_idx: per-session permutation of dataset indices.

    Returns:
        A 6-tuple: the new position followed by the five values produced by
        ``update_commit_view``. Once every sample has been visited the
        position is ``n_samples`` and the five view values are ``None``.
    """
    # Clamp so repeated clicks after the end keep reporting n_samples.
    next_ind = min(current_sample_ind + 1, n_samples)

    if next_ind >= n_samples or next_ind >= len(shuffled_idx):
        # No sample left: indexing shuffled_idx here would raise IndexError,
        # and returning None would not match the six expected values.
        # One placeholder per commit-view value (diff, message, repo, hash,
        # loaded timestamp).
        return (next_ind,) + (None,) * 5

    return (next_ind,) + update_commit_view(shuffled_idx[next_ind])
# Extra markup passed to gr.Blocks(head=...) below.
# Read with an explicit encoding so the result does not depend on the locale
# of the hosting process.
# NOTE(review): assumes head.html is stored as UTF-8 -- confirm.
with open("head.html", encoding="utf-8") as head_file:
    head_html = head_file.read()
# Labeling UI: layout, feedback form, and event wiring.
# NOTE(review): the nesting of the layout context managers below was
# reconstructed from a copy of this file that had lost its indentation --
# confirm it against the deployed app before relying on it.
with gr.Blocks(theme=gr.themes.Soft(), head=head_html, css="style_overrides.css") as application:
    # Hidden fields holding the identity of the sample currently on screen.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    # Hidden per-session permutation of dataset indices (filled by init_session).
    shuffled_idx_val = gr.JSON(visible=False)

    with gr.Row():
        # Read-only progress indicator; its value is the position within the
        # shuffled order and is passed to / returned from the handlers below.
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        with gr.Column(scale=2):
            # Receives the HTML fragment built by get_diff2html_view.
            diff_view = gr.HTML()
        with gr.Column(scale=1):
            commit_msg = gr.Textbox(label="Commit message",
                                    interactive=False,
                                    )
            gr.Markdown("## Please, rate your level of agreement with each statement\n"
                        "\n"
                        "*1 - strongly disagree, 2 - disagree, 3 - agree, 4 - strongly agree*")
            # Agreement sliders (1-4); each `label` is the field name under
            # which the answer is stored.
            is_correct = gr.Slider(
                info='The information provided in the commit message is consistent with the code changes.',
                label='is_correct',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=4)
            has_what = gr.Slider(
                info='The commit message answers the question of WHAT changes have been made.',
                label='has_what',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=4)
            has_why = gr.Slider(
                info='The commit message answers the question of WHY these changes have been made.',
                label='has_why',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=4)
            # NOTE(review): agreeing with this statement means the message IS
            # verbose, while the stored label is 'is_not_verbose' -- confirm
            # the intended polarity before analysing the collected data.
            is_not_verbose = gr.Slider(
                info='The commit message can be substantially shortened without loss of important information.',
                label='is_not_verbose',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=4)
            has_headline = gr.Slider(
                info='The commit message includes a short headline that provides a good overview of the changes.',
                label='has_headline',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=4)
            easy_to_read = gr.Slider(
                info='The commit message is easy to read and to understand.',
                label='easy_to_read',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=4)
            # Overall impression uses a 1-5 scale, unlike the 1-4 sliders above.
            overall_rating = gr.Slider(
                info='Please, describe your overall impression of the commit message (1 - very bad, 5 - very good)',
                label='overall_rating',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=5)
            comments = gr.Textbox(
                info='Additional comments on the commit message',
                label='comments',
                show_label=False,
                interactive=True)
            submit_btn = gr.Button("Submit and continue")
            # Session identifier generated by init_session on page load.
            session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                                     label='session')
            # Hidden timestamps recorded with every submission.
            with gr.Row(visible=False):
                sample_loaded_timestamp = gr.Textbox(info="Sample loaded", label='loaded_ts', interactive=False,
                                                     container=True, show_label=False)
                # The value is a callable combined with `every=1.0`;
                # presumably Gradio re-evaluates it about once per second so
                # it holds the current time at submission -- confirm.
                sample_submitted_timestamp = gr.Textbox(info="Current time",
                                                        interactive=False, container=True, show_label=False,
                                                        value=lambda: datetime.now().isoformat(), every=1.0,
                                                        label='submitted_ts')

    # Components refreshed whenever a new sample is shown; the order matches
    # the tuple returned by update_commit_view.
    commit_view = [
        diff_view,
        commit_msg,
        repo_val,
        hash_val,
        sample_loaded_timestamp
    ]

    # Components whose values are stored for each submitted sample.
    feedback_form = [
        session_val,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        sample_submitted_timestamp,
        is_correct,
        has_what,
        has_why,
        is_not_verbose,
        has_headline,
        easy_to_read,
        overall_rating,
        comments
    ]

    # Register the stored columns: sample position first, then the form fields.
    saver.setup([current_sample_sld] + feedback_form, "feedback")

    # Skipping moves on to the next sample without storing anything.
    skip_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=[current_sample_sld] + commit_view)

    def submit(current_sample, shuffled_idx, *args):
        """Store the filled-in form for the current sample, then advance.

        ``args`` carries the values of ``feedback_form`` in order; together
        with ``current_sample`` they match the components given to
        ``saver.setup``. Returns whatever ``next_sample`` returns.
        """
        saver.flag((current_sample,) + args)
        return next_sample(current_sample, shuffled_idx)

    submit_btn.click(submit, inputs=[current_sample_sld, shuffled_idx_val] + feedback_form,
                     outputs=[current_sample_sld] + commit_view)

    def init_session(current_sample):
        """Start a session: new id, fresh sample order, and the first view.

        Returns the session id (a random UUID string), the shuffled list of
        dataset indices, and the commit-view values for the sample at
        position ``current_sample`` of that list.
        """
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)
        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])

    # Run init_session when the page loads.
    application.load(init_session,
                     inputs=[current_sample_sld],
                     outputs=[session_val, shuffled_idx_val] + commit_view, )

application.launch()