elibrowne committed · 8861533 · 1 parent: df99dda
Persistence and data collection?

app.py CHANGED
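In summary, this commit drops the `CommitScheduler`-based JSONL logging (along with the now-dead `commit_current_and_reset` helper), threads a `qid` argument through `reset_current_response`, and instead banks each finished `current_response` into `user_data["responses"]`, which `update_huggingface` pushes after every question. The hunks below are annotated with short sketches; anything not visible in the diff itself is flagged as an assumption.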
```diff
@@ -4,25 +4,9 @@ import os
 # PERSISTENT DATA STORAGE: this code is used to make commits
 
 import json
-from
-from pathlib import Path
-from uuid import uuid4
-from huggingface_hub import CommitScheduler, hf_hub_download, file_exists, HfApi
+from huggingface_hub import hf_hub_download, file_exists, HfApi
 from random import shuffle
 
-JSON_DATASET_DIR = Path("json_dataset")
-JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
-
-JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"
-
-scheduler = CommitScheduler(
-    repo_id="ebrowne/test-data",
-    repo_type="dataset",
-    folder_path=JSON_DATASET_DIR,
-    path_in_repo="data",
-    token = os.getenv("HF_TOKEN")
-)
-
 # Global variables which interact with loading and unloading
 user_data = {}
 current_response = {}
```
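For context, here is the removed persistence pattern assembled as a runnable sketch. `CommitScheduler` is a real `huggingface_hub` utility that commits the contents of `folder_path` to the Hub on a background thread at a regular interval; writers are expected to hold `scheduler.lock` so a background commit never captures a half-written record:

```python
# Minimal sketch of the CommitScheduler pattern this commit removes.
import json
import os
from pathlib import Path
from uuid import uuid4

from huggingface_hub import CommitScheduler

JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"

scheduler = CommitScheduler(
    repo_id="ebrowne/test-data",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data",
    token=os.getenv("HF_TOKEN"),
)

def append_record(record: dict) -> None:
    # Hold the scheduler's lock so a background commit can't
    # read a half-written line of the JSONL file.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump(record, f)
            f.write("\n")
```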
```diff
@@ -81,11 +65,11 @@ def update_huggingface(id):
         token = os.getenv("HF_TOKEN")
     )
 
-def reset_current_response():
+def reset_current_response(qid):
     global current_response
     current_response = {
         "user_id": user_id,
-        "question_id":
+        "question_id": qid,
         "user_answer": 0,
         "e5_scores": [], # list of ten [score, score, score, score]
         "e5_set": [], # two values
```
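The keys written elsewhere in this diff (`*_scores`, `*_set`, `*_generation`, `gold_set`, `gold_generation`) suggest the full record shape. This is an inferred sketch, not the file's actual code: the `colbert_*` keys are assumed to mirror the `e5_*` keys, since each question carries both `top10_e5` and `top10_colbert`:

```python
# Hypothetical full shape of current_response, inferred from the keys
# the diff reads and writes elsewhere.
current_response = {
    "user_id": "abc123",        # set at login
    "question_id": "q001",      # now passed in via qid
    "user_answer": 0,
    "e5_scores": [],            # ten [e0, e1, e2, e3] rows, one per passage
    "e5_set": [],               # [helpfulness, satisfaction] for the whole set
    "e5_generation": [],        # [helpfulness, satisfaction] for the generation
    "colbert_scores": [],       # assumed to mirror the e5_* keys
    "colbert_set": [],
    "colbert_generation": [],
    "gold_set": [],             # gold passage ratings (steps 13-14)
    "gold_generation": [],
}
```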
```diff
@@ -108,17 +92,11 @@ def load_current_question():
         print("Done")
         gr.Info("You've finished — thank you so much! There are no more questions. :)")
         current_question = {"question": "You're done! Thanks so much for your help.", "answers": ["I want to log out now.", "I want to keep answering questions.","I want to keep answering questions.", "I want to keep answering questions."], "correct_answer_index": 0, "top10_e5": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_e5": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_colbert": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_colbert": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_contains_gold_passage": False, "gold_passage": "GOLD PASSAGE: LOG OFF!", "gold_passage_generation": "what do you gain"}
+        reset_current_response("USER FINISHED")
     else:
         qid = user_data["order"][q_index]
         current_question = all_questions[qid]
-
-# This method is being used to save each set of individual scores (in case the main files have issues, the data should be saved)
-def commit_current_and_reset():
-    with scheduler.lock:
-        with JSON_DATASET_PATH.open("a") as f:
-            json.dump(current_response, f)
-            f.write("\n")
-    reset_current_response()
+        reset_current_response(user_data["order"][q_index])
 
 # THEMING: colors and styles (Gradio native)
 
```
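`update_huggingface` itself is outside the hunk; only its trailing `token = os.getenv("HF_TOKEN")` argument and the `HfApi`/`file_exists`/`hf_hub_download` imports are visible. A plausible body, where the file layout and names are pure assumptions:

```python
import json
import os

from huggingface_hub import HfApi

user_data = {"current": 0, "responses": []}  # stand-in for the app's global

def update_huggingface(id):
    # Hypothetical reconstruction: persist this user's progress and answers
    # as one JSON file in the same dataset repo the scheduler used to target.
    api = HfApi()
    local_path = f"{id}.json"               # assumed per-user file name
    with open(local_path, "w") as f:
        json.dump(user_data, f)
    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"users/{id}.json",    # assumed layout in the repo
        repo_id="ebrowne/test-data",
        repo_type="dataset",
        token=os.getenv("HF_TOKEN"),
    )
```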
```diff
@@ -189,9 +167,11 @@ with gr.Blocks(theme = theme) as user_eval:
     def next_p(e0, e1, e2, e3):
         global step
         global mode
+        global current_response
         step += 1
-
-
+        # Add user data to the current response
+        current_response[user_data["modes"][user_data["current"]][mode] + "_scores"].append([e0, e1, e2, e3])
+        # Next item
         if step == len(current_question["top10_" + user_data["modes"][user_data["current"]][mode]]): # should always be 10
             # Step 10: all sources
             collapsible_string = ""
```
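The storage key is built from the per-question mode pair. A worked example under an assumed `user_data` layout where `"modes"` holds one `["e5", "colbert"]` pair per question (the `shuffle` import suggests the pair order is randomized per user):

```python
# Worked example of the key construction; the layout is an assumption.
user_data = {"current": 3, "modes": [["e5", "colbert"]] * 10}
mode = 0
key = user_data["modes"][user_data["current"]][mode] + "_scores"
print(key)  # -> "e5_scores"; with mode = 1 this would be "colbert_scores"
```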
```diff
@@ -224,10 +204,13 @@ with gr.Blocks(theme = theme) as user_eval:
         global step
         global mode
         global user_data
+        global current_response
         step += 1
-
+
         if step == 11:
             # Step 11: guaranteed to be generation
+            # Add user data to the current response as SET evaluation, which comes before the generation
+            current_response[user_data["modes"][user_data["current"]][mode] + "_set"] = [e_h, e_s]
             return {
                 selection: gr.HTML("""
                     <h2> Autogenerated Response </h2>
```
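Taken together, the `next_g` additions turn `step` into a small state machine. A compressed sketch of the control flow as this commit leaves it (with the gold-passage branch now unconditional); `e_h`/`e_s` are the two slider ratings being banked at each transition:

```python
# Compressed sketch of next_g's branches; step was already incremented.
def next_g_sketch(step: int, mode: int) -> str:
    if step == 11:
        return "store <mode>_set, show the <mode> generation"
    if step > 11:
        if mode == 0:
            return "store <mode>_generation, restart the passages with mode = 1"
        if step == 12:
            return "store <mode>_generation, show the gold passage"
        if step == 13:
            return "store gold_set, show the gold generation"
        return "store gold_generation, persist, load the next question"  # step 14
    return "steps 1-10 are passage-by-passage ratings handled by next_p"
```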
```diff
@@ -236,9 +219,11 @@ with gr.Blocks(theme = theme) as user_eval:
                 eval_satisfied: gr.Slider(value = 1)
             }
         # Steps 12 and 13 are gold passage + gold passage generation IF it is applicable
-        if step > 11 and not current_question["top10_contains_gold_passage"]
+        if step > 11: # and not current_question["top10_contains_gold_passage"]
             # When mode is 0 -> reset with mode = 1
             if mode == 0:
+                # The user just evaluated a generation for mode 0
+                current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
                 return {
                     selection: gr.HTML("""
                         <h2> Retrieved Passage </h2>
@@ -249,6 +234,8 @@ with gr.Blocks(theme = theme) as user_eval:
                 }
             # When mode is 1 -> display GP and GP generation, then switch
             if step == 12:
+                # The user just evaluated a generation for mode 1
+                current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
                 return {
                     selection: gr.HTML("""
                         <h2> Retrieved Passage </h2>
@@ -258,6 +245,8 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_satisfied: gr.Slider(value = 1)
                 }
             elif step == 13:
+                # The user just evaluated the gold passage
+                current_response["gold_set"] = [e_h, e_s]
                 return {
                     selection: gr.HTML("""
                         <h2> Autogenerated Response </h2>
```
```diff
@@ -266,9 +255,11 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_helps: gr.Slider(value = 1),
                     eval_satisfied: gr.Slider(value = 1)
                 }
-            else:
+            else: # step = 14
+                # The user just evaluated the gold passage generation
+                current_response["gold_generation"] = [e_h, e_s]
                 user_data["current"] += 1
-                #
+                user_data["responses"].append(current_response) # adds new answers to current list of responses
                 update_huggingface(user_id) # persistence — update progress online, save answers
                 load_current_question()
                 return {
```
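Note the ordering in the `else` branch: the finished record is appended to `user_data["responses"]` before `update_huggingface(user_id)` runs, so the upload already contains it. A toy illustration (names match the app; the bodies are stand-ins):

```python
# The append must precede the upload so the finished response is inside
# user_data when it is serialized and pushed.
user_data = {"responses": []}
current_response = {"gold_generation": [5, 5]}

def update_huggingface(uid):
    print(f"would upload {len(user_data['responses'])} responses for {uid}")

user_data["responses"].append(current_response)  # bank the finished record
update_huggingface("user-1")                     # then push everything at once
```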
```diff
@@ -277,19 +268,24 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_helps: gr.Slider(value = 1),
                     eval_satisfied: gr.Slider(value = 1)
                 }
+
+        # VERY UNCLEAN CODE: for practical purposes, this else block is unreachable: not current_question["top10_contains_gold_passage"] will always be True
+        """
         else:
             # When mode is 0 -> reset with mode = 1
             if mode == 0:
                 return {
-                    selection: gr.HTML("""
+                    selection: gr.HTML(\"""
                         <h2> Retrieved Passage </h2>
-                        <p> """ + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
+                        <p> \""" + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
                     forward_btn: gr.Textbox("load new data"),
                     eval_helps: gr.Slider(value = 1),
                     eval_satisfied: gr.Slider(value = 1)
                 }
             # When mode is 1 -> change question
             user_data["current"] += 1
+            user_data["responses"].append(current_response) # adds new answers to current list of responses
+            # Update stored data with new current, additional data
             update_huggingface(user_id)
             load_current_question()
             return {
```
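The odd-looking `\"""` escapes exist because the unreachable branch is being fenced off inside a big string literal: a string ends at the first unescaped triple quote, so the dead code's own `gr.HTML("""` markers would terminate it early. A minimal demonstration:

```python
# Escaping the first quote turns the inner triple quote into plain
# characters, so the outer string literal survives intact.
dead_code = """
return {
    selection: gr.HTML(\"""
        <h2> Retrieved Passage </h2>
    \""")
}
"""
print(dead_code.count('"""'))  # -> 2: the escaped quotes are just data here
```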
```diff
@@ -298,6 +294,7 @@ with gr.Blocks(theme = theme) as user_eval:
                 eval_helps: gr.Slider(value = 1),
                 eval_satisfied: gr.Slider(value = 1)
             }
+        """
     btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3])
     btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied], outputs = [selection, forward_btn, eval_helps, eval_satisfied])
 
```