Spaces:

BtB-ExpC
/

Exercises

Sleeping

App Files Files Community

BtB-ExpC committed on Feb 9, 2025

Commit

57a4fb4

1 Parent(s): 040f013

new double negation prompt + 10 responses instead of 5

Browse files

Files changed (2) hide show

app.py +14 -4
config/templates.py +12 -12

app.py CHANGED Viewed

@@ -95,7 +95,7 @@ async def run_diagnoser(user_query: str, chosen_model: str, exercise_format: str
     responses = await asyncio.gather(*tasks)
     # pad up to 5 if needed
-    all_responses = list(responses) + [""] * (5 - len(responses))
     # Return a tuple of exactly 5 responses.
     return tuple(all_responses)
@@ -133,9 +133,9 @@ with gr.Blocks() as interface:
                 interactive=True,
             )
             sampling_count = gr.Dropdown(
-                choices=["1", "2", "3", "4", "5"],
                 value="1",
-                label="Sampling Count 🚧",
                 interactive=True,
             )
         # Set up a change callback so that if the user selects "Claude 3.5", the exercise format updates to "XML"
@@ -163,6 +163,11 @@ with gr.Blocks() as interface:
                 diagnoser_response_3 = gr.Textbox(label="Response 3", interactive=False)
                 diagnoser_response_4 = gr.Textbox(label="Response 4", interactive=False)
                 diagnoser_response_5 = gr.Textbox(label="Response 5", interactive=False)
             with gr.TabItem("🤔 Generate distractors"):
                 # Insert an HTML info icon with a tooltip at the top of the tab content.
                 gr.HTML(
@@ -212,7 +217,12 @@ with gr.Blocks() as interface:
             diagnoser_response_2,
             diagnoser_response_3,
             diagnoser_response_4,
-            diagnoser_response_5
         ]
     )

     responses = await asyncio.gather(*tasks)
     # pad up to 10 if needed
+    all_responses = list(responses) + [""] * (10 - len(responses))
     # Return a tuple of exactly 10 responses.
     return tuple(all_responses)
                 interactive=True,
             )
             sampling_count = gr.Dropdown(
+                choices=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
                 value="1",
+                label="Sampling Count",
                 interactive=True,
             )
         # Set up a change callback so that if the user selects "Claude 3.5", the exercise format updates to "XML"
                 diagnoser_response_3 = gr.Textbox(label="Response 3", interactive=False)
                 diagnoser_response_4 = gr.Textbox(label="Response 4", interactive=False)
                 diagnoser_response_5 = gr.Textbox(label="Response 5", interactive=False)
+                diagnoser_response_6 = gr.Textbox(label="Response 6", interactive=False)
+                diagnoser_response_7 = gr.Textbox(label="Response 7", interactive=False)
+                diagnoser_response_8 = gr.Textbox(label="Response 8", interactive=False)
+                diagnoser_response_9 = gr.Textbox(label="Response 9", interactive=False)
+                diagnoser_response_10 = gr.Textbox(label="Response 10", interactive=False)
             with gr.TabItem("🤔 Generate distractors"):
                 # Insert an HTML info icon with a tooltip at the top of the tab content.
                 gr.HTML(
             diagnoser_response_2,
             diagnoser_response_3,
             diagnoser_response_4,
+            diagnoser_response_5,
+            diagnoser_response_6,
+            diagnoser_response_7,
+            diagnoser_response_8,
+            diagnoser_response_9,
+            diagnoser_response_10
         ]
     )

config/templates.py CHANGED Viewed

@@ -21,11 +21,11 @@ diagnose_template = ChatPromptTemplate(
 template_diagnose_double_negation = ChatPromptTemplate(
     messages=[
-        ("system", """You analyze a multiple-choice exercise for the presence of double negatives.
         Here are some examples of double negatives:
         <example 1>
-        <exercise 1>
         Stelling
         Expertfolio wordt niet aangeboden door ENI.
@@ -35,14 +35,14 @@ template_diagnose_double_negation = ChatPromptTemplate(
         Correct antwoord:
         1. Deze stelling is niet correct
-        </exercise 1>
         <double negative explanation>
-        Een niet-correctvraag met 'niet' (het is niet correct dat Expertfolio niet wordt aangeboden) is een dubbele ontkenning.
         </double negative explanation>
         </example 1>
         <example 2>
-        <exercise 2>
         Vraag
         Welk aspect hoort niet bij eenzaamheid?
@@ -53,10 +53,10 @@ template_diagnose_double_negation = ChatPromptTemplate(
         4. Geen lijfelijk contact hebben
         Correct antwoord:
-        Het ontbreken van betekenisvolle relaties
-        </exercise 2>
         <double negative explanation>
-        In de vraag staat al 'niet'. In keuzeoptie 4 staat ook nog 'geen', dat is dus een dubbele ontkenning.
         </double negative explanation>
         </example 2>.
         If it's obvious that there is or isn't a double negative in this exercise, just give a short one-sentence diagnosis on this.
@@ -167,16 +167,16 @@ diagnose_scorecard_template = ChatPromptTemplate(
         (and a third icon if need be: - ❔ means the diagnosis is unclear)
         The scorecard should always look like this:
         <template>
-        The exercise does not contain/contains a double negative: ✅/❌ -- The correct answer does not/does stand out: ✅/❌ -- None/Some of the distractors are too obviously false: ✅/❌ -- None/Some of the distractors are actually also kinda correct: ✅/❌
         </template>
         <example 1>
-        The exercise doesn't contain a double negative: ✅ -- The correct answer does not stand out: ✅ -- None of the distractors are too obviously false: ✅ -- None of the distractors are actually also kinda correct: ✅
         </example 1>
         <example 2>
-        The exercise doesn't contain a double negative: ✅ -- The correct answer does stand out: ❌ -- None of the distractors are too obviously false: ✅ -- Some of the distractors are actually also kinda correct: ❌
         </example 2>
         <example 3>
-        The exercise contains a double negative: ❌ -- The correct answer does not stand out: ✅ -- Some of the distractors are too obviously false: ❌ -- None of the distractors are actually also kinda correct: ✅
         </example 3>
         """),
         ("human", "{combined_diagnosis}")

 template_diagnose_double_negation = ChatPromptTemplate(
     messages=[
+        ("system", """Analyze a multiple-choice exercise for the presence of double negatives: either two negations in the question/statement itself, or a negation in the question/statement AND in an answer option.
         Here are some examples of double negatives:
         <example 1>
+        <exercise>
         Stelling
         Expertfolio wordt niet aangeboden door ENI.
         Correct antwoord:
         1. Deze stelling is niet correct
+        </exercise>
         <double negative explanation>
+        The statement itself contains one negation (wordt 'niet' aangeboden), and one answer option contains another (is 'niet' correct). Interpreted together, this forms a statement with a double negation ('het is niet correct dat Expertfolio niet wordt aangeboden' is een dubbele ontkenning).
         </double negative explanation>
         </example 1>
         <example 2>
+        <exercise>
         Vraag
         Welk aspect hoort niet bij eenzaamheid?
         4. Geen lijfelijk contact hebben
         Correct antwoord:
+        1. Betekenisvolle relaties hebben
+        </exercise>
         <double negative explanation>
+        The question itself contains one negation  (hoort 'niet' bij), and an answer option contains the second ('Geen' lijfelijk contact). Together, the resulting statement contains a double negation ('Geen lichamelijk contact hebben hoort niet bij eenzaamheid').
         </double negative explanation>
         </example 2>.
         If it's obvious that there is or isn't a double negative in this exercise, just give a short one-sentence diagnosis on this.
         (and a third icon if need be: - ❔ means the diagnosis is unclear)
         The scorecard should always look like this:
         <template>
+        1. The exercise does not contain/contains a double negative: ✅/❌ -- 2. The correct answer does not/does stand out: ✅/❌ -- 3. None/Some of the distractors are too obviously false: ✅/❌ -- 4. None/Some of the distractors are actually also kinda correct: ✅/❌
         </template>
         <example 1>
+        1. The exercise doesn't contain a double negative: ✅ -- 2. The correct answer does not stand out: ✅ -- 3. None of the distractors are too obviously false: ✅ -- 4. None of the distractors are actually also kinda correct: ✅
         </example 1>
         <example 2>
+        1. The exercise doesn't contain a double negative: ✅ -- 2. The correct answer does stand out: ❌ -- 3. None of the distractors are too obviously false: ✅ -- 4. Some of the distractors are actually also kinda correct: ❌
         </example 2>
         <example 3>
+        1. The exercise contains a double negative: ❌ -- 2. The correct answer does not stand out: ✅ -- 3. Some of the distractors are too obviously false: ❌ -- 4. None of the distractors are actually also kinda correct: ✅
         </example 3>
         """),
         ("human", "{combined_diagnosis}")