BtB-ExpC committed on
Commit
7c6a50c
·
1 Parent(s): 125f28b

revamp of clearly incorrect prompt + added 2 new tabs

Browse files
app/ui/prompts_tab.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from chains.learning_objectives_generator.runner import run_learning_objectives_generator  # NOTE(review): unused in this module — confirm before removing
from config.llm_config import llms  # NOTE(review): unused in this module — confirm before removing


def build_prompts_tab():
    """Build the (work-in-progress) "See Prompts" tab.

    Renders an info tooltip, a pipeline-selection dropdown, and a
    non-functional text-search placeholder.

    Returns:
        tuple: ``(pipeline_choice,)`` — the pipeline Dropdown wrapped in a
        1-tuple so the caller's ``(pipeline_choice,) = build_prompts_tab()``
        unpacking works.
    """
    with gr.TabItem("🗒🚧️ See Prompts"):
        gr.HTML(
            """
            <div style="margin-bottom: 10px;">
                <span style="font-size: 1.5em; cursor: help;" title="Behind-the-scenes prompt perusing at your leisure">
                    ℹ️
                </span>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                pipeline_choice = gr.Dropdown(
                    choices=[
                        "Exercise Diagnosis 🩺",
                        "Distractors Brainstorm 🤔",
                        "Learning Objectives Identification 🧠",
                        "ALL OF THEM ✨",
                    ],
                    value="Exercise Diagnosis 🩺",
                    label="Tasks Pipelines",
                )
            with gr.Column(scale=2):
                pass  # only here to keep the first column in check: force narrower dropdown

        # BUG FIX: the original wrote `gr.HTML = gr.Textbox(...)`, which
        # rebinds the gradio module's HTML class to a Textbox instance and
        # silently breaks every later `gr.HTML(...)` call in the process.
        # Bind the component to a local name instead.
        text_search = gr.Textbox(
            label="Text Search 🚧",
            placeholder="Dummy placeholder, doesn't work (yet?)",
        )

    # BUG FIX: `return (pipeline_choice)` is just the bare component
    # (parentheses alone don't make a tuple); the caller unpacks with
    # `(pipeline_choice,) = build_prompts_tab()`, which requires an
    # iterable of length 1. Return a real 1-tuple.
    return (pipeline_choice,)
app/ui/test_set_tab.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from chains.learning_objectives_generator.runner import run_learning_objectives_generator  # NOTE(review): unused in this module — confirm before removing
from config.llm_config import llms  # NOTE(review): unused in this module — confirm before removing


def build_test_set_tab():
    """Build the (work-in-progress) "Test Set" tab.

    Renders an info tooltip, a subset-filter dropdown, a non-functional
    text-search placeholder, and the contents of ``test_samples.md``.

    Returns:
        tuple: ``(subset_choice,)`` — the subset-filter Dropdown wrapped in a
        1-tuple so the caller's ``(subset_choice,) = build_test_set_tab()``
        unpacking works.
    """
    with gr.TabItem("❔ Test Set"):
        gr.HTML(
            """
            <div style="margin-bottom: 10px;">
                <span style="font-size: 1.5em; cursor: help;" title="Uncontaminated repository of exercises and study texts (not present in the prompts)">
                    ℹ️
                </span>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                subset_choice = gr.Dropdown(
                    choices=["Exercises ❔🚧", "Study Texts ️ℹ️🚧", "Show all ❔ℹ️"],
                    # BUG FIX: the original default was "Both ❔ℹ️", which is
                    # not one of the declared choices, so the dropdown starts
                    # in an invalid state. Default to the existing
                    # "show everything" option instead.
                    value="Show all ❔ℹ️",
                    label="Subset Filter 🚧",
                )
            with gr.Column(scale=2):
                pass  # only here to keep the first column in check: force narrower dropdown

        # BUG FIX: the original wrote `gr.HTML = gr.Textbox(...)`, which
        # rebinds the gradio module's HTML class to a Textbox instance and
        # silently breaks every later `gr.HTML(...)` call in the process.
        # Bind the component to a local name instead.
        text_search = gr.Textbox(
            label="Text Search 🚧",
            placeholder="Dummy placeholder element, doesn't work",
        )

        # NOTE(review): path is relative to the current working directory,
        # not this module — confirm the app is always launched from the
        # project root.
        with open("test_samples.md", "r", encoding="utf-8") as file:
            markdown_content = file.read()

        gr.Markdown(markdown_content)

    # BUG FIX: `return (subset_choice)` is just the bare component
    # (parentheses alone don't make a tuple); the caller unpacks with
    # `(subset_choice,) = build_test_set_tab()`, which requires an
    # iterable of length 1. Return a real 1-tuple.
    return (subset_choice,)
config/system_prompt_texts.py CHANGED
@@ -177,9 +177,43 @@ Your only focus is to accurately diagnose this issue of an inappropriately diffe
177
  Do some reasoning first, and give your diagnosis then.
178
  """
179
 
180
- template_diagnose_distractor_clearly_wrong_text = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  """
182
 
 
 
 
 
183
  template_diagnose_distractor_partially_correct_text = """
184
  """
185
 
 
177
  Do some reasoning first, and give your diagnosis then.
178
  """
179
 
180
+ template_diagnose_distractor_clearly_wrong_text = """
181
+ <task_definition>
182
+ You assess multiple-choice exercise distractors (incorrect answer options) to identify any that are completely ineffective due to being too obviously wrong.
183
+ </task_definition>
184
+
185
+ <key_concepts>
186
+ <effectiveness_criterion>
187
+ A distractor is considered effective if it sounds plausible to at least some students. It's acceptable if most students would dismiss it, as long as not all of them would.
188
+ </effectiveness_criterion>
189
+
190
+ <failure_threshold>
191
+ A distractor fails when it would be dismissed even by a Dumb Student who:
192
+ - Didn't prepare for the test at all
193
+ - Has minimal domain knowledge
194
+ - Has below average world knowledge
195
+ - Is pretty stupid in general
196
+ </failure_threshold>
197
+ </key_concepts>
198
+
199
+ <analysis_guidance>
200
+ Your analysis should engage deeply with understanding the student perspective. Really try to vividly imagine this hypothetical Dumb Student, in line with the test's likely target demographic. They are bottom of their class. What would be their likely interpretations, their thought patterns? Really inhabit this perspective as you examine each distractor in the context of the exercise.
201
+
202
+ Explore multiple angles in your reasoning. Consider edge cases, alternative interpretations, and different ways different students might approach the exercise. Document your thought process thoroughly, showing the nuance in your considerations.
203
+ </analysis_guidance>
204
+
205
+ <output_requirements>
206
+ 1. Focus solely on diagnosing the issue (no need to suggest improvements)
207
+ 2. Show detailed reasoning throughout your analysis
208
+ 3. Maintain nuance and depth in your exploration
209
+ 4. Finally (and only then, in your very last sentence) conclude with a clear, direct final verdict
210
+ </output_requirements>
211
  """
212
 
213
+
214
+
215
+
216
+
217
  template_diagnose_distractor_partially_correct_text = """
218
  """
219
 
config/templates.py CHANGED
@@ -68,11 +68,7 @@ template_diagnose_correct_answer_stands_out = ChatPromptTemplate(
68
 
69
  template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
70
  messages=[
71
- ("system", """You assess a multiple-choice exercise to determine if any distractors
72
- are clearly incorrect and therefore too easy to eliminate. Effective distractors should at least sound plausible to some students.
73
- Identify distractors that are too obviously wrong, such that even students that are completely uninformed about the topic can eliminate them.
74
- Your only focus is to accurately diagnose this issue, no need to provide a fix. Really take your time to arrive at the correct diagnosis.
75
- Do some reasoning first, and give your diagnosis then."""),
76
  ("human", "{standardized_exercise}")
77
  ],
78
  input_variables=["standardized_exercise"]
 
68
 
69
  template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
70
  messages=[
71
+ ("system", template_diagnose_distractor_clearly_wrong_text),
 
 
 
 
72
  ("human", "{standardized_exercise}")
73
  ],
74
  input_variables=["standardized_exercise"]
main.py CHANGED
@@ -4,6 +4,8 @@ import logging
4
  from app.ui.diagnoser_tab import build_diagnoser_tab
5
  from app.ui.distractors_tab import build_distractors_tab
6
  from app.ui.learning_objectives_tab import build_learning_objectives_tab
 
 
7
  from chains.diagnoser.runner import run_diagnoser
8
  from chains.distractors.runner import run_distractors
9
  from chains.learning_objectives_generator.runner import run_learning_objectives_generator
@@ -84,6 +86,14 @@ with gr.Blocks() as interface:
84
  [box_0, box_1, box_2, box_3]
85
  ) = build_learning_objectives_tab()
86
 
 
 
 
 
 
 
 
 
87
  # -------------------------------
88
  # Set Up Interactions
89
  # -------------------------------
 
4
  from app.ui.diagnoser_tab import build_diagnoser_tab
5
  from app.ui.distractors_tab import build_distractors_tab
6
  from app.ui.learning_objectives_tab import build_learning_objectives_tab
7
+ from app.ui.prompts_tab import build_prompts_tab
8
+ from app.ui.test_set_tab import build_test_set_tab
9
  from chains.diagnoser.runner import run_diagnoser
10
  from chains.distractors.runner import run_distractors
11
  from chains.learning_objectives_generator.runner import run_learning_objectives_generator
 
86
  [box_0, box_1, box_2, box_3]
87
  ) = build_learning_objectives_tab()
88
 
89
+ # Build unfinished tab
90
+ (pipeline_choice,
91
+ ) = build_prompts_tab()
92
+
93
+ # Build unfinished tab
94
+ (subset_choice,
95
+ ) = build_test_set_tab()
96
+
97
  # -------------------------------
98
  # Set Up Interactions
99
  # -------------------------------