BtB-ExpC committed on
Commit
7c6a50c
·
1 Parent(s): 125f28b

revamp of clearly incorrect prompt + added 2 new tabs

Browse files
app/ui/prompts_tab.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from chains.learning_objectives_generator.runner import run_learning_objectives_generator  # NOTE(review): unused in this module — confirm before removing
from config.llm_config import llms  # NOTE(review): unused in this module — confirm before removing


def build_prompts_tab():
    """Build the (work-in-progress) "See Prompts" tab.

    Renders an info tooltip, a pipeline-selection dropdown, and a
    non-functional text-search placeholder.

    Returns:
        tuple: ``(pipeline_choice,)`` — the pipeline Dropdown wrapped in a
        1-tuple so the caller's ``(pipeline_choice,) = build_prompts_tab()``
        unpacking works.
    """
    with gr.TabItem("🗒🚧️ See Prompts"):
        gr.HTML(
            """
            <div style="margin-bottom: 10px;">
                <span style="font-size: 1.5em; cursor: help;" title="Behind-the-scenes prompt perusing at your leisure">
                    ℹ️
                </span>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                pipeline_choice = gr.Dropdown(
                    choices=[
                        "Exercise Diagnosis 🩺",
                        "Distractors Brainstorm 🤔",
                        "Learning Objectives Identification 🧠",
                        "ALL OF THEM ✨",
                    ],
                    value="Exercise Diagnosis 🩺",
                    label="Tasks Pipelines",
                )
            with gr.Column(scale=2):
                pass  # only here to keep the first column in check: force narrower dropdown

        # BUG FIX: the original wrote `gr.HTML = gr.Textbox(...)`, which
        # rebinds the gradio module's HTML class to a Textbox instance and
        # silently breaks every later `gr.HTML(...)` call in the process.
        # Bind the component to a local name instead.
        text_search = gr.Textbox(
            label="Text Search 🚧",
            placeholder="Dummy placeholder, doesn't work (yet?)",
        )

    # BUG FIX: `return (pipeline_choice)` is just the bare component
    # (parentheses alone don't make a tuple); the caller unpacks with
    # `(pipeline_choice,) = build_prompts_tab()`, which requires an
    # iterable of length 1. Return a real 1-tuple.
    return (pipeline_choice,)
app/ui/test_set_tab.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from chains.learning_objectives_generator.runner import run_learning_objectives_generator  # NOTE(review): unused in this module — confirm before removing
from config.llm_config import llms  # NOTE(review): unused in this module — confirm before removing


def build_test_set_tab():
    """Build the (work-in-progress) "Test Set" tab.

    Renders an info tooltip, a subset-filter dropdown, a non-functional
    text-search placeholder, and the contents of ``test_samples.md``.

    Returns:
        tuple: ``(subset_choice,)`` — the subset-filter Dropdown wrapped in a
        1-tuple so the caller's ``(subset_choice,) = build_test_set_tab()``
        unpacking works.
    """
    with gr.TabItem("❔ Test Set"):
        gr.HTML(
            """
            <div style="margin-bottom: 10px;">
                <span style="font-size: 1.5em; cursor: help;" title="Uncontaminated repository of exercises and study texts (not present in the prompts)">
                    ℹ️
                </span>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                subset_choice = gr.Dropdown(
                    choices=["Exercises ❔🚧", "Study Texts ️ℹ️🚧", "Show all ❔ℹ️"],
                    # BUG FIX: the original default was "Both ❔ℹ️", which is
                    # not one of the declared choices, so the dropdown starts
                    # in an invalid state. Default to the existing
                    # "show everything" option instead.
                    value="Show all ❔ℹ️",
                    label="Subset Filter 🚧",
                )
            with gr.Column(scale=2):
                pass  # only here to keep the first column in check: force narrower dropdown

        # BUG FIX: the original wrote `gr.HTML = gr.Textbox(...)`, which
        # rebinds the gradio module's HTML class to a Textbox instance and
        # silently breaks every later `gr.HTML(...)` call in the process.
        # Bind the component to a local name instead.
        text_search = gr.Textbox(
            label="Text Search 🚧",
            placeholder="Dummy placeholder element, doesn't work",
        )

        # NOTE(review): path is relative to the current working directory,
        # not this module — confirm the app is always launched from the
        # project root.
        with open("test_samples.md", "r", encoding="utf-8") as file:
            markdown_content = file.read()

        gr.Markdown(markdown_content)

    # BUG FIX: `return (subset_choice)` is just the bare component
    # (parentheses alone don't make a tuple); the caller unpacks with
    # `(subset_choice,) = build_test_set_tab()`, which requires an
    # iterable of length 1. Return a real 1-tuple.
    return (subset_choice,)
config/system_prompt_texts.py CHANGED
@@ -177,9 +177,43 @@ Your only focus is to accurately diagnose this issue of an inappropriately diffe
177
  Do some reasoning first, and give your diagnosis then.
178
  """
179
 
180
- template_diagnose_distractor_clearly_wrong_text = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  """
182
 
 
 
 
 
183
  template_diagnose_distractor_partially_correct_text = """
184
  """
185
 
 
177
  Do some reasoning first, and give your diagnosis then.
178
  """
179
 
180
+ template_diagnose_distractor_clearly_wrong_text = """
181
+ <task_definition>
182
+ You assess multiple-choice exercise distractors (incorrect answer options) to identify any that are completely ineffective due to being too obviously wrong.
183
+ </task_definition>
184
+
185
+ <key_concepts>
186
+ <effectiveness_criterion>
187
+ A distractor is considered effective if it sounds plausible to at least some students. It's acceptable if most students would dismiss it, as long as not all of them would.
188
+ </effectiveness_criterion>
189
+
190
+ <failure_threshold>
191
+ A distractor fails when it would be dismissed even by a Dumb Student who:
192
+ - Didn't prepare for the test at all
193
+ - Has minimal domain knowledge
194
+ - Has below average world knowledge
195
+ - Is pretty stupid in general
196
+ </failure_threshold>
197
+ </key_concepts>
198
+
199
+ <analysis_guidance>
200
+ Your analysis should engage deeply with understanding the student perspective. Really try to vividly imagine this hypothetical Dumb Student, in line with the test's likely target demographic. They are bottom of their class. What would be their likely interpretations, their thought patterns? Really inhabit this perspective as you examine each distractor in the context of the exercise.
201
+
202
+ Explore multiple angles in your reasoning. Consider edge cases, alternative interpretations, and different ways different students might approach the exercise. Document your thought process thoroughly, showing the nuance in your considerations.
203
+ </analysis_guidance>
204
+
205
+ <output_requirements>
206
+ 1. Focus solely on diagnosing the issue (no need to suggest improvements)
207
+ 2. Show detailed reasoning throughout your analysis
208
+ 3. Maintain nuance and depth in your exploration
209
+ 4. Finally (and only then, in your very last sentence) conclude with a clear, direct final verdict
210
+ </output_requirements>
211
  """
212
 
213
+
214
+
215
+
216
+
217
  template_diagnose_distractor_partially_correct_text = """
218
  """
219
 
config/templates.py CHANGED
@@ -68,11 +68,7 @@ template_diagnose_correct_answer_stands_out = ChatPromptTemplate(
68
 
69
  template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
70
  messages=[
71
- ("system", """You assess a multiple-choice exercise to determine if any distractors
72
- are clearly incorrect and therefore too easy to eliminate. Effective distractors should at least sound plausible to some students.
73
- Identify distractors that are too obviously wrong, such that even students that are completely uninformed about the topic can eliminate them.
74
- Your only focus is to accurately diagnose this issue, no need to provide a fix. Really take your time to arrive at the correct diagnosis.
75
- Do some reasoning first, and give your diagnosis then."""),
76
  ("human", "{standardized_exercise}")
77
  ],
78
  input_variables=["standardized_exercise"]
 
68
 
69
  template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
70
  messages=[
71
+ ("system", template_diagnose_distractor_clearly_wrong_text),
 
 
 
 
72
  ("human", "{standardized_exercise}")
73
  ],
74
  input_variables=["standardized_exercise"]
main.py CHANGED
@@ -4,6 +4,8 @@ import logging
4
  from app.ui.diagnoser_tab import build_diagnoser_tab
5
  from app.ui.distractors_tab import build_distractors_tab
6
  from app.ui.learning_objectives_tab import build_learning_objectives_tab
 
 
7
  from chains.diagnoser.runner import run_diagnoser
8
  from chains.distractors.runner import run_distractors
9
  from chains.learning_objectives_generator.runner import run_learning_objectives_generator
@@ -84,6 +86,14 @@ with gr.Blocks() as interface:
84
  [box_0, box_1, box_2, box_3]
85
  ) = build_learning_objectives_tab()
86
 
 
 
 
 
 
 
 
 
87
  # -------------------------------
88
  # Set Up Interactions
89
  # -------------------------------
 
4
  from app.ui.diagnoser_tab import build_diagnoser_tab
5
  from app.ui.distractors_tab import build_distractors_tab
6
  from app.ui.learning_objectives_tab import build_learning_objectives_tab
7
+ from app.ui.prompts_tab import build_prompts_tab
8
+ from app.ui.test_set_tab import build_test_set_tab
9
  from chains.diagnoser.runner import run_diagnoser
10
  from chains.distractors.runner import run_distractors
11
  from chains.learning_objectives_generator.runner import run_learning_objectives_generator
 
86
  [box_0, box_1, box_2, box_3]
87
  ) = build_learning_objectives_tab()
88
 
89
+ # Build unfinished tab
90
+ (pipeline_choice,
91
+ ) = build_prompts_tab()
92
+
93
+ # Build unfinished tab
94
+ (subset_choice,
95
+ ) = build_test_set_tab()
96
+
97
  # -------------------------------
98
  # Set Up Interactions
99
  # -------------------------------