revamp of clearly incorrect prompt + added 2 new tabs
Browse files- app/ui/prompts_tab.py +31 -0
- app/ui/test_set_tab.py +36 -0
- config/system_prompt_texts.py +35 -1
- config/templates.py +1 -5
- main.py +10 -0
app/ui/prompts_tab.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from chains.learning_objectives_generator.runner import run_learning_objectives_generator
|
| 3 |
+
from config.llm_config import llms
|
| 4 |
+
|
| 5 |
+
def build_prompts_tab():
    """Build the (work-in-progress) prompt-browsing tab.

    Returns:
        tuple: a 1-tuple ``(pipeline_choice,)`` holding the pipeline-selection
        Dropdown, so the caller can unpack it as
        ``(pipeline_choice,) = build_prompts_tab()`` (as main.py does).
    """
    with gr.TabItem("🗒🚧️ See Prompts"):
        gr.HTML(
            """
            <div style="margin-bottom: 10px;">
                <span style="font-size: 1.5em; cursor: help;" title="Behind-the-scenes prompt perusing at your leisure">
                    ℹ️
                </span>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                pipeline_choice = gr.Dropdown(
                    choices=[
                        "Exercise Diagnosis 🩺",
                        "Distractors Brainstorm 🤔",
                        "Learning Objectives Identification 🧠",
                        "ALL OF THEM ✨",
                    ],
                    value="Exercise Diagnosis 🩺",
                    label="Tasks Pipelines",
                )
            with gr.Column(scale=2):
                pass  # only here to keep the first column in check: force narrower dropdown

        # BUG FIX: the original did ``gr.HTML = gr.Textbox(...)``, which
        # overwrote the ``gradio.HTML`` module attribute for the whole process.
        # Bind the (still non-functional) search box to a local name instead.
        search_box = gr.Textbox(
            label="Text Search 🚧",
            placeholder="Dummy placeholder, doesn't work (yet?)",
        )

        # Return references.
        # BUG FIX: must be a real 1-tuple — the caller in main.py unpacks
        # ``(pipeline_choice,) = build_prompts_tab()``; the original
        # ``return (pipeline_choice)`` returned the bare component.
        return (pipeline_choice,)
|
app/ui/test_set_tab.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from chains.learning_objectives_generator.runner import run_learning_objectives_generator
|
| 3 |
+
from config.llm_config import llms
|
| 4 |
+
|
| 5 |
+
def build_test_set_tab():
    """Build the (work-in-progress) test-set browsing tab.

    Reads and renders ``test_samples.md`` from the working directory
    (will raise ``FileNotFoundError`` at UI-build time if it is missing —
    TODO confirm the file ships with the app).

    Returns:
        tuple: a 1-tuple ``(subset_choice,)`` holding the subset-filter
        Dropdown, so the caller can unpack it as
        ``(subset_choice,) = build_test_set_tab()`` (as main.py does).
    """
    with gr.TabItem("❔ Test Set"):
        gr.HTML(
            """
            <div style="margin-bottom: 10px;">
                <span style="font-size: 1.5em; cursor: help;" title="Uncontaminated repository of exercises and study texts (not present in the prompts)">
                    ℹ️
                </span>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                subset_choice = gr.Dropdown(
                    choices=["Exercises ❔🚧", "Study Texts ️ℹ️🚧", "Show all ❔ℹ️"],
                    # BUG FIX: the original default ``"Both ❔ℹ️"`` is not one
                    # of the declared choices; use the matching entry instead.
                    value="Show all ❔ℹ️",
                    label="Subset Filter 🚧",
                )
            with gr.Column(scale=2):
                pass  # only here to keep the first column in check: force narrower dropdown

        # BUG FIX: the original did ``gr.HTML = gr.Textbox(...)``, which
        # overwrote the ``gradio.HTML`` module attribute for the whole process.
        # Bind the (still non-functional) search box to a local name instead.
        search_box = gr.Textbox(
            label="Text Search 🚧",
            placeholder="Dummy placeholder element, doesn't work",
        )

        with open("test_samples.md", "r", encoding="utf-8") as file:
            markdown_content = file.read()

        gr.Markdown(markdown_content)

        # Return references.
        # BUG FIX: must be a real 1-tuple — the caller in main.py unpacks
        # ``(subset_choice,) = build_test_set_tab()``; the original
        # ``return (subset_choice)`` returned the bare component.
        return (subset_choice,)
|
config/system_prompt_texts.py
CHANGED
|
@@ -177,9 +177,43 @@ Your only focus is to accurately diagnose this issue of an inappropriately diffe
|
|
| 177 |
Do some reasoning first, and give your diagnosis then.
|
| 178 |
"""
|
| 179 |
|
| 180 |
-
template_diagnose_distractor_clearly_wrong_text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
"""
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
template_diagnose_distractor_partially_correct_text = """
|
| 184 |
"""
|
| 185 |
|
|
|
|
| 177 |
Do some reasoning first, and give your diagnosis then.
|
| 178 |
"""
|
| 179 |
|
| 180 |
+
template_diagnose_distractor_clearly_wrong_text = """
|
| 181 |
+
<task_definition>
|
| 182 |
+
You assess multiple-choice exercise distractors (incorrect answer options) to identify any that are completely ineffective due to being too obviously wrong.
|
| 183 |
+
</task_definition>
|
| 184 |
+
|
| 185 |
+
<key_concepts>
|
| 186 |
+
<effectiveness_criterion>
|
| 187 |
+
A distractor is considered effective if it sounds plausible to at least some students. It's acceptable if most students would dismiss it, as long as not all of them would.
|
| 188 |
+
</effectiveness_criterion>
|
| 189 |
+
|
| 190 |
+
<failure_threshold>
|
| 191 |
+
A distractor fails when it would be dismissed even by a Dumb Student who:
|
| 192 |
+
- Didn't prepare for the test at all
|
| 193 |
+
- Has minimal domain knowledge
|
| 194 |
+
- Has below average world knowledge
|
| 195 |
+
- Is pretty stupid in general
|
| 196 |
+
</failure_threshold>
|
| 197 |
+
</key_concepts>
|
| 198 |
+
|
| 199 |
+
<analysis_guidance>
|
| 200 |
+
Your analysis should engage deeply with understanding the student perspective. Really try to vividly imagine this hypothetical Dumb Student, in line with the test's likely target demographic. They are bottom of their class. What would be their likely interpretations, their thought patterns? Really inhabit this perspective as you examine each distractor in the context of the exercise.
|
| 201 |
+
|
| 202 |
+
Explore multiple angles in your reasoning. Consider edge cases, alternative interpretations, and different ways different students might approach the exercise. Document your thought process thoroughly, showing the nuance in your considerations.
|
| 203 |
+
</analysis_guidance>
|
| 204 |
+
|
| 205 |
+
<output_requirements>
|
| 206 |
+
1. Focus solely on diagnosing the issue (no need to suggest improvements)
|
| 207 |
+
2. Show detailed reasoning throughout your analysis
|
| 208 |
+
3. Maintain nuance and depth in your exploration
|
| 209 |
+
4. Finally (and only then, in your very last sentence) conclude with a clear, direct final verdict
|
| 210 |
+
</output_requirements>
|
| 211 |
"""
|
| 212 |
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
|
| 217 |
template_diagnose_distractor_partially_correct_text = """
|
| 218 |
"""
|
| 219 |
|
config/templates.py
CHANGED
|
@@ -68,11 +68,7 @@ template_diagnose_correct_answer_stands_out = ChatPromptTemplate(
|
|
| 68 |
|
| 69 |
template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
|
| 70 |
messages=[
|
| 71 |
-
("system",
|
| 72 |
-
are clearly incorrect and therefore too easy to eliminate. Effective distractors should at least sound plausible to some students.
|
| 73 |
-
Identify distractors that are too obviously wrong, such that even students that are completely uninformed about the topic can eliminate them.
|
| 74 |
-
Your only focus is to accurately diagnose this issue, no need to provide a fix. Really take your time to arrive at the correct diagnosis.
|
| 75 |
-
Do some reasoning first, and give your diagnosis then."""),
|
| 76 |
("human", "{standardized_exercise}")
|
| 77 |
],
|
| 78 |
input_variables=["standardized_exercise"]
|
|
|
|
| 68 |
|
| 69 |
template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
|
| 70 |
messages=[
|
| 71 |
+
("system", template_diagnose_distractor_clearly_wrong_text),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
("human", "{standardized_exercise}")
|
| 73 |
],
|
| 74 |
input_variables=["standardized_exercise"]
|
main.py
CHANGED
|
@@ -4,6 +4,8 @@ import logging
|
|
| 4 |
from app.ui.diagnoser_tab import build_diagnoser_tab
|
| 5 |
from app.ui.distractors_tab import build_distractors_tab
|
| 6 |
from app.ui.learning_objectives_tab import build_learning_objectives_tab
|
|
|
|
|
|
|
| 7 |
from chains.diagnoser.runner import run_diagnoser
|
| 8 |
from chains.distractors.runner import run_distractors
|
| 9 |
from chains.learning_objectives_generator.runner import run_learning_objectives_generator
|
|
@@ -84,6 +86,14 @@ with gr.Blocks() as interface:
|
|
| 84 |
[box_0, box_1, box_2, box_3]
|
| 85 |
) = build_learning_objectives_tab()
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
# -------------------------------
|
| 88 |
# Set Up Interactions
|
| 89 |
# -------------------------------
|
|
|
|
| 4 |
from app.ui.diagnoser_tab import build_diagnoser_tab
|
| 5 |
from app.ui.distractors_tab import build_distractors_tab
|
| 6 |
from app.ui.learning_objectives_tab import build_learning_objectives_tab
|
| 7 |
+
from app.ui.prompts_tab import build_prompts_tab
|
| 8 |
+
from app.ui.test_set_tab import build_test_set_tab
|
| 9 |
from chains.diagnoser.runner import run_diagnoser
|
| 10 |
from chains.distractors.runner import run_distractors
|
| 11 |
from chains.learning_objectives_generator.runner import run_learning_objectives_generator
|
|
|
|
| 86 |
[box_0, box_1, box_2, box_3]
|
| 87 |
) = build_learning_objectives_tab()
|
| 88 |
|
| 89 |
+
# Build unfinished tab
|
| 90 |
+
(pipeline_choice,
|
| 91 |
+
) = build_prompts_tab()
|
| 92 |
+
|
| 93 |
+
# Build unfinished tab
|
| 94 |
+
(subset_choice,
|
| 95 |
+
) = build_test_set_tab()
|
| 96 |
+
|
| 97 |
# -------------------------------
|
| 98 |
# Set Up Interactions
|
| 99 |
# -------------------------------
|