Cleaned up model picker and exercise formatting standardization handling
- app.py +24 -18
- chains/distractors_chain.py +9 -9
- config/chain_configs.py +3 -3
- config/exercise_standardizer.py +2 -1
- config/llm_config.py +25 -21
- config/templates.py +7 -8
app.py CHANGED

```diff
@@ -116,10 +116,10 @@ async def run_distractors(
     chain_instance = config["class"](
         template_distractors_brainstorm_1=config["template_distractors_brainstorm_1"],
         template_distractors_brainstorm_2=config["template_distractors_brainstorm_2"],
-        llm_brainstorm_1=llms.get(model_choice_distractors_1, config["llm_brainstorm_1"]),  # User-selected
-        llm_brainstorm_2=llms.get(model_choice_distractors_2, config["llm_brainstorm_2"]),
+        llm_brainstorm_1=llms.get(model_choice_distractors_1, config["llm_brainstorm_1"]),  # User-selected LLM 1
+        llm_brainstorm_2=llms.get(model_choice_distractors_2, config["llm_brainstorm_2"]),  # User-selected LLM 2
         template_consolidate=config["template_consolidate"],
-        llm_consolidate=config["llm_consolidate"],
+        llm_consolidate=llms.get(model_choice_distractors_3, config["llm_consolidate"]),  # User-selected LLM 3
     )

     # 3) Create N tasks in parallel (one full distractor generation pipeline per sample)
@@ -262,14 +262,14 @@ with gr.Blocks() as interface:
     with gr.Row():
        model_choice_distractors_1 = gr.Dropdown(
            choices=list(llms.keys()),
-           value="GPT-4o (
-           label="LLM 1",
+           value="GPT-4o (mid temp)",
+           label="LLM 1 - for brainstorming",
            interactive=True,
        )
        model_choice_distractors_2 = gr.Dropdown(
            choices=list(llms.keys()),
-           value="
-           label="LLM 2",
+           value="Claude 3.5 (mid temp)",
+           label="LLM 2 - for brainstorming",
            interactive=True,
        )
        exercise_format_distractors = gr.Dropdown(
@@ -278,25 +278,31 @@ with gr.Blocks() as interface:
            label="Exercise Format",
            interactive=True,
        )
-
-           choices=["
-           value="
-           label="
+       intermediate_distractors_specification = gr.Dropdown(
+           choices=[" ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 ", " 8 ", " 9 ", " 10 ", " a few ", " some ", " a whole lot of ", " a wide range of ", " novel "],
+           value=" 8 ",
+           label="Brainstorm X distractors x4",
            interactive=True,
        )
-
-           choices=
-           value="
-           label="
+       model_choice_distractors_3 = gr.Dropdown(
+           choices=list(llms.keys()),
+           value="GPT-4o (low temp)",
+           label="LLM 3 - for interpreting results",
            interactive=True,
        )
        final_distractors_specification = gr.Dropdown(
-           choices=["all unique distractors", "the best distractors", "only the very best
-           "a wide range of", "
-           value="all unique distractors",
+           choices=[" ", " of all unique distractors", " of the top 5", " of the best distractors", " of only the very best", " of the best 4", " of the best 5", " of the best 6", " of the best 7", " of the best 8", " of the best 9", " of the best 10", " of the best 11", " of the best 12", " of a few of them", " of some of them", " of most of them",
+                    " of a wide range of", " of the 3 worst"],
+           value=" of all unique distractors",
            label="Finally display X distractors",
            interactive=True,
        )
+       sampling_count_distractors = gr.Dropdown(
+           choices=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
+           value="1",
+           label="Response Count",
+           interactive=True,
+       )
        # Set up a change callback so that if the user selects any model with "Claude" in the name, the exercise format updates to "XML"
        model_choice_distractors_1.change(
            fn=update_exercise_format,
```
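The pattern worth noting here is how app.py resolves the dropdown strings to actual model objects: `llms.get(choice, default)` falls back to the chain config's default LLM whenever the selected key is missing from the registry. A minimal sketch of that lookup (the `llms` and `config` values below are stand-ins, not the app's real objects):

```python
# Stand-ins for the real registry (config/llm_config.py) and the
# per-chain defaults (config/chain_configs.py).
llms = {"GPT-4o (mid temp)": "gpt-4o@0.6", "Claude 3.5 (mid temp)": "claude@0.6"}
config = {"llm_brainstorm_1": "gpt-4o@0.2"}  # default from chain_configs

def resolve_llm(dropdown_choice: str):
    # Same fallback as the diff above: unknown or stale dropdown values
    # silently degrade to the configured default instead of raising KeyError.
    return llms.get(dropdown_choice, config["llm_brainstorm_1"])

print(resolve_llm("GPT-4o (mid temp)"))  # -> gpt-4o@0.6 (user's pick)
print(resolve_llm("GPT-5 (mid temp)"))   # -> gpt-4o@0.2 (fallback default)
```

The same fallback now guards all three selector dropdowns, so a renamed registry key degrades to the default instead of crashing a run mid-pipeline.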
chains/distractors_chain.py CHANGED

```diff
@@ -41,32 +41,32 @@ class DistractorsChain(BaseModel):
             response = await llm_brainstorm.ainvoke(messages)
             content = getattr(response, "content", response)

-            return f"[
+            return f"[ --- list separator {index_label} ---]\n\n{content}"

         tasks = []
-        # Template 1,
+        # Template 1, LLM 1
         tasks.append(run_brainstorm(
             self.template_distractors_brainstorm_1,
             self.llm_brainstorm_1,
-            "T1-
+            "T1-1"
         ))
-        # Template 1,
+        # Template 1, LLM 2
         tasks.append(run_brainstorm(
             self.template_distractors_brainstorm_1,
             self.llm_brainstorm_2,
-            "T1-
+            "T1-2"
         ))
-        # Template 2,
+        # Template 2, LLM 1
         tasks.append(run_brainstorm(
             self.template_distractors_brainstorm_2,
             self.llm_brainstorm_1,
-            "T2-
+            "T2-1"
         ))
-        # Template 2,
+        # Template 2, LLM 2
         tasks.append(run_brainstorm(
             self.template_distractors_brainstorm_2,
             self.llm_brainstorm_2,
-            "T2-
+            "T2-2"
         ))

         # Kick them off concurrently
```
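The four labeled brainstorm calls are a 2x2 fan-out: each of two prompt templates is paired with each of two LLMs, and the separator labels (`T1-1` through `T2-2`) let the consolidation prompt keep the four lists apart. A self-contained sketch of that pattern, with a dummy async callable standing in for `llm.ainvoke` (names here are illustrative, not the chain's actual API):

```python
import asyncio

async def fake_ainvoke(template: str, llm: str) -> str:
    await asyncio.sleep(0)  # stands in for the real network call
    return f"distractor list from {template} via {llm}"

async def brainstorm_all() -> str:
    async def run_brainstorm(template: str, llm: str, index_label: str) -> str:
        content = await fake_ainvoke(template, llm)
        # Same separator scheme as the diff above.
        return f"[ --- list separator {index_label} ---]\n\n{content}"

    # 2 templates x 2 LLMs -> four concurrent tasks labeled T1-1..T2-2.
    tasks = [
        run_brainstorm(t, m, f"{t_i}-{m_i}")
        for t_i, t in (("T1", "template_1"), ("T2", "template_2"))
        for m_i, m in (("1", "llm_1"), ("2", "llm_2"))
    ]
    results = await asyncio.gather(*tasks)  # kick them off concurrently
    return "\n\n".join(results)

print(asyncio.run(brainstorm_all()))
```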
config/chain_configs.py CHANGED

```diff
@@ -19,8 +19,8 @@ chain_configs = {
     "diagnoser": {
         "class": DiagnoserChain,
         "template_standardize": standardize_template,
-        "llm_standardize": llms["GPT-4o-mini
-        "llm_4o_mini": llms["GPT-4o-mini"],
+        "llm_standardize": llms["GPT-4o-mini (zero temp)"],  # Always fixed
+        "llm_4o_mini": llms["GPT-4o-mini (low temp)"],
         "llm_4o": llms["GPT-4o (low temp)"],
         # 4 different diagnosis templates (to run in parallel):
         "templates_diagnose": [
@@ -35,7 +35,7 @@ chain_configs = {
     "distractors": {
         "class": DistractorsChain,
         "template_standardize": standardize_template,
-        "llm_standardize": llms["GPT-4o-mini
+        "llm_standardize": llms["GPT-4o-mini (zero temp)"],  # Always fixed
         "template_distractors_brainstorm_1": template_distractors_brainstorm_1,
         "template_distractors_brainstorm_2": template_distractors_brainstorm_2,
         "llm_brainstorm_1": llms["GPT-4o (low temp)"],
```
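These config entries are what `config["class"](...)` in app.py consumes: each value is either a prompt template or a ready-made LLM, and `llm_standardize` stays pinned to the cheap zero-temperature model while the brainstorm slots remain user-overridable. A toy version of the dispatch (class and keys simplified for illustration):

```python
class ToyDistractorsChain:
    def __init__(self, llm_standardize, llm_brainstorm_1):
        self.llm_standardize = llm_standardize    # always the fixed default
        self.llm_brainstorm_1 = llm_brainstorm_1  # may be a user override

chain_configs = {
    "distractors": {
        "class": ToyDistractorsChain,
        "llm_standardize": "gpt-4o-mini (zero temp)",  # never shown in the UI
        "llm_brainstorm_1": "gpt-4o (low temp)",       # default if no pick
    },
}

cfg = chain_configs["distractors"]
user_pick = "claude-3.5 (mid temp)"  # would come from the dropdown
chain = cfg["class"](
    llm_standardize=cfg["llm_standardize"],
    llm_brainstorm_1=user_pick or cfg["llm_brainstorm_1"],
)
print(chain.llm_standardize, "/", chain.llm_brainstorm_1)
```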
config/exercise_standardizer.py CHANGED

```diff
@@ -22,6 +22,7 @@ async def standardize_exercise(user_query: str, exercise_format: str, template:
     )

     std_messages = prompt_std.to_messages()
-
+    response = await llm.ainvoke(std_messages)
+    standardized_exercise = getattr(response, "content", response)

     return standardized_exercise
```
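The two added lines reuse the defensive pattern from the brainstorm step: LangChain chat models return a message object from `ainvoke`, while other runnables may return a plain string, so `getattr(response, "content", response)` handles both shapes. A sketch with a dummy model in place of the real `llm`:

```python
import asyncio
from types import SimpleNamespace

async def fake_ainvoke(messages):
    # A chat model returns a message object exposing .content; a plain
    # string result would have no such attribute.
    return SimpleNamespace(content="standardized exercise text")

async def standardize(messages):
    response = await fake_ainvoke(messages)
    # Fall back to the raw response when there is no .content attribute.
    return getattr(response, "content", response)

print(asyncio.run(standardize([("human", "raw exercise")])))
```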
config/llm_config.py CHANGED

```diff
@@ -12,7 +12,7 @@ DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
 # Define temperature presets (adjust as needed)
 ZERO = 0
 LOW = 0.2
-MID = 0.
+MID = 0.6
 HIGH = 1.2

 # Factory functions for each provider
@@ -35,15 +35,13 @@ def create_deepseek_llm(model_name: str, temperature: float):
 # all of them in one dictionary
 llms = {
     # OpenAI models with temperature
-
+    "GPT-4o (zero temp)": create_openai_llm("gpt-4o", ZERO),
     "GPT-4o (low temp)": create_openai_llm("gpt-4o", LOW),
     "GPT-4o (mid temp)": create_openai_llm("gpt-4o", MID),
     "GPT-4o (high temp)": create_openai_llm("gpt-4o", HIGH),
-    "GPT-4o-mini
-    "GPT-4o-mini": create_openai_llm("gpt-4o-mini", LOW),
-    "GPT-
-    "GPT-4o-mini_high_temp": create_openai_llm("gpt-4o-mini", HIGH),
-    "GPT-4 Turbo": create_openai_llm("gpt-4-turbo-2024-04-09", LOW),
+    "GPT-4o-mini (zero temp)": create_openai_llm("gpt-4o-mini", ZERO),
+    "GPT-4o-mini (low temp)": create_openai_llm("gpt-4o-mini", LOW),
+    "GPT-4 Turbo (low temp)": create_openai_llm("gpt-4-turbo-2024-04-09", LOW),

     # OpenAI reasoning models (no temperature)
     "o1": create_openai_reasoning_llm("o1-2024-12-17"),
@@ -52,40 +50,46 @@ llms = {
     "o3-mini (high-reasoning effort version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="high"),

     # Anthropic models (Claude)
+    "Claude 3.5 (zero temp)": create_anthropic_llm("claude-3-5-sonnet-latest", ZERO),
     "Claude 3.5 (low temp)": create_anthropic_llm("claude-3-5-sonnet-latest", LOW),
     "Claude 3.5 (mid temp)": create_anthropic_llm("claude-3-5-sonnet-latest", MID),
     "Claude 3.5 (high temp)": create_anthropic_llm("claude-3-5-sonnet-latest", HIGH),

     # DeepSeek
+    "Deepseek R1 (zero temp)π§": create_anthropic_llm("deepseek-reasoner", ZERO),
     "Deepseek R1 (low temp)π§": create_anthropic_llm("deepseek-reasoner", LOW),
+    "Deepseek R1 (mid temp)π§": create_anthropic_llm("deepseek-reasoner", MID),
+    "Deepseek R1 (high temp)π§": create_anthropic_llm("deepseek-reasoner", HIGH),
 }

 # specific for Diagnosis tab
-
-# OpenAI models
-
+llms_most_wanted = {
+    # OpenAI models
+    "GPT-4o (zero temp)": create_openai_llm("gpt-4o", ZERO),
     "GPT-4o (low temp)": create_openai_llm("gpt-4o", LOW),
     "GPT-4o (mid temp)": create_openai_llm("gpt-4o", MID),
     "GPT-4o (high temp)": create_openai_llm("gpt-4o", HIGH),
-    "GPT-4o-mini
-    "GPT-4o-mini": create_openai_llm("gpt-4o-mini", LOW),
-    "GPT-
-    "GPT-4o-mini_high_temp": create_openai_llm("gpt-4o-mini", HIGH),
-    "GPT-4 Turbo": create_openai_llm("gpt-4-turbo-2024-04-09", LOW),
+    "GPT-4o-mini (zero temp)": create_openai_llm("gpt-4o-mini", ZERO),
+    "GPT-4o-mini (low temp)": create_openai_llm("gpt-4o-mini", LOW),
+    "GPT-4 Turbo (low temp)": create_openai_llm("gpt-4-turbo-2024-04-09", LOW),

     # OpenAI reasoning models (no temperature)
     "o1": create_openai_reasoning_llm("o1-2024-12-17"),
-    "o3-mini (low-reasoning version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="low"),
-    "o3-mini (medium-reasoning version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="medium"),
-    "o3-mini (high-reasoning version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="high"),
-
-
+    "o3-mini (low-reasoning effort version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="low"),
+    "o3-mini (medium-reasoning effort version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="medium"),
+    "o3-mini (high-reasoning effort version)": create_openai_reasoning_llm("o3-mini", reasoning_effort="high"),

-    # Anthropic models
+    # Anthropic models
+    "Claude 3.5 (zero temp)": create_anthropic_llm("claude-3-5-sonnet-latest", ZERO),
     "Claude 3.5 (low temp)": create_anthropic_llm("claude-3-5-sonnet-latest", LOW),
     "Claude 3.5 (mid temp)": create_anthropic_llm("claude-3-5-sonnet-latest", MID),
     "Claude 3.5 (high temp)": create_anthropic_llm("claude-3-5-sonnet-latest", HIGH),
+    "Claude 3.5 Haiku (zero temp)": create_anthropic_llm("claude-3-5-haiku-latest", ZERO),
+    "Claude 3.5 Haiku (low temp)": create_anthropic_llm("claude-3-5-haiku-latest", LOW),

     # DeepSeek
+    "Deepseek R1 (zero temp)π§": create_anthropic_llm("deepseek-reasoner", ZERO),
     "Deepseek R1 (low temp)π§": create_anthropic_llm("deepseek-reasoner", LOW),
+    "Deepseek R1 (mid temp)π§": create_anthropic_llm("deepseek-reasoner", MID),
+    "Deepseek R1 (high temp)π§": create_anthropic_llm("deepseek-reasoner", HIGH),
 }
```
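The registry's one-line entries come from small per-provider factory functions (`create_openai_llm`, `create_anthropic_llm`, and so on) whose bodies this diff does not show. A plausible shape for them, assuming the `langchain-openai` and `langchain-anthropic` packages; treat this as a sketch, not the repo's exact code:

```python
# Hypothetical factory bodies consistent with the registry above; the
# real ones live earlier in config/llm_config.py.
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

# Temperature presets mirroring the diff above.
ZERO, LOW, MID, HIGH = 0, 0.2, 0.6, 1.2

def create_openai_llm(model_name: str, temperature: float) -> ChatOpenAI:
    return ChatOpenAI(model=model_name, temperature=temperature)

def create_anthropic_llm(model_name: str, temperature: float) -> ChatAnthropic:
    return ChatAnthropic(model=model_name, temperature=temperature)

llms = {
    "GPT-4o (zero temp)": create_openai_llm("gpt-4o", ZERO),
    "Claude 3.5 (zero temp)": create_anthropic_llm("claude-3-5-sonnet-latest", ZERO),
}
```

Baking the temperature into the registry key lets the Gradio dropdowns expose each model-plus-temperature combination as a single flat choice.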
config/templates.py CHANGED

```diff
@@ -189,7 +189,7 @@ diagnose_scorecard_template = ChatPromptTemplate(

 template_distractors_brainstorm_1 = ChatPromptTemplate(
     messages=[
-        ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with
+        ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with{intermediate_distractors_specification}additional high-quality distractors: "
                    "alternative answer options that are not correct, yet also not so implausible that even poorly informed students would immediately dismiss them. Make sure to use the same language as the existing exercise."),
         ("human", "{standardized_exercise}")
     ],
@@ -198,7 +198,7 @@ template_distractors_brainstorm_1 = ChatPromptTemplate(

 template_distractors_brainstorm_2 = ChatPromptTemplate(
     messages=[
-        ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with
+        ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with{intermediate_distractors_specification}additional high-quality distractors: "
                    "alternative answer options that are not correct, yet not so implausible that even poorly informed students would immediately dismiss them. Go about this very methodically: "
                    "Really try to think outside of the box and get creative here, providing potential alternative distractors across a wide range of options. "
                    "Before you present your final selection, take your time to really consider the entire solution space, weighing your different ideas and options, then list the distractors. Make sure to use the same language as the existing exercise."),
@@ -211,13 +211,12 @@ template_distractors_brainstorm_2 = ChatPromptTemplate(
 template_consolidate_distractors = ChatPromptTemplate(
     messages=[
         ("system", "You are given several lists of potential distractors (answer options to a multiple choice exercise), that need to be consolidated into one list. "
-                   "Filter out duplicates, do some logical sorting among them, and just return one plain list
-                   "Only focus on the distractors (answer options) themselves, ignore any reasoning about them. Return only the list,
-        ("human", "For context, this is the exercise that the distractors are about
+                   "Filter out duplicates, do some logical sorting among them, and just return one plain list{final_distractors_specification}. "
+                   "Only focus on the distractors (answer options) themselves, ignore any reasoning about them. Return only the list, nothing else. Format the list without numbering or bullet points, just put every distractor on its own line. Use the same language as the existing exercise. "),
+        ("human", "For context, this is the exercise that the distractors are about:\n "
                   "{standardized_exercise} "
-                  ""
-                  "
-                  "{brainstorm_outputs}")
+                  "Here are the lists:\n "
+                  "{brainstorm_outputs} ")
     ],
     input_variables=["standardized_exercise", "brainstorm_outputs", "final_distractors_specification"]
 )
```
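Note the deliberate padding in the dropdown strings (`" 8 "`, `" of all unique distractors"`): the placeholders are spliced into the prompt with no surrounding spaces, so the padding travels with the value. A small sketch of that splice using `ChatPromptTemplate` (assuming the `langchain-core` import path; the repo's constructor call differs slightly):

```python
from langchain_core.prompts import ChatPromptTemplate

template = ChatPromptTemplate.from_messages([
    ("system",
     "Based on the given multiple choice exercise, come up "
     "with{intermediate_distractors_specification}additional high-quality distractors."),
    ("human", "{standardized_exercise}"),
])

messages = template.format_messages(
    intermediate_distractors_specification=" 8 ",  # padded dropdown value
    standardized_exercise="Which planet is largest? A) Jupiter B) Mars",
)
print(messages[0].content)
# -> "... come up with 8 additional high-quality distractors."
```

Selecting the blank choice `" "` degrades gracefully to "come up with additional high-quality distractors", which is why an empty-looking option is offered at all.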