fix include_diagnosis, parameter order
- chains/exercises/run_fluster_with_diagnosis.py +3 -1
- chains/exercises/runner_without.py +0 -2
- config/system_prompt_texts.py +11 -0
- config/templates.py +1 -8
- main.py +1 -1
chains/exercises/run_fluster_with_diagnosis.py

@@ -203,8 +203,10 @@ async def diagnose_and_fix_all(
         )
         diag_strings.append(diag_result)
 
+        fluster_config = chain_configs["fluster"]
+
         if "❌" in scorecard:
-            ex_fixed = await fix_exercise(ex, scorecard)
+            ex_fixed = await fix_exercise(ex, scorecard, fluster_config)
             fixed_exs.append(ex_fixed)
         else:
             fixed_exs.append(ex)
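Note: the call-site change implies that `fix_exercise` now takes the chain config as a third positional argument. A minimal sketch of the presumed new signature, purely for orientation (the real definition is not part of this commit, and the config shape is an assumption):

```python
from typing import Any

# Sketch only, not the repo's actual definition: the updated call site
# suggests fix_exercise now threads a per-chain config through to the fix step.
async def fix_exercise(ex: str, scorecard: str, chain_config: dict[str, Any]) -> str:
    """Rewrite a flagged exercise using whatever model/template chain_config names."""
    ...
```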
chains/exercises/runner_without.py

@@ -108,8 +108,6 @@ async def run_fluster_no_diagnosis(
        template_write_b,
        llm_a,
        llm_b,
-        # template_refine,
-        # llm_refine,
        template_sanitize,
        llm_sanitize
    )
config/system_prompt_texts.py

@@ -672,6 +672,17 @@ If you're unsure about any of your distractors or "false statements" one way or
 After lots of iterative prep, trying out different things and reasoning through a wide range of potential options, finally return a complete exercise set of 1 bigger multiple choice exercise and 2 smaller True/False statements.
 """
 
+template_diagnose_distractor_partially_correct_text = """
+You analyze a multiple-choice exercise to detect distractors that are
+partially correct. Some answer choices may contain elements of truth, leading to
+ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractors, in the context of this exercise, could be considered a (partially) correct answer?
+After this, consider whether this is bad enough in the context of this question. It's fine if the correct answer is still obviously most correct and some distractors contain elements of truth, or are 'somewhat true but clearly less true than the correct answer'. There is only a problem if the gap becomes too small and unclear.
+As an intuition pump, ask this question: would any experts consider these distractors also a correct answer? If so, diagnose the problem. If not, it's fine.
+Your only focus is to accurately diagnose this issue; there is no need to provide a fix. Really take your time to arrive at the correct diagnosis.
+Do some reasoning first, and give your diagnosis then. All of your output should be measured and nuanced, except for your very final sentence, where you clearly state your conclusion.
+"""
+
+
 uitgangspunt_template_for_writing_a_fluster = """
 # Task outline
 Given a learning objective, your goal is to write an exercise set of 3 high-quality multiple choice exercises that all test the exact same knowledge that's stated in the learning objective.
config/templates.py

@@ -80,14 +80,7 @@ template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
 
 template_diagnose_distractor_partially_correct = ChatPromptTemplate(
     messages=[
-        ("system", """You analyze a multiple-choice exercise to detect distractors that are
-partially correct. Some answer choices may contain elements of truth, leading to
-ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractors, in the context of this exercise, could be considered a (partially) correct answer?
-After this, consider whether this is bad enough in the context of this question. It's fine if the correct answer is still obviously most correct and some distractors contain elements of truth, or are 'somewhat true but clearly less true than the correct answer'. There is only a problem if the gap becomes too small and unclear.
-As an intuition pump, ask this question: would any experts consider these distractors also a correct answer? If so, diagnose the problem. If not, it's fine.
-Your only focus is to accurately diagnose this issue; there is no need to provide a fix. Really take your time to arrive at the correct diagnosis.
-Do some reasoning first, and give your diagnosis then. All of your output should be measured and nuanced, except for your very final sentence, where you clearly state your conclusion.
-"""),
+        ("system", template_diagnose_distractor_partially_correct_text),
         ("human", "{standardized_exercise}")
     ],
     input_variables=["standardized_exercise"]
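Note: the replacement line references `template_diagnose_distractor_partially_correct_text`, but no import is shown being added to config/templates.py, so presumably one already exists or lands elsewhere. A minimal sketch of the assumed import, plus a quick rendering check (the example exercise string is made up):

```python
# Assumed import path; not shown in this commit:
from config.system_prompt_texts import template_diagnose_distractor_partially_correct_text

# Rendering fills only the human turn; the system text is now a shared constant:
messages = template_diagnose_distractor_partially_correct.format_messages(
    standardized_exercise="Q: ...\nA) ...\nB) ...\nC) ..."
)
```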
main.py

@@ -95,8 +95,8 @@ with gr.Blocks() as interface:
     # Build write_fluster tab
     (model_choice_fluster_1,
      model_choice_fluster_2,
-     exercises_input,
      include_diagnosis,
+     exercises_input,
      write_fluster_button,
      [fluster_box_0, fluster_box_1, fluster_box_2, fluster_box_3],
      diagnosis_box_1,
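Note: these names are unpacked positionally from whatever builds the write_fluster tab, so the left-hand order has to match the builder's return order exactly; that mismatch is what this commit corrects. A toy illustration of the failure mode (not repo code):

```python
# Toy example: positional unpacking silently swaps meanings when the
# left-hand order drifts from the return order.
def build_tab():
    # Returns (checkbox, textbox), in that order.
    return ("checkbox:include_diagnosis", "textbox:exercises_input")

include_diagnosis, exercises_input = build_tab()  # order matches the return
assert include_diagnosis.startswith("checkbox:")
assert exercises_input.startswith("textbox:")
```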