fix include_diagnosis, parameter order
- chains/exercises/run_fluster_with_diagnosis.py +3 -1
- chains/exercises/runner_without.py +0 -2
- config/system_prompt_texts.py +11 -0
- config/templates.py +1 -8
- main.py +1 -1
chains/exercises/run_fluster_with_diagnosis.py

@@ -203,8 +203,10 @@ async def diagnose_and_fix_all(
         )
         diag_strings.append(diag_result)
 
+        fluster_config = chain_configs["fluster"]
+
         if "❌" in scorecard:
-            ex_fixed = await fix_exercise(ex, scorecard)
+            ex_fixed = await fix_exercise(ex, scorecard, fluster_config)
             fixed_exs.append(ex_fixed)
         else:
             fixed_exs.append(ex)
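Note: the call-site change implies that `fix_exercise` now takes the chain config as a third positional argument. A minimal sketch of the presumed new signature, purely for orientation (the real definition is not part of this commit, and the config shape is an assumption):

```python
from typing import Any

# Sketch only, not the repo's actual definition: the updated call site
# suggests fix_exercise now threads a per-chain config through to the fix step.
async def fix_exercise(ex: str, scorecard: str, chain_config: dict[str, Any]) -> str:
    """Rewrite a flagged exercise using whatever model/template chain_config names."""
    ...
```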
chains/exercises/runner_without.py

@@ -108,8 +108,6 @@ async def run_fluster_no_diagnosis(
        template_write_b,
        llm_a,
        llm_b,
-        # template_refine,
-        # llm_refine,
        template_sanitize,
        llm_sanitize
    )
config/system_prompt_texts.py

@@ -672,6 +672,17 @@ If you're unsure about any of your distractors or "false statements" one way or
 After lots of iterative prep, trying out different things and reasoning through a wide range of potential options, finally return a complete exercise set of 1 bigger multiple choice exercise and 2 smaller True/False statements.
 """
 
+template_diagnose_distractor_partially_correct_text = """
+You analyze a multiple-choice exercise to detect distractors that are
+partially correct. Some answer choices may contain elements of truth, leading to
+ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractors, in the context of this exercise, could be considered a (partially) correct answer?
+After this, consider whether this is bad enough in the context of this question. It's fine if the correct answer is still obviously most correct and some distractors contain elements of truth, or are 'somewhat true but clearly less true than the correct answer'. There is only a problem if the gap becomes too small and unclear.
+As an intuition pump, ask this question: would any experts consider these distractors also a correct answer? If so, diagnose the problem. If not, it's fine.
+Your only focus is to accurately diagnose this issue; there is no need to provide a fix. Really take your time to arrive at the correct diagnosis.
+Do some reasoning first, and give your diagnosis then. All of your output should be measured and nuanced, except for your very final sentence, where you clearly state your conclusion.
+"""
+
+
 uitgangspunt_template_for_writing_a_fluster = """
 # Task outline
 Given a learning objective, your goal is to write an exercise set of 3 high-quality multiple choice exercises that all test the exact same knowledge that's stated in the learning objective.
config/templates.py

@@ -80,14 +80,7 @@ template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
 
 template_diagnose_distractor_partially_correct = ChatPromptTemplate(
     messages=[
-        ("system", """You analyze a multiple-choice exercise to detect distractors that are
-partially correct. Some answer choices may contain elements of truth, leading to
-ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractors, in the context of this exercise, could be considered a (partially) correct answer?
-After this, consider whether this is bad enough in the context of this question. It's fine if the correct answer is still obviously most correct and some distractors contain elements of truth, or are 'somewhat true but clearly less true than the correct answer'. There is only a problem if the gap becomes too small and unclear.
-As an intuition pump, ask this question: would any experts consider these distractors also a correct answer? If so, diagnose the problem. If not, it's fine.
-Your only focus is to accurately diagnose this issue; there is no need to provide a fix. Really take your time to arrive at the correct diagnosis.
-Do some reasoning first, and give your diagnosis then. All of your output should be measured and nuanced, except for your very final sentence, where you clearly state your conclusion.
-"""),
+        ("system", template_diagnose_distractor_partially_correct_text),
         ("human", "{standardized_exercise}")
     ],
     input_variables=["standardized_exercise"]
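Note: the replacement line references `template_diagnose_distractor_partially_correct_text`, but no import is shown being added to config/templates.py, so presumably one already exists or lands elsewhere. A minimal sketch of the assumed import, plus a quick rendering check (the example exercise string is made up):

```python
# Assumed import path; not shown in this commit:
from config.system_prompt_texts import template_diagnose_distractor_partially_correct_text

# Rendering fills only the human turn; the system text is now a shared constant:
messages = template_diagnose_distractor_partially_correct.format_messages(
    standardized_exercise="Q: ...\nA) ...\nB) ...\nC) ..."
)
```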
main.py

@@ -95,8 +95,8 @@ with gr.Blocks() as interface:
     # Build write_fluster tab
     (model_choice_fluster_1,
      model_choice_fluster_2,
-     exercises_input,
      include_diagnosis,
+     exercises_input,
      write_fluster_button,
      [fluster_box_0, fluster_box_1, fluster_box_2, fluster_box_3],
      diagnosis_box_1,
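Note: these names are unpacked positionally from whatever builds the write_fluster tab, so the left-hand order has to match the builder's return order exactly; that mismatch is what this commit corrects. A toy illustration of the failure mode (not repo code):

```python
# Toy example: positional unpacking silently swaps meanings when the
# left-hand order drifts from the return order.
def build_tab():
    # Returns (checkbox, textbox), in that order.
    return ("checkbox:include_diagnosis", "textbox:exercises_input")

include_diagnosis, exercises_input = build_tab()  # order matches the return
assert include_diagnosis.startswith("checkbox:")
assert exercises_input.startswith("textbox:")
```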