BtB-ExpC committed on
Commit
f595ced
·
1 Parent(s): 0c92067

fix include_diagnosis, parameter order

Browse files
chains/exercises/run_fluster_with_diagnosis.py CHANGED
@@ -203,8 +203,10 @@ async def diagnose_and_fix_all(
203
  )
204
  diag_strings.append(diag_result)
205
 
 
 
206
  if "❌" in scorecard:
207
- ex_fixed = await fix_exercise(ex, scorecard)
208
  fixed_exs.append(ex_fixed)
209
  else:
210
  fixed_exs.append(ex)
 
203
  )
204
  diag_strings.append(diag_result)
205
 
206
+ fluster_config = chain_configs["fluster"]
207
+
208
  if "❌" in scorecard:
209
+ ex_fixed = await fix_exercise(ex, scorecard, fluster_config)
210
  fixed_exs.append(ex_fixed)
211
  else:
212
  fixed_exs.append(ex)
chains/exercises/runner_without.py CHANGED
@@ -108,8 +108,6 @@ async def run_fluster_no_diagnosis(
108
  template_write_b,
109
  llm_a,
110
  llm_b,
111
- # template_refine,
112
- # llm_refine,
113
  template_sanitize,
114
  llm_sanitize
115
  )
 
108
  template_write_b,
109
  llm_a,
110
  llm_b,
 
 
111
  template_sanitize,
112
  llm_sanitize
113
  )
config/system_prompt_texts.py CHANGED
@@ -672,6 +672,17 @@ If you're unsure about any of your distractors or "false statements" one way or
672
  After lots of iterative prep, trying out different things and reasoning through a wide range of potential options, finally return a complete exercise set of 1 bigger multiple choice exercise and 2 smaller True/False statements.
673
  """
674
 
 
 
 
 
 
 
 
 
 
 
 
675
  uitgangspunt_template_for_writing_a_fluster = """
676
  # Task outline
677
  Given a learning objective, your goal is to write an exercise set of 3 high-quality multiple choice exercises that all test the exact same knowledge that's stated in the learning objective.
 
672
  After lots of iterative prep, trying out different things and reasoning through a wide range of potential options, finally return a complete exercise set of 1 bigger multiple choice exercise and 2 smaller True/False statements.
673
  """
674
 
675
+ template_diagnose_distractor_partially_correct_text = """
676
+ You analyze a multiple-choice exercise to detect distractors that are
677
+ partially correct. Some answer choices may contain elements of truth, leading to
678
+ ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractors, in the context of this exercise, could be considered a (partially) correct answer?
679
+ After this, consider if this is bad enough in the context of this question. It's fine if the correct answer is still obviously most correct, and some distractors contain elements of truth, or are 'somewhat true but clearly less clue than the correct answer'. There is only a problem if the gap becomes too small and unclear.
680
+ As an intuition pump, ask this question: would there be any experts that would consider this distractors also a correct answer? If so, diagnose the problem. If not, it's fine.
681
+ Your only focus is to accurately diagnose this issue, no need to provide a fix. Really take your time to arrive at the correct diagnosis.
682
+ Do some reasoning first, and give your diagnosis then. All of your output should be measured and nuanced, except for your very final sentence where you clearly state your conclusion.
683
+ """
684
+
685
+
686
  uitgangspunt_template_for_writing_a_fluster = """
687
  # Task outline
688
  Given a learning objective, your goal is to write an exercise set of 3 high-quality multiple choice exercises that all test the exact same knowledge that's stated in the learning objective.
config/templates.py CHANGED
@@ -80,14 +80,7 @@ template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
80
 
81
  template_diagnose_distractor_partially_correct = ChatPromptTemplate(
82
  messages=[
83
- ("system", """You analyze a multiple-choice exercise to detect distractors that are
84
- partially correct. Some answer choices may contain elements of truth, leading to
85
- ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractors, in the context of this exercise, could be considered a (partially) correct answer?
86
- After this, consider if this is bad enough in the context of this question. It's fine if the correct answer is still obviously most correct, and some distractors contain elements of truth, or are 'somewhat true but clearly less clue than the correct answer'. There is only a problem if the gap becomes too small and unclear.
87
- As an intuition pump, ask this question: would there be any experts that would consider this distractors also a correct answer? If so, diagnose the problem. If not, it's fine.
88
- Your only focus is to accurately diagnose this issue, no need to provide a fix. Really take your time to arrive at the correct diagnosis.
89
- Do some reasoning first, and give your diagnosis then. All of your output should be measured and nuanced, except for your very final sentence where you clearly state your conclusion.
90
- """),
91
  ("human", "{standardized_exercise}")
92
  ],
93
  input_variables=["standardized_exercise"]
 
80
 
81
  template_diagnose_distractor_partially_correct = ChatPromptTemplate(
82
  messages=[
83
+ ("system", template_diagnose_distractor_partially_correct_text),
 
 
 
 
 
 
 
84
  ("human", "{standardized_exercise}")
85
  ],
86
  input_variables=["standardized_exercise"]
main.py CHANGED
@@ -95,8 +95,8 @@ with gr.Blocks() as interface:
95
  # Build write_fluster tab
96
  (model_choice_fluster_1,
97
  model_choice_fluster_2,
98
- exercises_input,
99
  include_diagnosis,
 
100
  write_fluster_button,
101
  [fluster_box_0, fluster_box_1, fluster_box_2, fluster_box_3],
102
  diagnosis_box_1,
 
95
  # Build write_fluster tab
96
  (model_choice_fluster_1,
97
  model_choice_fluster_2,
 
98
  include_diagnosis,
99
+ exercises_input,
100
  write_fluster_button,
101
  [fluster_box_0, fluster_box_1, fluster_box_2, fluster_box_3],
102
  diagnosis_box_1,