BtB-ExpC committed on
Commit
40f7f2d
·
1 Parent(s): 19d0e21

scorecard step added to diagnosis

Browse files
chains/diagnoser_chain.py CHANGED
@@ -8,16 +8,19 @@ from config.exercise_standardizer import standardize_exercise
8
 
9
  class DiagnoserChain(BaseModel):
10
  template_standardize: ChatPromptTemplate
11
- llm_standardize: Any # Fixed LLM for step 1
12
  templates_diagnose: List[ChatPromptTemplate]
13
  llm_diagnose: Any # User-selectable LLM for step 2
 
14
 
15
  async def run(self, user_query: str, exercise_format: str) -> str:
16
  """
17
  Runs the composite chain:
18
- 1. Standardizes the exercise formatting.
19
- 2. Feeds the standardized exercise into multiple diagnosis prompts in parallel.
20
- 3. Combines the outputs from all prompts.
 
 
21
  """
22
  # Step 1: Standardize the exercise.
23
  standardized_exercise = await standardize_exercise(
@@ -25,23 +28,30 @@ class DiagnoserChain(BaseModel):
25
  )
26
 
27
  # Step 2: Define an async helper to run a single diagnosis prompt.
28
- async def run_single(template: ChatPromptTemplate, idx: int) -> str:
29
  prompt = await template.aformat_prompt(standardized_exercise=standardized_exercise)
30
  messages = prompt.to_messages()
31
  diagnosis_response = await self.llm_diagnose.ainvoke(messages)
32
  content = diagnosis_response.content if hasattr(diagnosis_response, "content") else diagnosis_response
33
- return f"**Diagnosis {idx}:**\n{content}"
34
 
35
  # Launch all diagnosis tasks concurrently.
36
  tasks = [
37
- run_single(template, idx)
38
  for idx, template in enumerate(self.templates_diagnose, start=1)
39
  ]
40
  diagnoses = await asyncio.gather(*tasks)
41
 
42
  # Step 3: Combine the outputs from each prompt.
43
- combined_diagnosis = "\n\n---\n\n".join(diagnoses)
44
- return combined_diagnosis
 
 
 
 
 
 
 
45
 
46
  class Config:
47
  arbitrary_types_allowed = True
 
8
 
9
  class DiagnoserChain(BaseModel):
10
  template_standardize: ChatPromptTemplate
11
+ llm_standardize: Any # Fixed LLM for step 1 (NOTE(review): comment says "and 3", but the scorecard step below calls llm_diagnose — confirm which LLM is intended)
12
  templates_diagnose: List[ChatPromptTemplate]
13
  llm_diagnose: Any # User-selectable LLM for step 2
14
+ template_diagnose_scorecard: ChatPromptTemplate
15
 
16
  async def run(self, user_query: str, exercise_format: str) -> str:
17
  """
18
  Runs the composite chain:
19
+ 1. Standardizes the exercise formatting
20
+ 2. Feeds the standardized exercise into multiple diagnosis prompts in parallel
21
+ 3. Combines the outputs from each prompt.
22
+ 4. Generates one-line scorecard of combined diagnoses
23
+
24
  """
25
  # Step 1: Standardize the exercise.
26
  standardized_exercise = await standardize_exercise(
 
28
  )
29
 
30
  # Step 2: Define an async helper to run a single diagnosis prompt.
31
+ async def run_single_diagnosis(template: ChatPromptTemplate, idx: int) -> str:
32
  prompt = await template.aformat_prompt(standardized_exercise=standardized_exercise)
33
  messages = prompt.to_messages()
34
  diagnosis_response = await self.llm_diagnose.ainvoke(messages)
35
  content = diagnosis_response.content if hasattr(diagnosis_response, "content") else diagnosis_response
36
+ return f"[DIAGNOSIS {idx}]{content}"
37
 
38
  # Launch all diagnosis tasks concurrently.
39
  tasks = [
40
+ run_single_diagnosis(template, idx)
41
  for idx, template in enumerate(self.templates_diagnose, start=1)
42
  ]
43
  diagnoses = await asyncio.gather(*tasks)
44
 
45
  # Step 3: Combine the outputs from each prompt.
46
+ combined_diagnosis = "\n\n---\n".join(diagnoses)
47
+
48
+ # Step 4: Generate scorecard
49
+ prompt = await self.template_diagnose_scorecard.aformat_prompt(combined_diagnosis=combined_diagnosis)
50
+ scorecard_messages = prompt.to_messages()
51
+ scorecard_response = await self.llm_diagnose.ainvoke(scorecard_messages)
52
+ scorecard = scorecard_response.content if hasattr(scorecard_response, "content") else scorecard_response
53
+
54
+ return scorecard + "\n" + combined_diagnosis
55
 
56
  class Config:
57
  arbitrary_types_allowed = True
config/chain_configs.py CHANGED
@@ -1,7 +1,8 @@
1
  # config/chain_configs.py
2
  from config.templates import standardize_template, diagnose_template, distractors_template, \
3
- diagnose_double_negation_template, diagnose_correct_answer_stands_out_template, \
4
- diagnose_distractor_clearly_wrong_template, diagnose_distractor_partially_correct_template
 
5
  from chains.diagnoser_chain import DiagnoserChain
6
  from chains.distractors_chain import DistractorsChain
7
  from config.llm_config import llms
@@ -12,13 +13,14 @@ chain_configs = {
12
  "class": DiagnoserChain,
13
  "template_standardize": standardize_template,
14
  "llm_standardize": llms["GPT-4o-mini"], # Always fixed
15
- # Provide a list of 4 different diagnosis templates:
16
  "templates_diagnose": [
17
- diagnose_double_negation_template,
18
- diagnose_correct_answer_stands_out_template,
19
- diagnose_distractor_clearly_wrong_template,
20
- diagnose_distractor_partially_correct_template,
21
  ],
 
22
  "llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
23
  },
24
  "distractors": {
 
1
  # config/chain_configs.py
2
  from config.templates import standardize_template, diagnose_template, distractors_template, \
3
+ template_diagnose_double_negation, template_diagnose_correct_answer_stands_out, \
4
+ template_diagnose_distractor_clearly_wrong, template_diagnose_distractor_partially_correct, \
5
+ diagnose_scorecard_template
6
  from chains.diagnoser_chain import DiagnoserChain
7
  from chains.distractors_chain import DistractorsChain
8
  from config.llm_config import llms
 
13
  "class": DiagnoserChain,
14
  "template_standardize": standardize_template,
15
  "llm_standardize": llms["GPT-4o-mini"], # Always fixed
16
+ # 4 different diagnosis templates (to run in parallel):
17
  "templates_diagnose": [
18
+ template_diagnose_double_negation,
19
+ template_diagnose_correct_answer_stands_out,
20
+ template_diagnose_distractor_clearly_wrong,
21
+ template_diagnose_distractor_partially_correct,
22
  ],
23
+ "template_diagnose_scorecard": diagnose_scorecard_template,
24
  "llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
25
  },
26
  "distractors": {
config/templates.py CHANGED
@@ -19,7 +19,7 @@ diagnose_template = ChatPromptTemplate(
19
  input_variables=["standardized_exercise"]
20
  )
21
 
22
- diagnose_double_negation_template = ChatPromptTemplate(
23
  messages=[
24
  ("system", """You analyze a multiple-choice exercise for the presence of double negatives.
25
  Here are some examples of double negatives:
@@ -66,7 +66,7 @@ diagnose_double_negation_template = ChatPromptTemplate(
66
  input_variables=["standardized_exercise"]
67
  )
68
 
69
- diagnose_correct_answer_stands_out_template = ChatPromptTemplate(
70
  messages=[
71
  ("system", """You evaluate a multiple-choice exercise to determine if the correct answer
72
  stands out too much compared to the distractors. If the correct answer is significantly
@@ -127,7 +127,7 @@ diagnose_correct_answer_stands_out_template = ChatPromptTemplate(
127
  # </explanation how the correct answer stands out>
128
  # </example where X>
129
 
130
- diagnose_distractor_clearly_wrong_template = ChatPromptTemplate(
131
  messages=[
132
  ("system", """You assess a multiple-choice exercise to determine if any distractors
133
  are clearly incorrect and therefore too easy to eliminate. Effective distractors should
@@ -140,7 +140,7 @@ diagnose_distractor_clearly_wrong_template = ChatPromptTemplate(
140
  input_variables=["standardized_exercise"]
141
  )
142
 
143
- diagnose_distractor_partially_correct_template = ChatPromptTemplate(
144
  messages=[
145
  ("system", """You analyze a multiple-choice exercise to detect distractors that are
146
  partially correct. Some answer choices may contain elements of truth, leading to
@@ -153,6 +153,35 @@ diagnose_distractor_partially_correct_template = ChatPromptTemplate(
153
  input_variables=["standardized_exercise"]
154
  )
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  # Template for the distractors brainstorm
157
  distractors_template = ChatPromptTemplate(
158
  messages=[
 
19
  input_variables=["standardized_exercise"]
20
  )
21
 
22
+ template_diagnose_double_negation = ChatPromptTemplate(
23
  messages=[
24
  ("system", """You analyze a multiple-choice exercise for the presence of double negatives.
25
  Here are some examples of double negatives:
 
66
  input_variables=["standardized_exercise"]
67
  )
68
 
69
+ template_diagnose_correct_answer_stands_out = ChatPromptTemplate(
70
  messages=[
71
  ("system", """You evaluate a multiple-choice exercise to determine if the correct answer
72
  stands out too much compared to the distractors. If the correct answer is significantly
 
127
  # </explanation how the correct answer stands out>
128
  # </example where X>
129
 
130
+ template_diagnose_distractor_clearly_wrong = ChatPromptTemplate(
131
  messages=[
132
  ("system", """You assess a multiple-choice exercise to determine if any distractors
133
  are clearly incorrect and therefore too easy to eliminate. Effective distractors should
 
140
  input_variables=["standardized_exercise"]
141
  )
142
 
143
+ template_diagnose_distractor_partially_correct = ChatPromptTemplate(
144
  messages=[
145
  ("system", """You analyze a multiple-choice exercise to detect distractors that are
146
  partially correct. Some answer choices may contain elements of truth, leading to
 
153
  input_variables=["standardized_exercise"]
154
  )
155
 
156
# Template for the one-line scorecard that summarizes the four parallel diagnoses.
# The human message receives the combined diagnosis text produced by DiagnoserChain.
# Fixes vs. previous revision: repaired mojibake emoji (the icons had been
# double-encoded, e.g. "βœ…" instead of ✅), corrected "the issues is" -> "the
# issue is", and fixed the mismatched "</example1 >" closing tag.
diagnose_scorecard_template = ChatPromptTemplate(
    messages=[
        ("system", """You analyze the results of the diagnoses of 4 issues, and consolidate that into a very simple one-line visual scorecard that summarizes all diagnoses, immediately giving an overview of the 4 results.
Use these two icons:
- ✅ means the diagnosis came back negative, the issue is not present.
- ❌ means the diagnosis came back positive, the issue is present.
(and a third icon if need be: - ❔ means you don't understand the diagnosis result)
The scorecard should always look like this:
<template>
|Double negative: [icon] |Correct answer stands out: [icon] |Distractor clearly false: [icon] |Distractor kinda correct: [icon] |
</template>
<example 1>
|Double negative:✅||Correct answer stands out:✅||Distractor clearly false:✅||Distractor kinda correct:✅|
</example 1>
<example 2>
|Double negative:✅||Correct answer stands out:❌||Distractor clearly false:✅||Distractor kinda correct:❌|
</example 2>
<example 3>
|Double negative:❌||Correct answer stands out:❌||Distractor clearly false:✅||Distractor kinda correct:❔|
</example 3>
<example 4>
|Double negative:✅||Correct answer stands out:✅||Distractor clearly false:❌||Distractor kinda correct:✅|
</example 4>
"""),
        ("human", "{combined_diagnosis}")
    ],
    input_variables=["combined_diagnosis"]
)
184
+
185
  # Template for the distractors brainstorm
186
  distractors_template = ChatPromptTemplate(
187
  messages=[