BtB-ExpC committed on
Commit
a02fb0d
·
1 Parent(s): aefff3e

diagnosis prompts finished

Browse files
app.py CHANGED
@@ -141,7 +141,7 @@ with gr.Blocks() as interface:
141
  )
142
  diagnoser_input = gr.Textbox(label="Enter exercise(s) in any format", placeholder="Exercise body: <mc:exercise xmlns:mc= ...")
143
  diagnoser_button = gr.Button("Submit")
144
- diagnoser_response_1 = gr.Markdown(label="Response 1")
145
  diagnoser_response_2 = gr.Textbox(label="Response 2", interactive=False)
146
  diagnoser_response_3 = gr.Textbox(label="Response 3", interactive=False)
147
  diagnoser_response_4 = gr.Textbox(label="Response 4", interactive=False)
 
141
  )
142
  diagnoser_input = gr.Textbox(label="Enter exercise(s) in any format", placeholder="Exercise body: <mc:exercise xmlns:mc= ...")
143
  diagnoser_button = gr.Button("Submit")
144
+ diagnoser_response_1 = gr.Textbox(label="Response 1", interactive=False)
145
  diagnoser_response_2 = gr.Textbox(label="Response 2", interactive=False)
146
  diagnoser_response_3 = gr.Textbox(label="Response 3", interactive=False)
147
  diagnoser_response_4 = gr.Textbox(label="Response 4", interactive=False)
chains/diagnoser_chain.py CHANGED
@@ -1,4 +1,5 @@
1
  # chains/diagnoser_chain.py
 
2
  from pydantic import BaseModel
3
  from typing import Any
4
  from langchain_core.prompts.chat import ChatPromptTemplate
@@ -14,20 +15,33 @@ class DiagnoserChain(BaseModel):
14
  async def run(self, user_query: str, exercise_format: str) -> str:
15
  """
16
  Runs the composite chain:
17
- 1. Standardizes the exercise formatting (if exercise_format isn't Raw).
18
- 2. Generates a diagnosis from the standardized format.
 
19
  """
20
- # --- Step 1: Standardize the exercise formatting (if exercise_format isn't 'Raw (original)') ---
21
  standardized_exercise = await standardize_exercise(
22
  user_query, exercise_format, self.template_standardize, self.llm_standardize
23
  )
24
 
25
- # --- Step 2: Generate a diagnosis using the standardized exercise ---
26
- prompt_diagnose = await self.template_diagnose.aformat_prompt(standardized_exercise=standardized_exercise)
27
- diagnose_messages = prompt_diagnose.to_messages()
28
- diagnosis = await self.llm_diagnose.ainvoke(diagnose_messages)
 
 
 
29
 
30
- return diagnosis.content if hasattr(diagnosis, "content") else diagnosis
 
 
 
 
 
 
 
 
 
31
 
32
  class Config:
33
  arbitrary_types_allowed = True
 
1
  # chains/diagnoser_chain.py
2
+ import asyncio
3
  from pydantic import BaseModel
4
  from typing import Any
5
  from langchain_core.prompts.chat import ChatPromptTemplate
 
15
  async def run(self, user_query: str, exercise_format: str) -> str:
16
  """
17
  Runs the composite chain:
18
+ 1. Standardizes the exercise formatting.
19
+ 2. Feeds the standardized exercise into multiple diagnosis prompts in parallel.
20
+ 3. Combines the outputs from all prompts.
21
  """
22
+ # Step 1: Standardize the exercise.
23
  standardized_exercise = await standardize_exercise(
24
  user_query, exercise_format, self.template_standardize, self.llm_standardize
25
  )
26
 
27
+ # Step 2: Define an async helper to run a single diagnosis prompt.
28
+ async def run_single(template: ChatPromptTemplate, idx: int) -> str:
29
+ prompt = await template.aformat_prompt(standardized_exercise=standardized_exercise)
30
+ messages = prompt.to_messages()
31
+ diagnosis_response = await self.llm_diagnose.ainvoke(messages)
32
+ content = diagnosis_response.content if hasattr(diagnosis_response, "content") else diagnosis_response
33
+ return f"**Diagnosis {idx}:**\n{content}"
34
 
35
+ # Launch all diagnosis tasks concurrently.
36
+ tasks = [
37
+ run_single(template, idx)
38
+ for idx, template in enumerate(self.templates_diagnose, start=1)
39
+ ]
40
+ diagnoses = await asyncio.gather(*tasks)
41
+
42
+ # Step 3: Combine the outputs from each prompt.
43
+ combined_diagnosis = "\n\n---\n\n".join(diagnoses)
44
+ return combined_diagnosis
45
 
46
  class Config:
47
  arbitrary_types_allowed = True
config/chain_configs.py CHANGED
@@ -10,9 +10,14 @@ chain_configs = {
10
  "class": DiagnoserChain,
11
  "template_standardize": standardize_template,
12
  "llm_standardize": llms["GPT-4o-mini"], # Always fixed
13
- "template_diagnose": diagnose_template,
 
 
 
 
 
 
14
  "llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
15
-
16
  },
17
  "distractors": {
18
  "class": DistractorsChain,
 
10
  "class": DiagnoserChain,
11
  "template_standardize": standardize_template,
12
  "llm_standardize": llms["GPT-4o-mini"], # Always fixed
13
+ # Provide a list of 4 different diagnosis templates:
14
+ "templates_diagnose": [
15
+ diagnose_double_negation_template,
16
+ diagnose_correct_answer_stands_out_template,
17
+ diagnose_distractor_clearly_wrong_template,
18
+ diagnose_distractor_partially_correct_template,
19
+ ],
20
  "llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
 
21
  },
22
  "distractors": {
23
  "class": DistractorsChain,
config/templates.py CHANGED
@@ -13,16 +13,152 @@ standardize_template = ChatPromptTemplate(
13
  # Template to generate a diagnosis from the standardized exercise.
14
  diagnose_template = ChatPromptTemplate(
15
  messages=[
16
- ("system", "You are a diagnostic assistant. Based on the given exercise(s), provide a detailed diagnosis of potential issues. What makes this exercise sub-par, worse than it could be, not yet perfect? Only give the diagnosis, no solutions."),
17
  ("human", "{standardized_exercise}")
18
  ],
19
  input_variables=["standardized_exercise"]
20
  )
21
 
22
- # Template for the distractors brainstorm (a single-step chain).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  distractors_template = ChatPromptTemplate(
24
  messages=[
25
- ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with 10 additional distractors: alternative answer options that are not correct, yet plausible enough that a poorly informed student might pick them. Vary the degree of 'almost correctness' and 'clearly incorrectness' between them to provide a wide range of options."),
 
 
26
  ("human", "{user_input}")
27
  ],
28
  input_variables=["standardized_exercise"]
 
13
  # Template to generate a diagnosis from the standardized exercise.
14
  diagnose_template = ChatPromptTemplate(
15
  messages=[
16
+ ("system", "Based on the given exercise, provide a detailed diagnosis of potential issues. What makes this exercise sub-par, worse than it could be, not yet perfect? Only give the diagnosis, no solutions."),
17
  ("human", "{standardized_exercise}")
18
  ],
19
  input_variables=["standardized_exercise"]
20
  )
21
 
22
+ diagnose_double_negation_template = ChatPromptTemplate(
23
+ messages=[
24
+ ("system", """You analyze a multiple-choice exercise for the presence of double negatives.
25
+ Here are some examples of double negatives:
26
+
27
+ <example 1>
28
+ <exercise 1>
29
+ Stelling
30
+ Expertfolio wordt niet aangeboden door ENI.
31
+
32
+ Keuzeopties:
33
+ 1. Deze stelling is niet correct
34
+ 2. Deze stelling is correct
35
+
36
+ Correct antwoord:
37
+ 1. Deze stelling is niet correct
38
+ </exercise 1>
39
+ <double negative explanation>
40
+ Een niet-correctvraag met 'niet' (het is niet correct dat Expertfolio niet wordt aangeboden) is een dubbele ontkenning.
41
+ </double negative explanation>
42
+ </example 1>
43
+
44
+ <example 2>
45
+ <exercise 2>
46
+ Vraag
47
+ Welk aspect hoort niet bij eenzaamheid?
48
+
49
+ Keuzeopties:
50
+ 1. Betekenisvolle relaties hebben
51
+ 2. Depressiviteit en angst
52
+ 3. Veel alleen zijn
53
+ 4. Geen lijfelijk contact hebben
54
+
55
+ Correct antwoord:
56
+ Het ontbreken van betekenisvolle relaties
57
+ </exercise 2>
58
+ <double negative explanation>
59
+ In de vraag staat al 'niet'. In keuzeoptie 4 staat ook nog 'geen', dat is dus een dubbele ontkenning.
60
+ </double negative explanation>
61
+ </example 2>
62
+ If it's obvious that there is or isn't a double negative in this exercise, just give a short one-sentence diagnosis on this.
63
+ If you're not quite sure, do some reasoning first, and give your diagnosis then."""),
64
+ ("human", "{standardized_exercise}")
65
+ ],
66
+ input_variables=["standardized_exercise"]
67
+ )
68
+
69
+ diagnose_correct_answer_stands_out_template = ChatPromptTemplate(
70
+ messages=[
71
+ ("system", """You evaluate a multiple-choice exercise to determine if the correct answer
72
+ stands out too much compared to the distractors. If the correct answer is significantly
73
+ longer, more detailed, or structurally or grammatically different, this is undesirable. Identify such
74
+ cases.
75
+ Here are some examples of cases where the correct answer stands out:
76
+
77
+ <example where the correct answer is much longer>
78
+ <exercise>
79
+ Theorie:
80
+ De volgende afbeelding komt uit een onderzoek over eenzaamheid dat in 2012 is uitgevoerd.
81
+
82
+ Vraag:
83
+ Bij welke groep komt eenzaamheid volgens dit onderzoek het vaakst voor?
84
+
85
+ 1. Gehandicapten
86
+ 2. Mantelzorgers
87
+ 3. Mensen met langdurige psychische aandoeningen
88
+ 4. Sporters
89
+
90
+ Correct antwoord:
91
+ 3. Mensen met langdurige psychische aandoeningen.
92
+ </exercise>
93
+ <explanation how the correct answer stands out>
94
+ Alle afleiders zijn 1 woord (kort), terwijl het correcte antwoord een zin is (duidelijk langer).
95
+ </explanation how the correct answer stands out>
96
+ </example where the correct answer is much longer>
97
+
98
+ <example where the correct answer is grammatically different>
99
+ <exercise>
100
+ Vraag: Wat is alimentatie?
101
+
102
+ 1. Geld dat betaald moet worden na een scheiding
103
+ 2. Een lening van de overheid
104
+ 3. Een maandelijkse bijdrage aan liefdadigheid
105
+ 4. Een belastingteruggave
106
+
107
+ Correct antwoord:
108
+ 1. Geld dat betaald moet worden na een scheiding of als men niet meer samen is met de andere ouder van de kinderen.
109
+
110
+ </exercise>
111
+ <explanation how the correct answer stands out>
112
+ Alle afleiders beginnen met "Een", maar het correcte antwoord begint anders.
113
+ </explanation how the correct answer stands out>
114
+ </example where the correct answer is grammatically different>
115
+
116
+ Your only focus is to accurately diagnose this issue, no need to provide a fix. If the correct answer in the given exercise clearly does or does not stand out, just give a short one-sentence diagnosis on this.
117
+ If you're not quite sure, do some reasoning first, and give your diagnosis then."""),
118
+ ("human", "{standardized_exercise}")
119
+ ],
120
+ input_variables=["standardized_exercise"]
121
+ )
122
+
123
+ # <example where X>
124
+ # <exercise>
125
+ # </exercise>
126
+ # <explanation how the correct answer stands out>
127
+ # </explanation how the correct answer stands out>
128
+ # </example where X>
129
+
130
+ diagnose_distractor_clearly_wrong_template = ChatPromptTemplate(
131
+ messages=[
132
+ ("system", """You assess a multiple-choice exercise to determine if any distractors
133
+ are clearly incorrect and therefore too easy to eliminate. Effective distractors should
134
+ be plausible but incorrect.
135
+ Identify distractors that are obviously wrong, such that even students who are completely uninformed about the topic can eliminate them.
136
+ Your only focus is to accurately diagnose this issue, no need to provide a fix. If all distractors in the given exercise clearly are or aren't obviously incorrect, just give a short one-sentence diagnosis on this.
137
+ If you're not quite sure, do some reasoning first, and give your diagnosis then."""),
138
+ ("human", "{standardized_exercise}")
139
+ ],
140
+ input_variables=["standardized_exercise"]
141
+ )
142
+
143
+ diagnose_distractor_partially_correct_template = ChatPromptTemplate(
144
+ messages=[
145
+ ("system", """You analyze a multiple-choice exercise to detect distractors that are
146
+ partially correct. Some answer choices may contain elements of truth, leading to
147
+ ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractor, in the context of this exercise, could be considered a (partially) correct answer?
148
+ Your only focus is to accurately diagnose this issue, no need to provide a fix. If all distractors in the given exercise clearly are or aren't unambiguously false, just give a short one-sentence diagnosis on this.
149
+ If you're not quite sure, do some reasoning first, and give your diagnosis then.
150
+ """),
151
+ ("human", "{standardized_exercise}")
152
+ ],
153
+ input_variables=["standardized_exercise"]
154
+ )
155
+
156
+ # Template for the distractors brainstorm
157
  distractors_template = ChatPromptTemplate(
158
  messages=[
159
+ ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with 10 additional distractors: "
159
+ "alternative answer options that are not correct, yet plausible enough that a poorly informed student might pick them. "
160
+ "Vary the degree of 'almost correctness' and 'clear incorrectness' between them to provide a wide range of options."),
162
  ("human", "{user_input}")
163
  ],
164
  input_variables=["standardized_exercise"]