diagnosis prompts finished
Browse files
- app.py +1 -1
- chains/diagnoser_chain.py +22 -8
- config/chain_configs.py +7 -2
- config/templates.py +139 -3
app.py
CHANGED
|
@@ -141,7 +141,7 @@ with gr.Blocks() as interface:
|
|
| 141 |
)
|
| 142 |
diagnoser_input = gr.Textbox(label="Enter exercise(s) in any format", placeholder="Exercise body: <mc:exercise xmlns:mc= ...")
|
| 143 |
diagnoser_button = gr.Button("Submit")
|
| 144 |
-
diagnoser_response_1 = gr.
|
| 145 |
diagnoser_response_2 = gr.Textbox(label="Response 2", interactive=False)
|
| 146 |
diagnoser_response_3 = gr.Textbox(label="Response 3", interactive=False)
|
| 147 |
diagnoser_response_4 = gr.Textbox(label="Response 4", interactive=False)
|
|
|
|
| 141 |
)
|
| 142 |
diagnoser_input = gr.Textbox(label="Enter exercise(s) in any format", placeholder="Exercise body: <mc:exercise xmlns:mc= ...")
|
| 143 |
diagnoser_button = gr.Button("Submit")
|
| 144 |
+
diagnoser_response_1 = gr.Textbox(label="Response 1", interactive=False)
|
| 145 |
diagnoser_response_2 = gr.Textbox(label="Response 2", interactive=False)
|
| 146 |
diagnoser_response_3 = gr.Textbox(label="Response 3", interactive=False)
|
| 147 |
diagnoser_response_4 = gr.Textbox(label="Response 4", interactive=False)
|
chains/diagnoser_chain.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# chains/diagnoser_chain.py
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
from typing import Any
|
| 4 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
|
@@ -14,20 +15,33 @@ class DiagnoserChain(BaseModel):
|
|
| 14 |
async def run(self, user_query: str, exercise_format: str) -> str:
|
| 15 |
"""
|
| 16 |
Runs the composite chain:
|
| 17 |
-
1. Standardizes the exercise formatting
|
| 18 |
-
2.
|
|
|
|
| 19 |
"""
|
| 20 |
-
#
|
| 21 |
standardized_exercise = await standardize_exercise(
|
| 22 |
user_query, exercise_format, self.template_standardize, self.llm_standardize
|
| 23 |
)
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
class Config:
|
| 33 |
arbitrary_types_allowed = True
|
|
|
|
| 1 |
# chains/diagnoser_chain.py
|
| 2 |
+
import asyncio
|
| 3 |
from pydantic import BaseModel
|
| 4 |
from typing import Any
|
| 5 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
|
|
|
| 15 |
async def run(self, user_query: str, exercise_format: str) -> str:
|
| 16 |
"""
|
| 17 |
Runs the composite chain:
|
| 18 |
+
1. Standardizes the exercise formatting.
|
| 19 |
+
2. Feeds the standardized exercise into multiple diagnosis prompts in parallel.
|
| 20 |
+
3. Combines the outputs from all prompts.
|
| 21 |
"""
|
| 22 |
+
# Step 1: Standardize the exercise.
|
| 23 |
standardized_exercise = await standardize_exercise(
|
| 24 |
user_query, exercise_format, self.template_standardize, self.llm_standardize
|
| 25 |
)
|
| 26 |
|
| 27 |
+
# Step 2: Define an async helper to run a single diagnosis prompt.
|
| 28 |
+
async def run_single(template: ChatPromptTemplate, idx: int) -> str:
|
| 29 |
+
prompt = await template.aformat_prompt(standardized_exercise=standardized_exercise)
|
| 30 |
+
messages = prompt.to_messages()
|
| 31 |
+
diagnosis_response = await self.llm_diagnose.ainvoke(messages)
|
| 32 |
+
content = diagnosis_response.content if hasattr(diagnosis_response, "content") else diagnosis_response
|
| 33 |
+
return f"**Diagnosis {idx}:**\n{content}"
|
| 34 |
|
| 35 |
+
# Launch all diagnosis tasks concurrently.
|
| 36 |
+
tasks = [
|
| 37 |
+
run_single(template, idx)
|
| 38 |
+
for idx, template in enumerate(self.templates_diagnose, start=1)
|
| 39 |
+
]
|
| 40 |
+
diagnoses = await asyncio.gather(*tasks)
|
| 41 |
+
|
| 42 |
+
# Step 3: Combine the outputs from each prompt.
|
| 43 |
+
combined_diagnosis = "\n\n---\n\n".join(diagnoses)
|
| 44 |
+
return combined_diagnosis
|
| 45 |
|
| 46 |
class Config:
|
| 47 |
arbitrary_types_allowed = True
|
config/chain_configs.py
CHANGED
|
@@ -10,9 +10,14 @@ chain_configs = {
|
|
| 10 |
"class": DiagnoserChain,
|
| 11 |
"template_standardize": standardize_template,
|
| 12 |
"llm_standardize": llms["GPT-4o-mini"], # Always fixed
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
|
| 15 |
-
|
| 16 |
},
|
| 17 |
"distractors": {
|
| 18 |
"class": DistractorsChain,
|
|
|
|
| 10 |
"class": DiagnoserChain,
|
| 11 |
"template_standardize": standardize_template,
|
| 12 |
"llm_standardize": llms["GPT-4o-mini"], # Always fixed
|
| 13 |
+
# Provide a list of 4 different diagnosis templates:
|
| 14 |
+
"templates_diagnose": [
|
| 15 |
+
diagnose_double_negation_template,
|
| 16 |
+
diagnose_correct_answer_stands_out_template,
|
| 17 |
+
diagnose_distractor_clearly_wrong_template,
|
| 18 |
+
diagnose_distractor_partially_correct_template,
|
| 19 |
+
],
|
| 20 |
"llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
|
|
|
|
| 21 |
},
|
| 22 |
"distractors": {
|
| 23 |
"class": DistractorsChain,
|
config/templates.py
CHANGED
|
@@ -13,16 +13,152 @@ standardize_template = ChatPromptTemplate(
|
|
| 13 |
# Template to generate a diagnosis from the standardized exercise.
|
| 14 |
diagnose_template = ChatPromptTemplate(
|
| 15 |
messages=[
|
| 16 |
-
("system", "
|
| 17 |
("human", "{standardized_exercise}")
|
| 18 |
],
|
| 19 |
input_variables=["standardized_exercise"]
|
| 20 |
)
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
distractors_template = ChatPromptTemplate(
|
| 24 |
messages=[
|
| 25 |
-
("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with 10 additional distractors:
|
|
|
|
|
|
|
| 26 |
("human", "{user_input}")
|
| 27 |
],
|
| 28 |
input_variables=["standardized_exercise"]
|
|
|
|
| 13 |
# Template to generate a diagnosis from the standardized exercise.
|
| 14 |
diagnose_template = ChatPromptTemplate(
|
| 15 |
messages=[
|
| 16 |
+
("system", "Based on the given exercise, provide a detailed diagnosis of potential issues. What makes this exercise sub-par, worse than it could be, not yet perfect? Only give the diagnosis, no solutions."),
|
| 17 |
("human", "{standardized_exercise}")
|
| 18 |
],
|
| 19 |
input_variables=["standardized_exercise"]
|
| 20 |
)
|
| 21 |
|
| 22 |
+
diagnose_double_negation_template = ChatPromptTemplate(
|
| 23 |
+
messages=[
|
| 24 |
+
("system", """You analyze a multiple-choice exercise for the presence of double negatives.
|
| 25 |
+
Here are some examples of double negatives:
|
| 26 |
+
|
| 27 |
+
<example 1>
|
| 28 |
+
<exercise 1>
|
| 29 |
+
Stelling
|
| 30 |
+
Expertfolio wordt niet aangeboden door ENI.
|
| 31 |
+
|
| 32 |
+
Keuzeopties:
|
| 33 |
+
1. Deze stelling is niet correct
|
| 34 |
+
2. Deze stelling is correct
|
| 35 |
+
|
| 36 |
+
Correct antwoord:
|
| 37 |
+
1. Deze stelling is niet correct
|
| 38 |
+
</exercise 1>
|
| 39 |
+
<double negative explanation>
|
| 40 |
+
Een niet-correctvraag met 'niet' (het is niet correct dat Expertfolio niet wordt aangeboden) is een dubbele ontkenning.
|
| 41 |
+
</double negative explanation>
|
| 42 |
+
</example 1>
|
| 43 |
+
|
| 44 |
+
<example 2>
|
| 45 |
+
<exercise 2>
|
| 46 |
+
Vraag
|
| 47 |
+
Welk aspect hoort niet bij eenzaamheid?
|
| 48 |
+
|
| 49 |
+
Keuzeopties:
|
| 50 |
+
1. Betekenisvolle relaties hebben
|
| 51 |
+
2. Depressiviteit en angst
|
| 52 |
+
3. Veel alleen zijn
|
| 53 |
+
4. Geen lijfelijk contact hebben
|
| 54 |
+
|
| 55 |
+
Correct antwoord:
|
| 56 |
+
Het ontbreken van betekenisvolle relaties
|
| 57 |
+
</exercise 2>
|
| 58 |
+
<double negative explanation>
|
| 59 |
+
In de vraag staat al 'niet'. In keuzeoptie 4 staat ook nog 'geen', dat is dus een dubbele ontkenning.
|
| 60 |
+
</double negative explanation>
|
| 61 |
+
</example 2>
|
| 62 |
+
If it's obvious that there is or isn't a double negative in this exercise, just give a short one-sentence diagnosis on this.
|
| 63 |
+
If you're not quite sure, do some reasoning first, and give your diagnosis then."""),
|
| 64 |
+
("human", "{standardized_exercise}")
|
| 65 |
+
],
|
| 66 |
+
input_variables=["standardized_exercise"]
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
diagnose_correct_answer_stands_out_template = ChatPromptTemplate(
|
| 70 |
+
messages=[
|
| 71 |
+
("system", """You evaluate a multiple-choice exercise to determine if the correct answer
|
| 72 |
+
stands out too much compared to the distractors. If the correct answer is significantly
|
| 73 |
+
longer, more detailed, or structurally or grammatically different, this is undesirable. Identify such
|
| 74 |
+
cases.
|
| 75 |
+
Here are some examples of cases where the correct answer stands out:
|
| 76 |
+
|
| 77 |
+
<example where the correct answer is much longer>
|
| 78 |
+
<exercise>
|
| 79 |
+
Theorie:
|
| 80 |
+
De volgende afbeelding komt uit een onderzoek over eenzaamheid dat in 2012 is uitgevoerd.
|
| 81 |
+
|
| 82 |
+
Vraag:
|
| 83 |
+
Bij welke groep komt eenzaamheid volgens dit onderzoek het vaakst voor?
|
| 84 |
+
|
| 85 |
+
1. Gehandicapten
|
| 86 |
+
2. Mantelzorgers
|
| 87 |
+
3. Mensen met langdurige psychische aandoeningen
|
| 88 |
+
4. Sporters
|
| 89 |
+
|
| 90 |
+
Correct antwoord:
|
| 91 |
+
3. Mensen met langdurige psychische aandoeningen.
|
| 92 |
+
</exercise>
|
| 93 |
+
<explanation how the correct answer stands out>
|
| 94 |
+
Alle afleiders zijn 1 woord (kort), terwijl het correcte antwoord een zin is (duidelijk langer).
|
| 95 |
+
</explanation how the correct answer stands out>
|
| 96 |
+
</example where the correct answer is much longer>
|
| 97 |
+
|
| 98 |
+
<example where the correct answer is grammatically different>
|
| 99 |
+
<exercise>
|
| 100 |
+
Vraag: Wat is alimentatie?
|
| 101 |
+
|
| 102 |
+
1. Geld dat betaald moet worden na een scheiding
|
| 103 |
+
2. Een lening van de overheid
|
| 104 |
+
3. Een maandelijkse bijdrage aan liefdadigheid
|
| 105 |
+
4. Een belastingteruggave
|
| 106 |
+
|
| 107 |
+
Correct antwoord:
|
| 108 |
+
1. Geld dat betaald moet worden na een scheiding of als men niet meer samen is met de andere ouder van de kinderen.
|
| 109 |
+
|
| 110 |
+
</exercise>
|
| 111 |
+
<explanation how the correct answer stands out>
|
| 112 |
+
Alle afleiders beginnen met "Een", maar het correcte antwoord begint anders.
|
| 113 |
+
</explanation how the correct answer stands out>
|
| 114 |
+
</example where the correct answer is grammatically different>
|
| 115 |
+
|
| 116 |
+
Your only focus is to accurately diagnose this issue, no need to provide a fix. If the correct answer in the given exercise clearly does or does not stand out, just give a short one-sentence diagnosis on this.
|
| 117 |
+
If you're not quite sure, do some reasoning first, and give your diagnosis then."""),
|
| 118 |
+
("human", "{standardized_exercise}")
|
| 119 |
+
],
|
| 120 |
+
input_variables=["standardized_exercise"]
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
# <example where X>
|
| 124 |
+
# <exercise>
|
| 125 |
+
# </exercise>
|
| 126 |
+
# <explanation how the correct answer stands out>
|
| 127 |
+
# </explanation how the correct answer stands out>
|
| 128 |
+
# </example where X>
|
| 129 |
+
|
| 130 |
+
diagnose_distractor_clearly_wrong_template = ChatPromptTemplate(
|
| 131 |
+
messages=[
|
| 132 |
+
("system", """You assess a multiple-choice exercise to determine if any distractors
|
| 133 |
+
are clearly incorrect and therefore too easy to eliminate. Effective distractors should
|
| 134 |
+
be plausible but incorrect.
|
| 135 |
+
Identify distractors that are obviously wrong, such that even students that are completely uninformed about the topic can eliminate them.
|
| 136 |
+
Your only focus is to accurately diagnose this issue, no need to provide a fix. If all distractors in the given exercise clearly are or aren't obviously incorrect, just give a short one-sentence diagnosis on this.
|
| 137 |
+
If you're not quite sure, do some reasoning first, and give your diagnosis then."""),
|
| 138 |
+
("human", "{standardized_exercise}")
|
| 139 |
+
],
|
| 140 |
+
input_variables=["standardized_exercise"]
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
diagnose_distractor_partially_correct_template = ChatPromptTemplate(
|
| 144 |
+
messages=[
|
| 145 |
+
("system", """You analyze a multiple-choice exercise to detect distractors that are
|
| 146 |
+
partially correct. Some answer choices may contain elements of truth, leading to
|
| 147 |
+
ambiguity. Identify such cases. Really stress-test them: is there a story you could tell where the distractor, in the context of this exercise, could be considered a (partially) correct answer?
|
| 148 |
+
Your only focus is to accurately diagnose this issue, no need to provide a fix. If all distractors in the given exercise clearly are or aren't unambiguously false, just give a short one-sentence diagnosis on this.
|
| 149 |
+
If you're not quite sure, do some reasoning first, and give your diagnosis then.
|
| 150 |
+
"""),
|
| 151 |
+
("human", "{standardized_exercise}")
|
| 152 |
+
],
|
| 153 |
+
input_variables=["standardized_exercise"]
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
# Template for the distractors brainstorm
|
| 157 |
distractors_template = ChatPromptTemplate(
|
| 158 |
messages=[
|
| 159 |
+
("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with 10 additional distractors: "
|
| 160 |
+
"alternative answer options that are not correct, yet plausible enough that a poorly informed student might pick them. "
|
| 161 |
+
"Vary the degree of 'almost correctness' and 'clearly incorrectness' between them to provide a wide range of options."),
|
| 162 |
("human", "{user_input}")
|
| 163 |
],
|
| 164 |
input_variables=["standardized_exercise"]
|