expanding
Browse files- chains/diagnoser_chain.py +13 -65
- chains/distractors_chain.py +29 -2
- config/chain_configs.py +7 -4
- config/exercise_standardizer.py +32 -0
- config/format_mappings.py +52 -0
- config/templates.py +3 -3
chains/diagnoser_chain.py
CHANGED
|
@@ -2,11 +2,13 @@
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
from typing import Any
|
| 4 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
|
|
|
|
|
|
| 5 |
|
| 6 |
class DiagnoserChain(BaseModel):
|
| 7 |
template_standardize: ChatPromptTemplate
|
| 8 |
-
template_diagnose: ChatPromptTemplate
|
| 9 |
llm_standardize: Any # Fixed LLM for step 1
|
|
|
|
| 10 |
llm_diagnose: Any # User-selectable LLM for step 2
|
| 11 |
|
| 12 |
async def run(self, user_query: str, exercise_format: str) -> str:
|
|
@@ -15,72 +17,18 @@ class DiagnoserChain(BaseModel):
|
|
| 15 |
1. Standardizes the exercise formatting (if exercise_format isn't Raw).
|
| 16 |
2. Generates a diagnosis from the standardized format.
|
| 17 |
"""
|
| 18 |
-
# --- Step 1: Standardize the exercise formatting ---
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
mapping = {
|
| 23 |
-
"Markdown": (
|
| 24 |
-
"Please format the exercise in Markdown, similarly to this example:\n\n"
|
| 25 |
-
"**Theorie** \n"
|
| 26 |
-
"Eenzaamheid wordt door ieder persoon anders ervaren en is daarom subjectief.\n\n"
|
| 27 |
-
"---\n\n"
|
| 28 |
-
"**Vraag** \n"
|
| 29 |
-
"Wat is de meest passende definitie van eenzaamheid?\n\n"
|
| 30 |
-
"1. Het gevoel geen connectie te hebben met anderen \n"
|
| 31 |
-
"2. Regelmatig in je eentje zijn \n"
|
| 32 |
-
"3. Beide bovenstaande \n"
|
| 33 |
-
"4. Geen van bovenstaande \n\n"
|
| 34 |
-
"**Correct antwoord:** \n"
|
| 35 |
-
"1. Het gevoel geen connectie te hebben met anderen."
|
| 36 |
-
),
|
| 37 |
-
"XML": (
|
| 38 |
-
"Please reformat in XML, following this example:\n"
|
| 39 |
-
"<exercise>\n"
|
| 40 |
-
" <content>\n"
|
| 41 |
-
" <question>Theorie:\n"
|
| 42 |
-
"Eenzaamheid wordt door ieder persoon anders ervaren en is daarom subjectief.\n\n"
|
| 43 |
-
"Vraag:\n"
|
| 44 |
-
"Wat is de meest passende definitie van eenzaamheid?</question>\n"
|
| 45 |
-
" <choices>\n"
|
| 46 |
-
" <choice id=\"1\">Het gevoel geen connectie te hebben met anderen</choice>\n"
|
| 47 |
-
" <choice id=\"2\">Regelmatig in je eentje zijn</choice>\n"
|
| 48 |
-
" <choice id=\"3\">Beide bovenstaande</choice>\n"
|
| 49 |
-
" <choice id=\"4\">Geen van bovenstaande</choice>\n"
|
| 50 |
-
" </choices>\n"
|
| 51 |
-
" </content>\n"
|
| 52 |
-
" <answer>\n"
|
| 53 |
-
" <correct-choice>1</correct-choice>\n"
|
| 54 |
-
" <explanation></explanation>\n"
|
| 55 |
-
" </answer>\n"
|
| 56 |
-
"</exercise>"
|
| 57 |
-
),
|
| 58 |
-
"Plaintext": (
|
| 59 |
-
"Please reformat in plain text, following this example:\n\n"
|
| 60 |
-
"Theorie\n"
|
| 61 |
-
"Eenzaamheid wordt door ieder persoon anders ervaren en is daarom subjectief.\n\n"
|
| 62 |
-
"Vraag\n"
|
| 63 |
-
"Wat is de meest passende definitie van eenzaamheid?\n\n"
|
| 64 |
-
"1. Het gevoel geen connectie te hebben met anderen\n"
|
| 65 |
-
"2. Regelmatig in je eentje zijn\n"
|
| 66 |
-
"3. Beide bovenstaande\n"
|
| 67 |
-
"4. Geen van bovenstaande\n\n"
|
| 68 |
-
"Correct antwoord:\n"
|
| 69 |
-
"1. Het gevoel geen connectie te hebben met anderen."
|
| 70 |
-
)
|
| 71 |
-
}
|
| 72 |
-
formatting_instructions = mapping.get(exercise_format, "Please reformat the given exercise to ease further processing.")
|
| 73 |
-
prompt_std = await self.template_standardize.aformat_prompt(
|
| 74 |
-
user_input=user_query,
|
| 75 |
-
formatting_instructions=formatting_instructions
|
| 76 |
-
)
|
| 77 |
-
std_messages = prompt_std.to_messages()
|
| 78 |
-
standardized_exercise = await self.llm_standardize.ainvoke(std_messages)
|
| 79 |
|
| 80 |
# --- Step 2: Generate a diagnosis using the standardized exercise ---
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
diagnosis =
|
|
|
|
|
|
|
|
|
|
| 84 |
return diagnosis
|
| 85 |
|
| 86 |
class Config:
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
from typing import Any
|
| 4 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
| 5 |
+
from config.exercise_standardizer import standardize_exercise
|
| 6 |
+
|
| 7 |
|
| 8 |
class DiagnoserChain(BaseModel):
|
| 9 |
template_standardize: ChatPromptTemplate
|
|
|
|
| 10 |
llm_standardize: Any # Fixed LLM for step 1
|
| 11 |
+
template_diagnose: ChatPromptTemplate
|
| 12 |
llm_diagnose: Any # User-selectable LLM for step 2
|
| 13 |
|
| 14 |
async def run(self, user_query: str, exercise_format: str) -> str:
    """Run the two-step diagnosis chain and return the diagnosis text.

    1. Standardize the exercise formatting via ``standardize_exercise``,
       using ``template_standardize`` and ``llm_standardize``.
    2. Stream a diagnosis of the standardized exercise from
       ``llm_diagnose`` and collect it into one string.

    Args:
        user_query: The exercise text as supplied by the user.
        exercise_format: Name of the target format used in step 1.

    Returns:
        The complete diagnosis, concatenated from the streamed chunks.
    """
    # --- Step 1: Standardize the exercise formatting ---
    standardized_exercise = await standardize_exercise(
        user_query, exercise_format, self.template_standardize, self.llm_standardize
    )

    # --- Step 2: Generate a diagnosis using the standardized exercise ---
    prompt_diagnose = await self.template_diagnose.aformat_prompt(
        standardized_exercise=standardized_exercise
    )
    diagnose_messages = prompt_diagnose.to_messages()

    pieces = []
    async for chunk in self.llm_diagnose.astream(diagnose_messages):
        # Chat models stream message-chunk objects carrying the text in
        # `.content`, while plain LLMs stream `str`; `"" + chunk` would raise
        # TypeError for the former, so normalise to text first.
        text = chunk if isinstance(chunk, str) else getattr(chunk, "content", "")
        pieces.append(text if isinstance(text, str) else str(text))
        # A streaming UI could forward `text` to the frontend at this point.
    return "".join(pieces)
|
| 33 |
|
| 34 |
class Config:
|
chains/distractors_chain.py
CHANGED
|
@@ -2,12 +2,39 @@
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
from typing import Any
|
| 4 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
|
|
|
|
|
|
| 5 |
|
| 6 |
class DistractorsChain(BaseModel):
|
|
|
|
|
|
|
| 7 |
template: ChatPromptTemplate
|
| 8 |
-
llm: Any
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
async def run(self, user_query: str) -> str:
|
| 11 |
prompt = await self.template.aformat_prompt(user_input=user_query)
|
| 12 |
messages = prompt.to_messages()
|
| 13 |
result = await self.llm.ainvoke(messages)
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
from typing import Any
|
| 4 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
| 5 |
+
from config.exercise_standardizer import standardize_exercise
|
| 6 |
+
|
| 7 |
|
| 8 |
class DistractorsChain(BaseModel):
|
| 9 |
+
llm_standardize: Any # Fixed LLM for step 1
|
| 10 |
+
template_standardize: ChatPromptTemplate
|
| 11 |
template: ChatPromptTemplate
|
| 12 |
+
llm: Any # User-selectable LLM for step 2
|
| 13 |
+
|
| 14 |
+
async def run(self, user_query: str, exercise_format: str) -> str:
    """Run the two-step distractors chain and return the generated text.

    1. Standardize the exercise formatting via ``standardize_exercise``,
       using ``template_standardize`` and ``llm_standardize``.
    2. Stream new distractors for the standardized exercise from ``llm``
       and collect them into one string.

    Args:
        user_query: The exercise text as supplied by the user.
        exercise_format: Name of the target format used in step 1.

    Returns:
        The complete distractors text, concatenated from the streamed chunks.
    """
    # --- Step 1: Standardize the exercise formatting ---
    standardized_exercise = await standardize_exercise(
        user_query, exercise_format, self.template_standardize, self.llm_standardize
    )

    # --- Step 2: Generate new distractors using the standardized exercise ---
    # Use the fields this class actually declares (`template`, `llm`); the
    # previously referenced `template_distractors` / `llm_distr` attributes
    # are not defined on the model and raised AttributeError.
    # NOTE(review): `template` must expose a `{standardized_exercise}`
    # placeholder for this call to format — confirm against the template.
    prompt_distractors = await self.template.aformat_prompt(
        standardized_exercise=standardized_exercise
    )
    distractors_messages = prompt_distractors.to_messages()

    pieces = []
    async for chunk in self.llm.astream(distractors_messages):
        # Chat models stream message-chunk objects carrying the text in
        # `.content`, while plain LLMs stream `str`; normalise to text so the
        # pieces can be joined.
        text = chunk if isinstance(chunk, str) else getattr(chunk, "content", "")
        pieces.append(text if isinstance(text, str) else str(text))
        # A streaming UI could forward `text` to the frontend at this point.
    return "".join(pieces)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
|
|
|
|
| 38 |
prompt = await self.template.aformat_prompt(user_input=user_query)
|
| 39 |
messages = prompt.to_messages()
|
| 40 |
result = await self.llm.ainvoke(messages)
|
config/chain_configs.py
CHANGED
|
@@ -9,13 +9,16 @@ chain_configs = {
|
|
| 9 |
"diagnoser": {
|
| 10 |
"class": DiagnoserChain,
|
| 11 |
"template_standardize": standardize_template,
|
|
|
|
| 12 |
"template_diagnose": diagnose_template,
|
| 13 |
-
"
|
| 14 |
-
|
| 15 |
},
|
| 16 |
"distractors": {
|
| 17 |
"class": DistractorsChain,
|
| 18 |
-
"
|
| 19 |
-
"
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
}
|
|
|
|
| 9 |
"diagnoser": {
|
| 10 |
"class": DiagnoserChain,
|
| 11 |
"template_standardize": standardize_template,
|
| 12 |
+
"llm_standardize": llms["GPT-4o-mini"], # Always fixed
|
| 13 |
"template_diagnose": diagnose_template,
|
| 14 |
+
"llm_diagnose": llms["GPT-4o"], # Default; can be replaced in UI
|
| 15 |
+
|
| 16 |
},
|
| 17 |
"distractors": {
|
| 18 |
"class": DistractorsChain,
|
| 19 |
+
"template_standardize": standardize_template,
|
| 20 |
+
"llm_standardize": llms["GPT-4o-mini"], # Always fixed
|
| 21 |
+
"template_distractors": distractors_template,
|
| 22 |
+
"llm_distractors": llms["GPT-4o"], # Default; can be replaced in UI
|
| 23 |
},
|
| 24 |
}
|
config/exercise_standardizer.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# exercise_standardizer.py
|
| 2 |
+
from format_mappings import FORMAT_MAPPINGS
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from typing import Any
|
| 5 |
+
from config.format_mappings import FORMAT_MAPPINGS
|
| 6 |
+
|
| 7 |
+
async def standardize_exercise(
    user_query: str,
    exercise_format: str,
    template: "ChatPromptTemplate",
    llm: Any,
) -> str:
    """Reformat an exercise into the requested format using an LLM.

    Args:
        user_query: The exercise text as supplied by the user.
        exercise_format: Target format name. ``"Raw (original)"`` returns
            ``user_query`` untouched; any name missing from
            ``FORMAT_MAPPINGS`` falls back to a generic reformat instruction.
        template: Prompt template formatted with ``user_input`` and
            ``formatting_instructions``.
        llm: Model object exposing an async ``astream(messages)`` iterator.

    Returns:
        The standardized exercise, concatenated from the streamed chunks.
    """
    if exercise_format == "Raw (original)":
        return user_query  # No transformation needed

    formatting_instructions = FORMAT_MAPPINGS.get(
        exercise_format,
        "Please reformat the given exercise to ease further processing.",
    )

    prompt_std = await template.aformat_prompt(
        user_input=user_query,
        formatting_instructions=formatting_instructions,
    )
    std_messages = prompt_std.to_messages()

    pieces = []
    async for chunk in llm.astream(std_messages):
        # Chat models stream message-chunk objects carrying the text in
        # `.content`, while plain LLMs stream `str`; `"" + chunk` would raise
        # TypeError for the former, so normalise to text first.
        text = chunk if isinstance(chunk, str) else getattr(chunk, "content", "")
        pieces.append(text if isinstance(text, str) else str(text))
    return "".join(pieces)
|
config/format_mappings.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# format_mappings.py
|
| 2 |
+
|
| 3 |
+
# Formatting instructions per target exercise format. Each value is a prompt
# fragment: a one-line request followed by a worked (Dutch) example exercise
# rendered in that format.
FORMAT_MAPPINGS = {
    "Markdown": "".join([
        "Please format the exercise in Markdown, similarly to this example:\n\n",
        "**Theorie** \n",
        "Eenzaamheid wordt door ieder persoon anders ervaren en is daarom subjectief.\n\n",
        "---\n\n",
        "**Vraag** \n",
        "Wat is de meest passende definitie van eenzaamheid?\n\n",
        "1. Het gevoel geen connectie te hebben met anderen \n",
        "2. Regelmatig in je eentje zijn \n",
        "3. Beide bovenstaande \n",
        "4. Geen van bovenstaande \n\n",
        "**Correct antwoord:** \n",
        "1. Het gevoel geen connectie te hebben met anderen.",
    ]),
    "XML": "".join([
        "Please reformat in XML, following this example:\n",
        "<exercise>\n",
        " <content>\n",
        " <question>Theorie:\n",
        "Eenzaamheid wordt door ieder persoon anders ervaren en is daarom subjectief.\n\n",
        "Vraag:\n",
        "Wat is de meest passende definitie van eenzaamheid?</question>\n",
        " <choices>\n",
        ' <choice id="1">Het gevoel geen connectie te hebben met anderen</choice>\n',
        ' <choice id="2">Regelmatig in je eentje zijn</choice>\n',
        ' <choice id="3">Beide bovenstaande</choice>\n',
        ' <choice id="4">Geen van bovenstaande</choice>\n',
        " </choices>\n",
        " </content>\n",
        " <answer>\n",
        " <correct-choice>1</correct-choice>\n",
        " <explanation></explanation>\n",
        " </answer>\n",
        "</exercise>",
    ]),
    "Plaintext": "".join([
        "Please reformat in plain text, following this example:\n\n",
        "Theorie\n",
        "Eenzaamheid wordt door ieder persoon anders ervaren en is daarom subjectief.\n\n",
        "Vraag\n",
        "Wat is de meest passende definitie van eenzaamheid?\n\n",
        "1. Het gevoel geen connectie te hebben met anderen\n",
        "2. Regelmatig in je eentje zijn\n",
        "3. Beide bovenstaande\n",
        "4. Geen van bovenstaande\n\n",
        "Correct antwoord:\n",
        "1. Het gevoel geen connectie te hebben met anderen.",
    ]),
}
|
config/templates.py
CHANGED
|
@@ -13,7 +13,7 @@ standardize_template = ChatPromptTemplate(
|
|
| 13 |
# Template to generate a diagnosis from the standardized exercise.
|
| 14 |
diagnose_template = ChatPromptTemplate(
|
| 15 |
messages=[
|
| 16 |
-
("system", "You are a diagnostic assistant. Based on the given exercise(s), provide a detailed diagnosis of potential issues. What makes this exercise sub-par, worse than it could be, not yet perfect?"),
|
| 17 |
("human", "{standardized_exercise}")
|
| 18 |
],
|
| 19 |
input_variables=["standardized_exercise"]
|
|
@@ -22,8 +22,8 @@ diagnose_template = ChatPromptTemplate(
|
|
| 22 |
# Template for the distractors brainstorm (a single-step chain).
|
| 23 |
distractors_template = ChatPromptTemplate(
|
| 24 |
messages=[
|
| 25 |
-
("system", "You are a brainstorming assistant.
|
| 26 |
("human", "{user_input}")
|
| 27 |
],
|
| 28 |
-
input_variables=["
|
| 29 |
)
|
|
|
|
| 13 |
# Template to generate a diagnosis from the standardized exercise.
|
| 14 |
diagnose_template = ChatPromptTemplate(
|
| 15 |
messages=[
|
| 16 |
+
("system", "You are a diagnostic assistant. Based on the given exercise(s), provide a detailed diagnosis of potential issues. What makes this exercise sub-par, worse than it could be, not yet perfect? Only give the diagnosis, no solutions."),
|
| 17 |
("human", "{standardized_exercise}")
|
| 18 |
],
|
| 19 |
input_variables=["standardized_exercise"]
|
|
|
|
| 22 |
# Template for the distractors brainstorm: asks the model for 10 additional
# distractors for the exercise passed in as `standardized_exercise`.
# The human-message placeholder has to match `input_variables`; it previously
# read `{user_input}`, which is not the variable this template declares.
distractors_template = ChatPromptTemplate(
    messages=[
        ("system", "You are a brainstorming assistant. Based on the given multiple choice exercise, come up with 10 additional distractors: alternative answer options that are not correct, yet plausible enough that a poorly informed student might pick them. Vary the degree of 'almost correctness' and 'clearly incorrectness' between them to provide a wide range of options."),
        ("human", "{standardized_exercise}"),
    ],
    input_variables=["standardized_exercise"],
)
|