Added pointer for all, never, exclusively
Browse files- app/ui/diagnoser_tab.py +5 -4
- chains/exercises/runner.py +8 -7
- config/system_prompt_texts.py +142 -2
- utils/pending_issues.md +19 -0
app/ui/diagnoser_tab.py
CHANGED
|
@@ -49,10 +49,11 @@ def build_diagnoser_tab():
|
|
| 49 |
diagnoser_button = gr.Button("Diagnose")
|
| 50 |
|
| 51 |
# Create 10 Response textboxes
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
# Set up a change callback so that if the user selects any model with "Claude" in the name, the exercise format updates to "XML"
|
| 58 |
model_choice_diagnose.change(
|
|
|
|
| 49 |
diagnoser_button = gr.Button("Diagnose")
|
| 50 |
|
| 51 |
# Create 10 Response textboxes
|
| 52 |
+
with gr.Column():
|
| 53 |
+
diagnoser_responses = [
|
| 54 |
+
gr.Textbox(label=f"Response {i + 1}", interactive=False, visible=(i == 0))
|
| 55 |
+
for i in range(10)
|
| 56 |
+
]
|
| 57 |
|
| 58 |
# Set up a change callback so that if the user selects any model with "Claude" in the name, the exercise format updates to "XML"
|
| 59 |
model_choice_diagnose.change(
|
chains/exercises/runner.py
CHANGED
|
@@ -29,8 +29,9 @@ async def run_fluster(
|
|
| 29 |
llm_a = llms.get(model_choice_1, config["default_llm_a"])
|
| 30 |
llm_b = llms.get(model_choice_2, config["default_llm_b"])
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
template_sanitize = config["template_sanitize"]
|
| 36 |
llm_sanitize = config["llm_sanitize"]
|
|
@@ -66,13 +67,13 @@ async def run_fluster(
|
|
| 66 |
gen_resp = await gen_llm.ainvoke(gen_msg.to_messages())
|
| 67 |
write_fluster_result = getattr(gen_resp, "content", gen_resp)
|
| 68 |
|
| 69 |
-
# 2) Refine distractors
|
| 70 |
-
refine_msg = await template_refine.aformat_prompt(write_fluster_result=write_fluster_result)
|
| 71 |
-
refine_resp = await llm_refine.ainvoke(refine_msg.to_messages())
|
| 72 |
-
refined_output = getattr(refine_resp, "content", refine_resp)
|
| 73 |
|
| 74 |
# 3) Sanitize
|
| 75 |
-
sanitize_msg = await template_sanitize.aformat_prompt(refinement_result=
|
| 76 |
sanitize_resp = await llm_sanitize.ainvoke(sanitize_msg.to_messages())
|
| 77 |
sanitized_output = getattr(sanitize_resp, "content", sanitize_resp)
|
| 78 |
|
|
|
|
| 29 |
llm_a = llms.get(model_choice_1, config["default_llm_a"])
|
| 30 |
llm_b = llms.get(model_choice_2, config["default_llm_b"])
|
| 31 |
|
| 32 |
+
# we skip refinement for now
|
| 33 |
+
# template_refine = config["template_refine_fluster"]
|
| 34 |
+
# llm_refine = config["llm_refine"]
|
| 35 |
|
| 36 |
template_sanitize = config["template_sanitize"]
|
| 37 |
llm_sanitize = config["llm_sanitize"]
|
|
|
|
| 67 |
gen_resp = await gen_llm.ainvoke(gen_msg.to_messages())
|
| 68 |
write_fluster_result = getattr(gen_resp, "content", gen_resp)
|
| 69 |
|
| 70 |
+
# 2) Refine distractors << # we skip refinement for now
|
| 71 |
+
# refine_msg = await template_refine.aformat_prompt(write_fluster_result=write_fluster_result)
|
| 72 |
+
# refine_resp = await llm_refine.ainvoke(refine_msg.to_messages())
|
| 73 |
+
# refined_output = getattr(refine_resp, "content", refine_resp)
|
| 74 |
|
| 75 |
# 3) Sanitize
|
| 76 |
+
sanitize_msg = await template_sanitize.aformat_prompt(refinement_result=write_fluster_result)
|
| 77 |
sanitize_resp = await llm_sanitize.ainvoke(sanitize_msg.to_messages())
|
| 78 |
sanitized_output = getattr(sanitize_resp, "content", sanitize_resp)
|
| 79 |
|
config/system_prompt_texts.py
CHANGED
|
@@ -622,9 +622,149 @@ After lots of iterative prep and reasoning, considering a wide range of options,
|
|
| 622 |
|
| 623 |
## Pointers
|
| 624 |
- Try to exactly match the content and language level in the learning objective. If it's stated in simple words, use equally simple words in the exercises as well.
|
| 625 |
-
- Output format doesn't matter
|
| 626 |
"""
|
| 627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 628 |
|
| 629 |
|
| 630 |
"""
|
|
@@ -832,7 +972,7 @@ Be precise. Shun absolute terms like 'never' or 'always', as they imply complete
|
|
| 832 |
|
| 833 |
|
| 834 |
template_refine_fluster_text = """
|
| 835 |
-
Given some source data containing exercises,
|
| 836 |
"""
|
| 837 |
|
| 838 |
|
|
|
|
| 622 |
|
| 623 |
## Pointers
|
| 624 |
- Try to exactly match the content and language level in the learning objective. If it's stated in simple words, use equally simple words in the exercises as well.
|
| 625 |
+
- Output format doesn't matter: prioritize careful reasoning.
|
| 626 |
"""
|
| 627 |
|
| 628 |
+
uitgangspunt_template_for_writing_a_fluster = """
|
| 629 |
+
# Task outline
|
| 630 |
+
Given a learning objective, your goal is to write an exercise set of 3 high-quality multiple choice exercises that all test the exact same knowledge that's stated in the learning objective.
|
| 631 |
+
|
| 632 |
+
# Concepts
|
| 633 |
+
## Learning objective
|
| 634 |
+
Tests a specific fact. For example: "De student weet dat de Wet Bopz sinds 1994 in gebruik was (tot hij in 2020 werd opgevolgd door de Wvggz)."
|
| 635 |
+
All exercises must test the very same specific key part of the given learning objective, focusing only on info that's not in parentheses. Any text between parentheses must only be used in the Theory or Explanation sections of the exercises (Theory if it's important for understanding the exercise beforehand, Explanation if it's merely useful additional info to show after the student has answered).
|
| 636 |
+
|
| 637 |
+
## Exercise set
|
| 638 |
+
Comprises 3 exercises that all test the same single learning objective: one bigger multiple choice exercise and two smaller true/false statements. See this example:
|
| 639 |
+
<exercise_set>
|
| 640 |
+
<multiple_choice_exercise>
|
| 641 |
+
<prompt>
|
| 642 |
+
Stelling:
|
| 643 |
+
De wet Bopz was sinds ..... in gebruik.
|
| 644 |
+
</prompt>
|
| 645 |
+
|
| 646 |
+
<choices>
|
| 647 |
+
1. 1984
|
| 648 |
+
2. 1994
|
| 649 |
+
3. 2004
|
| 650 |
+
4. 2009
|
| 651 |
+
</choices>
|
| 652 |
+
<correct_answer>2</correct_answer>
|
| 653 |
+
<explanation>In 2020 werd de wet Bopz opgevolgd door de Wvggz.</explanation>
|
| 654 |
+
</multiple_choice_exercise>
|
| 655 |
+
|
| 656 |
+
<true_statement>
|
| 657 |
+
<prompt>
|
| 658 |
+
Stelling:
|
| 659 |
+
De wet Bopz was sinds 1994 in gebruik.
|
| 660 |
+
</prompt>
|
| 661 |
+
|
| 662 |
+
<choices>
|
| 663 |
+
1. Deze stelling is correct
|
| 664 |
+
2. Deze stelling is niet correct
|
| 665 |
+
</choices>
|
| 666 |
+
<correct_answer>1</correct_answer>
|
| 667 |
+
<explanation>In 2020 werd de wet Bopz opgevolgd door de Wvggz.</explanation>
|
| 668 |
+
</true_statement>
|
| 669 |
+
|
| 670 |
+
<false_statement>
|
| 671 |
+
<prompt>
|
| 672 |
+
Stelling:
|
| 673 |
+
De wet Bopz was sinds 1984 in gebruik.
|
| 674 |
+
</prompt>
|
| 675 |
+
|
| 676 |
+
<choices>
|
| 677 |
+
1. Deze stelling is correct
|
| 678 |
+
2. Deze stelling is niet correct
|
| 679 |
+
</choices>
|
| 680 |
+
<correct_answer>2</correct_answer>
|
| 681 |
+
<explanation>De wet Bopz was sinds **1994** in gebruik. Tot 2020, toen hij werd opgevolgd door de Wvggz.</explanation>
|
| 682 |
+
</false_statement>
|
| 683 |
+
</exercise_set>
|
| 684 |
+
|
| 685 |
+
Here's another example of an exercise set, this time for the learning objective: "De student weet dat je dagelijks oefent om zo objectief (zonder je eigen mening) mogelijk te observeren."
|
| 686 |
+
<exercise_set>
|
| 687 |
+
<multiple_choice_exercise>
|
| 688 |
+
<prompt>
|
| 689 |
+
Theorie:
|
| 690 |
+
Objectief betekent "zonder je eigen mening".
|
| 691 |
+
|
| 692 |
+
Vraag:
|
| 693 |
+
Wat moet je doen om zo objectief mogelijk te observeren?
|
| 694 |
+
</prompt>
|
| 695 |
+
|
| 696 |
+
<choices>
|
| 697 |
+
1. Je intuïtie volgen
|
| 698 |
+
2. Veel theorie leren
|
| 699 |
+
3. Iemand anders erbij roepen
|
| 700 |
+
4. Dagelijks oefenen
|
| 701 |
+
</choices>
|
| 702 |
+
<correct_answer>4</correct_answer>
|
| 703 |
+
</multiple_choice_exercise>
|
| 704 |
+
|
| 705 |
+
<true_statement>
|
| 706 |
+
<prompt>
|
| 707 |
+
Theorie:
|
| 708 |
+
Objectief betekent "zonder je eigen mening".
|
| 709 |
+
|
| 710 |
+
Stelling:
|
| 711 |
+
Om zo objectief mogelijk te observeren moet je dagelijks oefenen.
|
| 712 |
+
</prompt>
|
| 713 |
+
|
| 714 |
+
<choices>
|
| 715 |
+
1. Deze stelling is correct
|
| 716 |
+
2. Deze stelling is niet correct
|
| 717 |
+
</choices>
|
| 718 |
+
<correct_answer>1</correct_answer>
|
| 719 |
+
</true_statement>
|
| 720 |
+
|
| 721 |
+
<false_statement>
|
| 722 |
+
<prompt>
|
| 723 |
+
Theorie:
|
| 724 |
+
Objectief betekent "zonder je eigen mening".
|
| 725 |
+
|
| 726 |
+
Stelling:
|
| 727 |
+
Om zo objectief mogelijk te observeren moet je een keer per jaar oefenen.
|
| 728 |
+
</prompt>
|
| 729 |
+
|
| 730 |
+
<choices>
|
| 731 |
+
1. Deze stelling is correct
|
| 732 |
+
2. Deze stelling is niet correct
|
| 733 |
+
</choices>
|
| 734 |
+
<correct_answer>2</correct_answer>
|
| 735 |
+
<explanation>Om zo objectief mogelijk te kunnen observeren, is het belangrijk om regelmatig, bij voorkeur <b>dagelijks</b>, te oefenen.</explanation>
|
| 736 |
+
</false_statement>
|
| 737 |
+
</exercise_set>
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
## Distractors
|
| 741 |
+
The alternative answer options of the multiple choice exercise that are not the correct answer are called distractors. These are the most important part of the exercise. Effective distractors strike an optimal balance between "very plausible to someone who doesn't know the answer to the question" and "clearly wrong to someone who does know the answer to the question".
|
| 742 |
+
|
| 743 |
+
## Theory
|
| 744 |
+
Optional. Sometimes there's additional knowledge present in the learning objective (often between parentheses) that is not the direct focus to test, but useful to know for the student beforehand to better understand the question. This is then added as Theory in the prompt. The student gets to see this as part of the exercise prompt.
|
| 745 |
+
|
| 746 |
+
## Explanation
|
| 747 |
+
Optional. Sometimes there's additional knowledge present in the learning objective (often between parentheses, or as a subclause) that is not the direct focus to know, nor is it necessary to clarify the prompt. If this is useful, related additional info, add it to the explanation, so that the student gets to see this after they pick their answer. The false statement always needs an explanation, to tell the student why the statement is incorrect (explaining what the true statement would have been). Other exercises should only get an explanation if the learning objective contains appropriate info for this.
|
| 748 |
+
|
| 749 |
+
# Approach
|
| 750 |
+
Think long and hard about the ideal three exercises to test the given learning objective.
|
| 751 |
+
## Distractors
|
| 752 |
+
Especially spend a lot of time picking good distractors for the first multiple choice exercise.
|
| 753 |
+
Imagine the typical Dumb Student among the target audience for the given learning objective, bottom of their class. Would each distractor sometimes sound appealing to them if they hadn't studied for the test? We want to avoid the possibility that they can too easily dismiss and eliminate a distractor as clearly not a serious option, just on the basis of it looking weird to them. Imagine whether very stupid students with limited general knowledge, and no knowledge of the topic of the exercise, might find the distractor plausible. That's the goal.
|
| 754 |
+
At the same time, a distractor must not be too close to the truth either, because that would be misleading.
|
| 755 |
+
Imagine asking an Expert Panel of 10 domain experts to judge this. All of them should agree that the correct answer is the clearly best answer in this exercise.
|
| 756 |
+
Really carry out these thought experiments of how the Dumb Student and the Expert Panel would look at the exercise. If there's any doubt the experts would agree on the best answer, rephrase the offending distractor to be a bit less true, and imagine again. If there's any doubt the Dumb Student would find a distractor plausible, rephrase it to sound more appealing, and imagine again.
|
| 757 |
+
After lots of iterative prep and reasoning, considering a wide range of options, weighing what would be the best, finally return a complete exercise set of 1 multiple choice exercise and 2 statements.
|
| 758 |
+
|
| 759 |
+
## Pointers
|
| 760 |
+
- Try to exactly match the content and language level in the learning objective. If it's stated in simple words, use equally simple words in the exercises as well.
|
| 761 |
+
- Avoid the use of unnecessarily strong false statements or distractors using words like "all", "never" or "exclusively" etc., because they're often too easy to dismiss (unless the correct answer is similarly extreme). For example: instead of your false statement being "De enige factor die slaapkwaliteit beïnvloedt, is consistent naar bed gaan", it is better to give a less extreme (and therefore more plausible-sounding) statement, like: "De hoofdfactor die slaapkwaliteit beïnvloedt, is consistent naar bed gaan".
|
| 762 |
+
- Output format doesn't matter: prioritize careful reasoning.
|
| 763 |
+
"""
|
| 764 |
+
|
| 765 |
+
|
| 766 |
+
|
| 767 |
+
|
| 768 |
|
| 769 |
|
| 770 |
"""
|
|
|
|
| 972 |
|
| 973 |
|
| 974 |
template_refine_fluster_text = """
|
| 975 |
+
Given some source data containing exercises, correct any spelling errors.
|
| 976 |
"""
|
| 977 |
|
| 978 |
|
utils/pending_issues.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Learning objectives generation
|
| 2 |
+
## LOs zijn nog steeds niet specifiek genoeg ?
|
| 3 |
+
Input:
|
| 4 |
+
HTML 277641
|
| 5 |
+
|
| 6 |
+
Result:
|
| 7 |
+
De student weet dat iemands leven wordt gevormd door drie dingen: interne factoren, externe factoren en zelfbepaling.
|
| 8 |
+
|
| 9 |
+
# Fluster generation
|
| 10 |
+
## References to things outside of the LO
|
| 11 |
+
Input:
|
| 12 |
+
De student weet dat iemands leven wordt gevormd door drie dingen: interne factoren, externe factoren en zelfbepaling.
|
| 13 |
+
|
| 14 |
+
Result (1/2):
|
| 15 |
+
|
| 16 |
+
Vraag:
|
| 17 |
+
Waaruit wordt iemands leven volgens de leerstof gevormd?
|
| 18 |
+
|
| 19 |
+
|