# File: subul/backend/tests/test_scoring_methodology.py
# Repository page residue kept for provenance:
#   "Kaadan's picture" / "initial commit" / 358dfff
import json
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from models.assessment import Assessment
from models.job import Job
from models.user import User
from models.application import Application
from models.base import Base
from config import settings
from schemas.assessment import AssessmentQuestion, AssessmentQuestionOption
from schemas.enums import QuestionType
from services.ai_service import score_answer
from uuid import uuid4
def test_scoring_methodology():
    """Exercise ``score_answer`` on each supported question type.

    Three scenarios run in order:

    1. A choose-one question, answered correctly and then incorrectly.
    2. A text-based question, answered in detail and then tersely.
    3. A choose-many question, answered correctly and then with one wrong
       selection.

    Choice answers must score exactly 1.0 or 0.0 with a matching ``correct``
    flag. Text answers only need a score inside [0, 1], and the terse answer
    must score strictly below the detailed one.
    """

    def show(label, outcome):
        # Echo the score and rationale of one scored answer.
        print(f" {label} score: {outcome['score']}")
        print(f" {label} rationale: {outcome['rationale']}")

    def expect(outcome, score, flag, label):
        # Choice questions are all-or-nothing: pin both the score and the flag.
        assert outcome['score'] == score, f"Expected {score} for {label}, got {outcome['score']}"
        assert outcome['correct'] == flag, f"Expected {flag} for {label}, got {outcome['correct']}"

    print("Testing scoring methodology...")

    # ------------------------------------------------------------------
    # Scenario 1: choose-one question
    # ------------------------------------------------------------------
    print("\n1. Testing multiple choice question scoring (direct comparison)...")
    capital_question = AssessmentQuestion(
        id=str(uuid4()),
        text="What is the capital of France?",
        weight=3,
        skill_categories=["geography", "knowledge"],
        type=QuestionType.choose_one,
        options=[
            AssessmentQuestionOption(text=caption, value=key)
            for key, caption in (("a", "London"), ("b", "Paris"), ("c", "Berlin"))
        ],
        correct_options=["b"],
    )

    # Selecting the single correct option.
    right_pick = score_answer(
        question=capital_question,
        answer_text="",
        selected_options=["b"],
    )
    show("Correct MC answer", right_pick)
    expect(right_pick, 1.0, True, "correct MC answer")
    print(" [PASS] Correct multiple choice answer scored directly")

    # Selecting "a" (London), which is not listed in correct_options.
    wrong_pick = score_answer(
        question=capital_question,
        answer_text="",
        selected_options=["a"],
    )
    show("Incorrect MC answer", wrong_pick)
    expect(wrong_pick, 0.0, False, "incorrect MC answer")
    print(" [PASS] Incorrect multiple choice answer scored directly")

    # ------------------------------------------------------------------
    # Scenario 2: text-based question
    # ------------------------------------------------------------------
    print("\n2. Testing text-based question scoring (AI evaluation)...")
    essay_question = AssessmentQuestion(
        id=str(uuid4()),
        text="Explain the importance of renewable energy.",
        weight=5,
        skill_categories=["environment", "science"],
        type=QuestionType.text_based,
        options=[],
        correct_options=[],
    )

    detailed = score_answer(
        question=essay_question,
        answer_text="Renewable energy is important because it reduces carbon emissions and is sustainable for future generations.",
        selected_options=[],
    )
    show("Text answer", detailed)
    # Only the range is pinned here; how the scorer arrives at the value is
    # not visible from this test (NOTE(review): confirm against score_answer).
    assert 0.0 <= detailed['score'] <= 1.0, f"Text score {detailed['score']} is not in range [0,1]"
    print(" [PASS] Text-based answer scored using AI evaluation heuristics")

    terse = score_answer(
        question=essay_question,
        answer_text="It's good.",
        selected_options=[],
    )
    show("Poor text answer", terse)
    # The terse answer must rank strictly below the detailed one.
    assert terse['score'] < detailed['score'], "Short answer should score lower than detailed answer"
    print(" [PASS] Poor text answer received lower score")

    # ------------------------------------------------------------------
    # Scenario 3: choose-many question
    # ------------------------------------------------------------------
    print("\n3. Testing choose-many question scoring (direct comparison)...")
    languages_question = AssessmentQuestion(
        id=str(uuid4()),
        text="Which of the following are programming languages?",
        weight=4,
        skill_categories=["programming", "computer-science"],
        type=QuestionType.choose_many,
        options=[
            AssessmentQuestionOption(text=caption, value=key)
            for key, caption in (
                ("a", "Python"),
                ("b", "HTML"),
                ("c", "Java"),
                ("d", "CSS"),
            )
        ],
        correct_options=["a", "c"],  # Python and Java
    )

    # Exactly the set of correct options.
    full_match = score_answer(
        question=languages_question,
        answer_text="",
        selected_options=["a", "c"],
    )
    show("Correct multichoice", full_match)
    expect(full_match, 1.0, True, "correct multichoice answer")
    print(" [PASS] Correct choose-many answer scored directly")

    # One right option plus one wrong one (HTML): expected to earn no credit.
    partial_match = score_answer(
        question=languages_question,
        answer_text="",
        selected_options=["a", "b"],
    )
    show("Incorrect multichoice", partial_match)
    expect(partial_match, 0.0, False, "incorrect multichoice answer")
    print(" [PASS] Incorrect choose-many answer scored directly")

    # Closing summary.
    for summary_line in (
        "\n[PASS] Scoring methodology test completed successfully!",
        "- Multiple choice questions are scored directly by comparing options",
        "- Text-based questions use AI evaluation (heuristic scoring in mock)",
        "- This approach optimizes performance by avoiding unnecessary AI calls",
    ):
        print(summary_line)
if __name__ == "__main__":
    # Allow running this file directly as a script, without a test runner.
    test_scoring_methodology()