import gradio as gr
import random
# Quiz questions database (the formulas cited in the explanations are worked through in the illustrative helper defined after this dict)
QUESTIONS = {
"basic": [
{
"question": "What is a confusion matrix?",
"options": [
"A matrix that shows the correlation between features",
"A table used to describe the performance of a classification model",
"A matrix for storing confused data points",
"A visualization tool for regression problems"
],
"correct": 1,
"explanation": "A confusion matrix is a table used to describe the performance of a classification model on test data for which the true values are known."
},
{
"question": "In a binary classification confusion matrix, what does TP stand for?",
"options": ["Total Positive", "True Positive", "Test Positive", "Target Positive"],
"correct": 1,
"explanation": "TP stands for True Positive - cases where the model correctly predicted the positive class."
},
{
"question": "What are the four basic components of a binary confusion matrix?",
"options": [
"TP, TN, FP, FN",
"True, False, Positive, Negative",
"Precision, Recall, F1, Accuracy",
"Sensitivity, Specificity, PPV, NPV"
],
"correct": 0,
"explanation": "The four components are True Positive (TP), True Negative (TN), False Positive (FP), and False Negative (FN)."
},
{
"question": "What does a False Positive represent?",
"options": [
"Model predicted negative, actual was positive",
"Model predicted positive, actual was negative",
"Model predicted positive, actual was positive",
"Model predicted negative, actual was negative"
],
"correct": 1,
"explanation": "False Positive (Type I error) occurs when the model incorrectly predicts the positive class when the actual class is negative."
},
{
"question": "What does a False Negative represent?",
"options": [
"Model predicted positive, actual was negative",
"Model predicted negative, actual was positive",
"Model predicted negative, actual was negative",
"Model predicted positive, actual was positive"
],
"correct": 1,
"explanation": "False Negative (Type II error) occurs when the model incorrectly predicts the negative class when the actual class is positive."
},
{
"question": "If TP=85, TN=90, FP=10, FN=15, what is the accuracy?",
"options": ["0.875", "0.85", "0.90", "0.825"],
"correct": 0,
"explanation": "Accuracy = (TP + TN) / (TP + TN + FP + FN) = (85 + 90) / (85 + 90 + 10 + 15) = 175/200 = 0.875"
},
{
"question": "What does the diagonal of a confusion matrix represent?",
"options": ["Incorrect predictions", "Correct predictions", "Total predictions", "Class imbalance"],
"correct": 1,
"explanation": "The diagonal elements of a confusion matrix represent correct predictions where predicted class equals actual class."
},
{
"question": "What type of error is a False Positive also known as?",
"options": ["Type I error", "Type II error", "Type III error", "Type IV error"],
"correct": 0,
"explanation": "A False Positive is also known as a Type I error - rejecting a true null hypothesis."
},
{
"question": "In medical diagnosis, what would a False Negative represent?",
"options": [
"Diagnosing a healthy person as sick",
"Diagnosing a sick person as healthy",
"Correctly diagnosing a healthy person",
"Correctly diagnosing a sick person"
],
"correct": 1,
"explanation": "A False Negative in medical diagnosis means failing to detect a disease when it's actually present - diagnosing a sick person as healthy."
},
{
"question": "Which axis typically represents predicted classes in a confusion matrix?",
"options": ["X-axis (horizontal)", "Y-axis (vertical)", "Z-axis", "Both X and Y"],
"correct": 0,
"explanation": "By convention, the X-axis (horizontal) typically represents predicted classes, while the Y-axis represents actual classes."
},
{
"question": "In spam email classification, what would a False Positive represent?",
"options": [
"Spam email correctly identified as spam",
"Normal email correctly identified as normal",
"Normal email incorrectly classified as spam",
"Spam email incorrectly classified as normal"
],
"correct": 2,
"explanation": "In spam classification, a False Positive means a legitimate email was incorrectly classified as spam."
},
{
"question": "What is the sum of all elements in a confusion matrix equal to?",
"options": ["Number of features", "Number of classes", "Total number of predictions", "Number of models"],
"correct": 2,
"explanation": "The sum of all elements in a confusion matrix equals the total number of predictions made by the model."
},
{
"question": "In a perfect classifier's confusion matrix, what would the off-diagonal elements be?",
"options": ["All ones", "All zeros", "Equal to diagonal", "Randomly distributed"],
"correct": 1,
"explanation": "In a perfect classifier, all predictions are correct, so off-diagonal elements (errors) would all be zero."
},
{
"question": "What is the minimum number of classes needed to create a confusion matrix?",
"options": ["1", "2", "3", "4"],
"correct": 1,
"explanation": "You need at least 2 classes to create a meaningful confusion matrix for classification problems."
},
{
"question": "If TN=100, what does this mean?",
"options": [
"100 cases were incorrectly classified as negative",
"100 cases were correctly classified as negative",
"100 cases were classified as positive",
"The model made 100 total predictions"
],
"correct": 1,
"explanation": "TN=100 means the model correctly classified 100 cases as negative when they were actually negative."
}
],
"intermediate": [
{
"question": "What is precision in terms of confusion matrix components?",
"options": [
"TP / (TP + FN)",
"TP / (TP + FP)",
"TN / (TN + FP)",
"(TP + TN) / (TP + TN + FP + FN)"
],
"correct": 1,
"explanation": "Precision = TP / (TP + FP). It measures the proportion of positive predictions that were actually correct."
},
{
"question": "What is recall (sensitivity) formula?",
"options": [
"TP / (TP + FP)",
"TN / (TN + FN)",
"TP / (TP + FN)",
"TN / (TN + FP)"
],
"correct": 2,
"explanation": "Recall = TP / (TP + FN). It measures the proportion of actual positives that were correctly identified."
},
{
"question": "What is specificity?",
"options": [
"TP / (TP + FN)",
"TN / (TN + FP)",
"TP / (TP + FP)",
"FN / (FN + TP)"
],
"correct": 1,
"explanation": "Specificity = TN / (TN + FP). It measures the proportion of actual negatives that were correctly identified."
},
{
"question": "If Precision = 0.8 and Recall = 0.6, what is the F1-score?",
"options": ["0.7", "0.686", "0.75", "0.667"],
"correct": 1,
"explanation": "F1-score = 2 Γ— (Precision Γ— Recall) / (Precision + Recall) = 2 Γ— (0.8 Γ— 0.6) / (0.8 + 0.6) = 0.96 / 1.4 β‰ˆ 0.686"
},
{
"question": "What does PPV stand for and how is it related to precision?",
"options": [
"Positive Predictive Value; it's different from precision",
"Positive Predictive Value; it's the same as precision",
"Previous Positive Value; it's unrelated to precision",
"Probable Positive Variance; it's the inverse of precision"
],
"correct": 1,
"explanation": "PPV (Positive Predictive Value) is exactly the same as precision: TP / (TP + FP)."
},
{
"question": "What is the formula for NPV (Negative Predictive Value)?",
"options": [
"TN / (TN + FP)",
"TN / (TN + FN)",
"FN / (FN + TN)",
"FP / (FP + TN)"
],
"correct": 1,
"explanation": "NPV = TN / (TN + FN). It measures how many of the negative predictions were actually correct."
},
{
"question": "What is the False Positive Rate (FPR) formula?",
"options": [
"FP / (FP + TP)",
"FP / (FP + TN)",
"FN / (FN + TP)",
"TN / (TN + FP)"
],
"correct": 1,
"explanation": "FPR = FP / (FP + TN). It represents the proportion of actual negatives that were incorrectly classified as positive."
},
{
"question": "What is the relationship between FPR and specificity?",
"options": [
"FPR = Specificity",
"FPR = 1 - Specificity",
"FPR = 2 Γ— Specificity",
"They are unrelated"
],
"correct": 1,
"explanation": "FPR = 1 - Specificity. Since Specificity = TN/(TN+FP) and FPR = FP/(FP+TN), they are complementary."
},
{
"question": "If TP=50, FP=10, FN=20, TN=120, what is the precision?",
"options": ["0.714", "0.833", "0.857", "0.625"],
"correct": 1,
"explanation": "Precision = TP / (TP + FP) = 50 / (50 + 10) = 50/60 = 0.833"
},
{
"question": "What does a high precision but low recall indicate?",
"options": [
"Model makes many false positives",
"Model is very conservative in positive predictions",
"Model is very liberal in positive predictions",
"Model has high accuracy"
],
"correct": 1,
"explanation": "High precision with low recall means the model is conservative - when it predicts positive, it's usually right, but it misses many actual positives."
},
{
"question": "What is the harmonic mean of precision and recall called?",
"options": ["Accuracy", "F1-score", "Specificity", "AUC"],
"correct": 1,
"explanation": "The F1-score is the harmonic mean of precision and recall: 2 Γ— (precision Γ— recall) / (precision + recall)."
},
{
"question": "What is the balanced accuracy formula?",
"options": [
"(TP + TN) / (TP + TN + FP + FN)",
"(Sensitivity + Specificity) / 2",
"(Precision + Recall) / 2",
"√(Sensitivity Γ— Specificity)"
],
"correct": 1,
"explanation": "Balanced accuracy = (Sensitivity + Specificity) / 2. It gives equal weight to both true positive and true negative rates."
},
{
"question": "If you want to minimize false positives, which metric should you optimize?",
"options": ["Recall", "Precision", "Accuracy", "F1-score"],
"correct": 1,
"explanation": "To minimize false positives, optimize precision. Higher precision means fewer false positives relative to true positives."
},
{
"question": "What is the precision-recall trade-off?",
"options": [
"Increasing precision always increases recall",
"Precision and recall are independent",
"Improving one often decreases the other",
"They always sum to 1"
],
"correct": 2,
"explanation": "There's typically a trade-off: being more selective (higher precision) often means missing more cases (lower recall) and vice versa."
},
{
"question": "If recall = 1.0, what does this mean?",
"options": [
"All predictions were correct",
"All positive predictions were correct",
"All actual positives were found",
"No false positives occurred"
],
"correct": 2,
"explanation": "Recall = 1.0 means FN = 0, so all actual positive cases were correctly identified. However, there might still be false positives."
}
],
"advanced": [
{
"question": "In a multi-class confusion matrix with 5 classes, how many cells will there be?",
"options": ["10", "20", "25", "15"],
"correct": 2,
"explanation": "For n classes, the confusion matrix will be nΓ—n. So for 5 classes: 5Γ—5 = 25 cells."
},
{
"question": "What is macro-averaged precision in multi-class classification?",
"options": [
"Sum of all TP divided by sum of all (TP + FP)",
"Average of precision scores for each class",
"Weighted average based on class frequency",
"Precision of the majority class only"
],
"correct": 1,
"explanation": "Macro-averaged precision calculates precision for each class separately, then takes the unweighted average of these precision scores."
},
{
"question": "What's the difference between macro and micro averaging?",
"options": [
"Macro weights by class size, micro doesn't",
"Micro weights by class size, macro doesn't",
"Macro averages class-wise metrics, micro aggregates globally",
"No difference, they're the same"
],
"correct": 2,
"explanation": "Macro-averaging calculates metrics for each class and averages them. Micro-averaging aggregates all TP, FP, FN globally first, then calculates metrics."
},
{
"question": "In an imbalanced dataset (95% negative, 5% positive), which metric is most misleading?",
"options": ["Precision", "Recall", "F1-score", "Accuracy"],
"correct": 3,
"explanation": "Accuracy can be misleading in imbalanced datasets. A model predicting all negatives would achieve 95% accuracy but be useless for detecting the minority class."
},
{
"question": "What is the Matthews Correlation Coefficient (MCC) range?",
"options": ["[0, 1]", "[-1, 1]", "[0, ∞]", "[-∞, ∞]"],
"correct": 1,
"explanation": "MCC ranges from -1 to +1, where +1 represents perfect prediction, 0 represents random prediction, and -1 represents total disagreement."
},
{
"question": "What is Cohen's Kappa used for in the context of confusion matrices?",
"options": [
"Measuring classification accuracy",
"Measuring agreement beyond chance",
"Calculating feature importance",
"Optimizing threshold values"
],
"correct": 1,
"explanation": "Cohen's Kappa measures inter-rater agreement for classification, accounting for agreement that could occur by chance alone."
},
{
"question": "In multi-class classification, what is the one-vs-rest (OvR) approach?",
"options": [
"Training one model for all classes simultaneously",
"Training separate binary classifiers for each class",
"Using only one class as positive",
"Removing the largest class"
],
"correct": 1,
"explanation": "One-vs-Rest trains separate binary classifiers for each class, treating that class as positive and all others as negative."
},
{
"question": "What is class-wise precision in a multi-class setting?",
"options": [
"Overall precision across all classes",
"Precision calculated for each individual class",
"Precision of the most frequent class",
"Average precision weighted by class size"
],
"correct": 1,
"explanation": "Class-wise precision calculates precision separately for each class by treating it as the positive class in binary classification."
},
{
"question": "What does micro-averaged F1-score equal in multi-class classification?",
"options": [
"Macro-averaged F1-score",
"Weighted F1-score",
"Accuracy",
"Balanced accuracy"
],
"correct": 2,
"explanation": "In multi-class classification, micro-averaged F1-score equals accuracy because micro-averaged precision and recall are both equal to accuracy."
},
{
"question": "What is the support of a class in classification metrics?",
"options": [
"Number of correct predictions for that class",
"Number of actual instances of that class",
"Prediction confidence for that class",
"Model accuracy for that class"
],
"correct": 1,
"explanation": "Support refers to the number of actual instances (samples) of each class in the dataset."
},
{
"question": "In cost-sensitive learning, how might you modify confusion matrix interpretation?",
"options": [
"Weight errors by their associated costs",
"Only consider diagonal elements",
"Normalize by class frequency",
"Use only precision and recall"
],
"correct": 0,
"explanation": "Cost-sensitive learning assigns different costs to different types of errors, so you weight confusion matrix elements by their associated costs."
},
{
"question": "What is the F-beta score and how does it relate to F1?",
"options": [
"F-beta is always higher than F1",
"F-beta weights precision and recall differently based on beta",
"F-beta is the multi-class version of F1",
"F-beta and F1 are the same"
],
"correct": 1,
"explanation": "F-beta score allows weighting precision and recall differently. When beta=1, F-beta equals F1. Beta>1 favors recall, beta<1 favors precision."
},
{
"question": "What is stratified sampling important for when evaluating confusion matrices?",
"options": [
"Reducing computation time",
"Maintaining class distribution in train/test splits",
"Increasing model accuracy",
"Reducing overfitting"
],
"correct": 1,
"explanation": "Stratified sampling ensures that the class distribution in training and test sets matches the original dataset, leading to more reliable confusion matrix evaluation."
},
{
"question": "In ROC analysis, what confusion matrix components determine the ROC curve?",
"options": [
"Precision and Recall",
"TPR and FPR",
"Accuracy and F1-score",
"Sensitivity and NPV"
],
"correct": 1,
"explanation": "ROC curves plot True Positive Rate (TPR = Recall) vs False Positive Rate (FPR) at various threshold settings."
},
{
"question": "What is the no-skill classifier baseline for a balanced binary classification problem?",
"options": [
"50% accuracy",
"Random predictions with 50% positive rate",
"Always predict majority class",
"Both A and B"
],
"correct": 3,
"explanation": "For balanced binary classification, a no-skill classifier achieves 50% accuracy, which can be achieved by random predictions with 50% positive rate."
}
]
}
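# Illustrative sketch (not part of the quiz logic, and never called by the app): how the
# binary-classification metrics quoted in the explanations above are computed from the four
# confusion-matrix counts. The helper name and its tp/tn/fp/fn parameters are assumptions
# made for this example only.
def _binary_metrics(tp, tn, fp, fn):
    """Return the metrics referenced in the question explanations for the given counts."""
    precision = tp / (tp + fp) if (tp + fp) else 0.0     # PPV: TP / (TP + FP)
    recall = tp / (tp + fn) if (tp + fn) else 0.0        # sensitivity / TPR: TP / (TP + FN)
    specificity = tn / (tn + fp) if (tn + fp) else 0.0   # TNR: TN / (TN + FP)
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return {
        "accuracy": accuracy,                             # e.g. (85 + 90) / 200 = 0.875
        "precision": precision,
        "recall": recall,
        "specificity": specificity,
        "false_positive_rate": 1 - specificity,           # FPR = 1 - specificity
        "f1": f1,                                          # harmonic mean of precision and recall
        "balanced_accuracy": (recall + specificity) / 2,
    }
# Sanity check against the basic accuracy question: _binary_metrics(85, 90, 10, 15)["accuracy"] == 0.875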
class QuizState:
def __init__(self):
self.current_question = None
self.score = 0
self.total_questions = 0
self.difficulty = "basic"
self.answered = False
self.used_questions = set() # Track used question indices
self.available_questions = [] # Track available questions for current difficulty
def reset(self):
self.current_question = None
self.score = 0
self.total_questions = 0
self.answered = False
self.used_questions = set()
self.available_questions = []
quiz_state = QuizState()
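# Note: quiz_state is module-level state, so in a running Gradio app it is shared by every
# visitor; a per-session quiz would typically keep this in gr.State instead. The single
# global works as long as only one user takes the quiz at a time.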
def get_random_question(difficulty):
"""Get a random question from the specified difficulty level, avoiding recent repeats."""
questions = QUESTIONS[difficulty]
# If we've used all questions or this is a new difficulty, reset the available questions
if not quiz_state.available_questions or quiz_state.difficulty != difficulty:
quiz_state.available_questions = list(range(len(questions)))
quiz_state.used_questions = set()
quiz_state.difficulty = difficulty
# If we've used more than 80% of questions, reset to avoid running out
if len(quiz_state.used_questions) >= len(questions) * 0.8:
quiz_state.used_questions = set()
quiz_state.available_questions = list(range(len(questions)))
# Get available questions (not recently used)
available_indices = [i for i in quiz_state.available_questions if i not in quiz_state.used_questions]
# If no available questions, reset
if not available_indices:
quiz_state.used_questions = set()
available_indices = list(range(len(questions)))
# Choose random question from available ones
chosen_index = random.choice(available_indices)
quiz_state.used_questions.add(chosen_index)
return questions[chosen_index]
def start_quiz(difficulty):
"""Start a new quiz with specified difficulty."""
quiz_state.reset()
quiz_state.difficulty = difficulty
quiz_state.current_question = get_random_question(difficulty)
quiz_state.answered = False
question_text = f"**Question {quiz_state.total_questions + 1}** (Difficulty: {difficulty.title()})\n\n{quiz_state.current_question['question']}"
return (
question_text,
gr.Radio(choices=quiz_state.current_question['options'], value=None, interactive=True),
"", # explanation
f"Score: {quiz_state.score}/{quiz_state.total_questions}",
gr.Button("Submit Answer", interactive=True),
gr.Button("Next Question", interactive=False)
)
def submit_answer(selected_option):
"""Submit and check the answer."""
if quiz_state.answered or not quiz_state.current_question:
return submit_answer_outputs()
quiz_state.total_questions += 1
quiz_state.answered = True
if selected_option is None:
explanation = "❌ No answer selected!\n\n" + f"**Correct Answer:** {quiz_state.current_question['options'][quiz_state.current_question['correct']]}\n\n**Explanation:** {quiz_state.current_question['explanation']}"
else:
selected_index = quiz_state.current_question['options'].index(selected_option)
if selected_index == quiz_state.current_question['correct']:
quiz_state.score += 1
explanation = f"βœ… Correct!\n\n**Explanation:** {quiz_state.current_question['explanation']}"
else:
explanation = f"❌ Incorrect!\n\n**Your Answer:** {selected_option}\n**Correct Answer:** {quiz_state.current_question['options'][quiz_state.current_question['correct']]}\n\n**Explanation:** {quiz_state.current_question['explanation']}"
score_text = f"Score: {quiz_state.score}/{quiz_state.total_questions}"
return (
explanation,
score_text,
gr.Button("Submit Answer", interactive=False),
gr.Button("Next Question", interactive=True)
)
def submit_answer_outputs():
"""Return current state outputs for submit answer."""
if quiz_state.current_question:
explanation = f"**Explanation:** {quiz_state.current_question['explanation']}"
else:
explanation = ""
score_text = f"Score: {quiz_state.score}/{quiz_state.total_questions}"
return (
explanation,
score_text,
gr.Button("Submit Answer", interactive=False),
gr.Button("Next Question", interactive=True)
)
def next_question():
"""Load the next question."""
quiz_state.current_question = get_random_question(quiz_state.difficulty)
quiz_state.answered = False
question_text = f"**Question {quiz_state.total_questions + 1}** (Difficulty: {quiz_state.difficulty.title()})\n\n{quiz_state.current_question['question']}"
return (
question_text,
gr.Radio(choices=quiz_state.current_question['options'], value=None, interactive=True),
"", # explanation
gr.Button("Submit Answer", interactive=True),
gr.Button("Next Question", interactive=False)
)
# Create Gradio interface
with gr.Blocks(title="Confusion Matrix Quiz", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# 🧠 Confusion Matrix Interview Quiz
Test your knowledge of confusion matrices with questions ranging from basic concepts to advanced topics!
**Question Bank:** 15 questions per difficulty level (45 total)
- **Basic:** Fundamentals, definitions, simple calculations
- **Intermediate:** Metrics, formulas, trade-offs
- **Advanced:** Multi-class, statistical measures, real-world applications
Choose your difficulty level and start practicing for your data science interviews.
""")
with gr.Row():
with gr.Column(scale=2):
difficulty_selector = gr.Radio(
choices=["basic", "intermediate", "advanced"],
value="basic",
label="Select Difficulty Level",
info="Choose your preferred difficulty level"
)
start_btn = gr.Button("πŸš€ Start Quiz", variant="primary", size="lg")
question_display = gr.Markdown("Click 'Start Quiz' to begin!", visible=True)
answer_options = gr.Radio(
choices=[],
label="Select your answer:",
visible=False,
interactive=True
)
with gr.Row():
submit_btn = gr.Button("Submit Answer", interactive=False, visible=False)
next_btn = gr.Button("Next Question", interactive=False, visible=False)
with gr.Column(scale=1):
score_display = gr.Markdown("Score: 0/0")
explanation_display = gr.Markdown("")
# Event handlers
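    # start_quiz populates the question, options, and score; the chained .then() step then
    # reveals the answer radio and the two buttons, which are created with visible=False.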
start_btn.click(
fn=start_quiz,
inputs=[difficulty_selector],
outputs=[question_display, answer_options, explanation_display, score_display, submit_btn, next_btn]
).then(
lambda: [gr.Radio(visible=True), gr.Button(visible=True), gr.Button(visible=True)],
outputs=[answer_options, submit_btn, next_btn]
)
submit_btn.click(
fn=submit_answer,
inputs=[answer_options],
outputs=[explanation_display, score_display, submit_btn, next_btn]
)
next_btn.click(
fn=next_question,
outputs=[question_display, answer_options, explanation_display, submit_btn, next_btn]
)
if __name__ == "__main__":
demo.launch()