{
"title": "Naive Bayes Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions to test and deepen your understanding of Naive Bayes classifiers, from fundamental concepts to advanced real-world applications and challenges.",
"questions": [
{
"id": 1,
"questionText": "What is the core assumption of Naive Bayes?",
"options": [
"All classes have equal probability",
"The dataset is balanced",
"Features are correlated",
"Features are independent given the class label"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes assumes conditional independence of features given the class, which simplifies probability computation."
},
{
"id": 2,
"questionText": "Which theorem is Naive Bayes based on?",
"options": [
"Markov Theorem",
"Pythagoras Theorem",
"Central Limit Theorem",
"Bayes' Theorem"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes uses Bayes’ Theorem to compute posterior probabilities for classification."
},
{
"id": 3,
"questionText": "In Naive Bayes, what is the 'prior probability'?",
"options": [
"Probability of each class before observing features",
"Probability of features given the class",
"Probability of misclassification",
"Conditional probability of test data"
],
"correctAnswerIndex": 0,
"explanation": "The prior is the initial probability of each class based on the training dataset."
},
{
"id": 4,
"questionText": "Which type of Naive Bayes is suitable for text data?",
"options": [
"Gaussian Naive Bayes",
"Bernoulli Naive Bayes",
"Multinomial Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 2,
"explanation": "Multinomial NB works well for text features, as it handles word frequencies."
},
{
"id": 5,
"questionText": "Which Naive Bayes variant is used for binary features?",
"options": [
"Gaussian Naive Bayes",
"Bernoulli Naive Bayes",
"Poisson Naive Bayes",
"Multinomial Naive Bayes"
],
"correctAnswerIndex": 1,
"explanation": "Bernoulli NB models binary presence/absence features effectively."
},
{
"id": 6,
"questionText": "In Gaussian Naive Bayes, features are assumed to follow which distribution?",
"options": [
"Uniform distribution",
"Normal (Gaussian) distribution",
"Exponential distribution",
"Poisson distribution"
],
"correctAnswerIndex": 1,
"explanation": "Gaussian NB models continuous features using a normal distribution."
},
{
"id": 7,
"questionText": "What is 'likelihood' in Naive Bayes?",
"options": [
"Probability of features given the class",
"Posterior probability",
"Prior probability",
"Probability of the class given features"
],
"correctAnswerIndex": 0,
"explanation": "Likelihood is P(features|class) used in Bayes’ formula to compute posterior probability."
},
{
"id": 8,
"questionText": "Which probability does Naive Bayes calculate to make predictions?",
"options": [
"Prior probability only",
"Posterior probability P(class|features)",
"Feature probability only",
"Joint probability of all classes"
],
"correctAnswerIndex": 1,
"explanation": "Naive Bayes computes the posterior probability for each class and selects the class with the highest value."
},
{
"id": 9,
"questionText": "Why is it called 'Naive' Bayes?",
"options": [
"Because it is simple to implement",
"Because it assumes feature independence",
"Because it only works on small datasets",
"Because it ignores class labels"
],
"correctAnswerIndex": 1,
"explanation": "The method is 'naive' due to its strong assumption that features are independent given the class."
},
{
"id": 10,
"questionText": "Which metric is commonly used to evaluate Naive Bayes classifiers?",
"options": [
"R-squared value",
"Euclidean distance",
"Accuracy, Precision, Recall, F1-score",
"Mean squared error"
],
"correctAnswerIndex": 2,
"explanation": "Classification metrics like accuracy, precision, recall, and F1-score are used to evaluate Naive Bayes performance."
},
{
"id": 11,
"questionText": "Scenario: You have continuous features with Gaussian distribution. Which Naive Bayes variant is suitable?",
"options": [
"Bernoulli Naive Bayes",
"Gaussian Naive Bayes",
"Multinomial Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 1,
"explanation": "Gaussian NB models continuous features using the mean and variance of each class."
},
{
"id": 12,
"questionText": "Scenario: Your dataset has counts of words per document. Which Naive Bayes is ideal?",
"options": [
"Bernoulli Naive Bayes",
"Multinomial Naive Bayes",
"Gaussian Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 1,
"explanation": "Multinomial NB handles discrete count data such as word frequencies."
},
{
"id": 13,
"questionText": "Scenario: You have binary features indicating presence or absence. Which Naive Bayes type should you use?",
"options": [
"Gaussian Naive Bayes",
"Multinomial Naive Bayes",
"Bernoulli Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 2,
"explanation": "Bernoulli NB is suitable for binary features."
},
{
"id": 14,
"questionText": "Which problem arises if a feature has zero probability in training data?",
"options": [
"Likelihood is unaffected",
"Accuracy increases",
"Prior probability changes",
"Posterior becomes zero, causing prediction failure"
],
"correctAnswerIndex": 3,
"explanation": "Zero probability leads to a posterior of zero. Laplace smoothing is used to avoid this."
},
{
"id": 15,
"questionText": "What is Laplace smoothing used for in Naive Bayes?",
"options": [
"To normalize features",
"To handle zero probabilities",
"To scale continuous features",
"To reduce dimensionality"
],
"correctAnswerIndex": 1,
"explanation": "Laplace smoothing adds a small value to feature counts to avoid zero probabilities."
},
{
"id": 16,
"questionText": "Scenario: You apply Naive Bayes to a spam detection problem. What is the target variable?",
"options": [
"Document length",
"Email class (spam or not spam)",
"Feature importance",
"Word frequency"
],
"correctAnswerIndex": 1,
"explanation": "The target variable is the class label to predict, e.g., spam or ham."
},
{
"id": 17,
"questionText": "Scenario: In text classification, why do we use log probabilities in Naive Bayes?",
"options": [
"To prevent underflow from multiplying many small probabilities",
"To ignore irrelevant words",
"To increase accuracy",
"To normalize features"
],
"correctAnswerIndex": 0,
"explanation": "Log probabilities convert multiplication into addition, avoiding numerical underflow."
},
{
"id": 18,
"questionText": "Which is a limitation of Naive Bayes?",
"options": [
"Cannot handle categorical data",
"Requires large datasets only",
"Does not use prior probabilities",
"Assumes feature independence which is often violated"
],
"correctAnswerIndex": 3,
"explanation": "The independence assumption may not hold, potentially reducing accuracy."
},
{
"id": 19,
"questionText": "Which scenario favors Naive Bayes despite its independence assumption?",
"options": [
"Complex regression tasks",
"Time-series prediction",
"Text classification",
"Image classification with correlated pixels"
],
"correctAnswerIndex": 2,
"explanation": "Naive Bayes performs surprisingly well for text classification even when features are not fully independent."
},
{
"id": 20,
"questionText": "Which term in Bayes’ theorem represents evidence?",
"options": [
"P(features|class)",
"P(class)",
"P(class|features)",
"P(features)"
],
"correctAnswerIndex": 3,
"explanation": "Evidence is P(features), used to normalize posterior probabilities."
},
{
"id": 21,
"questionText": "Scenario: You want to classify news articles. Which preprocessing step helps Naive Bayes?",
"options": [
"Ignoring word frequencies",
"Tokenization and stop-word removal",
"Adding irrelevant words",
"Random feature shuffling"
],
"correctAnswerIndex": 1,
"explanation": "Tokenization and stop-word removal reduce noise and improve feature quality."
},
{
"id": 22,
"questionText": "Scenario: You notice some features dominate predictions. What can help?",
"options": [
"Add Laplace smoothing",
"Increase k",
"Remove prior probabilities",
"Feature scaling or normalization"
],
"correctAnswerIndex": 3,
"explanation": "Scaling features ensures no single feature dominates posterior computation."
},
{
"id": 23,
"questionText": "Which is a benefit of Naive Bayes?",
"options": [
"Handles missing values automatically",
"Works only on balanced datasets",
"Fast to train and predict",
"Always accurate"
],
"correctAnswerIndex": 2,
"explanation": "Naive Bayes is computationally efficient and works well with large datasets."
},
{
"id": 24,
"questionText": "Scenario: Multinomial Naive Bayes is applied to short text documents. What could help?",
"options": [
"TF-IDF feature weighting",
"Use raw counts only",
"Ignore feature scaling",
"Shuffle classes randomly"
],
"correctAnswerIndex": 0,
"explanation": "TF-IDF emphasizes informative words and improves classification accuracy."
},
{
"id": 25,
"questionText": "Scenario: You apply Gaussian NB but features are not Gaussian. What is likely?",
"options": [
"Model may underperform",
"Features are transformed automatically",
"Posterior probabilities are exact",
"Accuracy improves"
],
"correctAnswerIndex": 0,
"explanation": "Gaussian NB assumes normal distribution; violations can reduce accuracy."
},
{
"id": 26,
"questionText": "Which step avoids zero probability for unseen feature values in training?",
"options": [
"Laplace smoothing",
"Feature scaling",
"Normalization only",
"Random shuffling"
],
"correctAnswerIndex": 0,
"explanation": "Laplace smoothing adds a small constant to feature counts."
},
{
"id": 27,
"questionText": "Scenario: Two classes have very different sample sizes. Which helps?",
"options": [
"Setting all priors equal",
"Random shuffling",
"Using priors proportional to class frequencies",
"Ignoring class sizes"
],
"correctAnswerIndex": 2,
"explanation": "Prior probabilities account for class imbalance in prediction."
},
{
"id": 28,
"questionText": "Scenario: Features are correlated. What is the effect on Naive Bayes?",
"options": [
"Posterior probabilities remain exact",
"Independence assumption is violated, may reduce accuracy",
"Model ignores correlation automatically",
"Accuracy improves"
],
"correctAnswerIndex": 1,
"explanation": "Naive Bayes assumes independence; correlations can reduce prediction reliability."
},
{
"id": 29,
"questionText": "Which probability is directly used to choose class label in Naive Bayes?",
"options": [
"Likelihood only",
"Evidence only",
"Posterior probability",
"Prior probability only"
],
"correctAnswerIndex": 2,
"explanation": "Class with highest posterior probability is chosen as prediction."
},
{
"id": 30,
"questionText": "Scenario: Naive Bayes is applied to multi-class classification. How is prediction done?",
"options": [
"Compute posterior for each class and select maximum",
"Use only the first class",
"Choose class randomly",
"Average class probabilities"
],
"correctAnswerIndex": 0,
"explanation": "Posterior probabilities are computed for each class; the one with the highest is selected."
},
{
"id": 31,
"questionText": "Scenario: In email spam detection, which feature representation works best with Multinomial NB?",
"options": [
"Random numbers",
"Raw characters",
"Word count or TF-IDF vectors",
"Binary features only"
],
"correctAnswerIndex": 2,
"explanation": "Multinomial NB handles count-based features like word frequencies effectively."
},
{
"id": 32,
"questionText": "Which smoothing method prevents zero probability in Naive Bayes?",
"options": [
"Z-score normalization",
"PCA",
"Laplace smoothing",
"Min-max scaling"
],
"correctAnswerIndex": 2,
"explanation": "Laplace smoothing adds a small value to feature counts, avoiding zero probability for unseen features."
},
{
"id": 33,
"questionText": "Scenario: You have continuous features with non-Gaussian distribution. Which strategy is suitable?",
"options": [
"Use Bernoulli NB",
"Discretize features or use kernel density estimation",
"Ignore feature distribution",
"Use Gaussian NB without changes"
],
"correctAnswerIndex": 1,
"explanation": "Discretization or kernel density estimation allows NB to handle non-Gaussian continuous data."
},
{
"id": 34,
"questionText": "Which assumption does Multinomial Naive Bayes make about features?",
"options": [
"All features are binary",
"Features are correlated",
"Features represent counts/frequencies and are independent",
"Features are continuous"
],
"correctAnswerIndex": 2,
"explanation": "Multinomial NB assumes independent counts/frequencies for each feature per class."
},
{
"id": 35,
"questionText": "Scenario: You apply Naive Bayes to a dataset with missing categorical features. What is an effective approach?",
"options": [
"Use Gaussian NB",
"Replace with random values",
"Ignore missing data",
"Treat missing values as a separate category"
],
"correctAnswerIndex": 3,
"explanation": "Treating missing data as a separate category allows NB to include them in probability computation."
},
{
"id": 36,
"questionText": "Scenario: You apply Laplace smoothing with alpha=1. What does alpha control?",
"options": [
"Amount added to feature counts to avoid zero probability",
"Learning rate",
"Number of neighbors",
"Feature scaling factor"
],
"correctAnswerIndex": 0,
"explanation": "Alpha determines the additive smoothing applied to counts to handle unseen feature values."
},
{
"id": 37,
"questionText": "Scenario: Two features are highly correlated. How does Naive Bayes handle this?",
"options": [
"Weights one feature higher",
"Automatically removes one feature",
"Merges features into one",
"Assumes independence; predictions may be biased"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes ignores correlation, which may reduce accuracy in such cases."
},
{
"id": 38,
"questionText": "Scenario: Using Naive Bayes for sentiment analysis, what preprocessing step helps?",
"options": [
"Tokenization, stop-word removal, and stemming",
"Shuffling words randomly",
"Ignoring word frequencies",
"Using raw text only"
],
"correctAnswerIndex": 0,
"explanation": "Text preprocessing ensures features are meaningful and reduces noise."
},
{
"id": 39,
"questionText": "Scenario: A new category appears in testing data unseen in training. What happens?",
"options": [
"Class is automatically ignored",
"Prediction remains correct",
"Posterior probability becomes zero unless smoothed",
"Naive Bayes creates a new class"
],
"correctAnswerIndex": 2,
"explanation": "Without smoothing, unseen feature categories lead to zero probability and failed predictions."
},
{
"id": 40,
"questionText": "Scenario: Features are categorical with many levels. What helps Naive Bayes performance?",
"options": [
"Merging all categories",
"Feature encoding and smoothing",
"Ignoring levels",
"Using Gaussian NB"
],
"correctAnswerIndex": 1,
"explanation": "Encoding categorical features and smoothing probability estimates improves performance."
},
{
"id": 41,
"questionText": "Scenario: Naive Bayes applied to multi-class document classification. How is probability computed?",
"options": [
"Equal probability for all classes",
"Posterior probability for each class using prior and likelihood",
"Only consider the first class",
"Random selection of class"
],
"correctAnswerIndex": 1,
"explanation": "Posterior is computed for each class and the highest is selected."
},
{
"id": 42,
"questionText": "Scenario: You have imbalanced classes. How to adjust Naive Bayes?",
"options": [
"Ignore imbalance",
"Reduce feature counts",
"Use class priors reflecting class frequencies",
"Increase smoothing arbitrarily"
],
"correctAnswerIndex": 2,
"explanation": "Setting class priors helps account for imbalance in predictions."
},
{
"id": 43,
"questionText": "Scenario: Naive Bayes is applied to continuous and categorical features together. Strategy?",
"options": [
"Ignore categorical features",
"Use only Multinomial NB",
"Use Gaussian NB for continuous, Multinomial/Bernoulli NB for categorical",
"Use only Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Different variants can be combined for mixed-type features."
},
{
"id": 44,
"questionText": "Scenario: High-dimensional text data causes overfitting. What helps?",
"options": [
"Feature selection or dimensionality reduction",
"Ignore rare words",
"Increase Laplace alpha",
"Random shuffling of features"
],
"correctAnswerIndex": 0,
"explanation": "Selecting important features reduces overfitting and improves generalization."
},
{
"id": 45,
"questionText": "Scenario: Two words always appear together in class A. Effect on Naive Bayes?",
"options": [
"Posterior probabilities unaffected",
"One word ignored",
"Independence assumption violated; may affect accuracy",
"Model handles correlation automatically"
],
"correctAnswerIndex": 2,
"explanation": "Correlated features violate independence, potentially reducing prediction reliability."
},
{
"id": 46,
"questionText": "Scenario: Naive Bayes is slow with large vocabulary. What helps?",
"options": [
"Increase alpha arbitrarily",
"Use raw counts only",
"Feature selection or TF-IDF weighting",
"Shuffle training data"
],
"correctAnswerIndex": 2,
"explanation": "Reducing feature size or weighting reduces computation and improves performance."
},
{
"id": 47,
"questionText": "Scenario: Text classification with short documents. Which variant works best?",
"options": [
"Poisson NB",
"Bernoulli NB with raw counts",
"Multinomial NB with TF-IDF or word counts",
"Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Short text benefits from count-based Multinomial NB representation."
},
{
"id": 48,
"questionText": "Scenario: Feature appears in all classes equally. Effect?",
"options": [
"Feature dominates prediction",
"Feature does not help in discriminating classes",
"Posterior probability increases",
"Naive Bayes ignores automatically"
],
"correctAnswerIndex": 1,
"explanation": "Features with equal probability across classes do not contribute to classification."
},
{
"id": 49,
"questionText": "Scenario: Multinomial NB predicts probabilities 0.7 for class A and 0.3 for class B. Decision?",
"options": [
"Choose class B",
"Average the classes",
"Random selection",
"Choose class A"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes selects the class with the highest posterior probability."
},
{
"id": 50,
"questionText": "Scenario: Features are sparse with many zeros. Which is preferred?",
"options": [
"Use raw dense arrays only",
"Gaussian NB",
"Multinomial or Bernoulli NB with sparse representation",
"Ignore zeros"
],
"correctAnswerIndex": 2,
"explanation": "Sparse-friendly NB variants handle high-dimensional sparse data efficiently."
},
{
"id": 51,
"questionText": "Scenario: You want to explain predictions. Which Naive Bayes property helps?",
"options": [
"Posterior is ignored",
"Model is a black box",
"Prior probabilities are hidden",
"Feature contributions are interpretable via conditional probabilities"
],
"correctAnswerIndex": 3,
"explanation": "Conditional probabilities indicate which features most influence predictions."
},
{
"id": 52,
"questionText": "Scenario: Naive Bayes used on reviews. Some rare words exist. Solution?",
"options": [
"Normalize counts only",
"Apply Laplace smoothing",
"Ignore rare words",
"Increase k arbitrarily"
],
"correctAnswerIndex": 1,
"explanation": "Smoothing ensures rare or unseen words do not result in zero probability."
},
{
"id": 53,
"questionText": "Scenario: Features are normalized to 0-1. Effect on Multinomial NB?",
"options": [
"Feature scaling automatically helps",
"Posterior probabilities unaffected",
"Accuracy improves",
"Counts should remain integer; normalization may reduce effectiveness"
],
"correctAnswerIndex": 3,
"explanation": "Multinomial NB expects count data; normalization may distort probabilities."
},
{
"id": 54,
"questionText": "Scenario: You have continuous features. Which transformation may help Gaussian NB?",
"options": [
"Ignore continuous nature",
"Binary encode features",
"Log-transform to reduce skewness",
"Shuffle values randomly"
],
"correctAnswerIndex": 2,
"explanation": "Transforming skewed data closer to Gaussian improves model fit."
},
{
"id": 55,
"questionText": "Scenario: Two classes overlap heavily. Naive Bayes accuracy?",
"options": [
"Model ignores overlap",
"Increases automatically",
"Reduced due to similar likelihoods",
"Independent features help perfectly"
],
"correctAnswerIndex": 2,
"explanation": "When classes overlap, posterior probabilities may be close, leading to misclassification."
},
{
"id": 56,
"questionText": "Scenario: You want to combine Gaussian and Multinomial features. Strategy?",
"options": [
"Use a hybrid NB model handling each type separately",
"Use Gaussian NB for all",
"Ignore one type",
"Convert all to counts"
],
"correctAnswerIndex": 0,
"explanation": "Hybrid NB allows handling mixed feature types properly."
},
{
"id": 57,
"questionText": "Scenario: Some features are highly informative, others noisy. Strategy?",
"options": [
"Keep all features",
"Increase alpha",
"Feature selection to keep informative features",
"Randomly drop features"
],
"correctAnswerIndex": 2,
"explanation": "Selecting informative features improves classification and reduces noise influence."
},
{
"id": 58,
"questionText": "Scenario: Words with high frequency in all classes. Effect?",
"options": [
"Dominate prediction positively",
"Provide little discrimination; may be removed",
"Model ignores automatically",
"Posterior probabilities increase"
],
"correctAnswerIndex": 1,
"explanation": "Common words like 'the' or 'and' do not help differentiate classes."
},
{
"id": 59,
"questionText": "Scenario: Test data has unseen word features. What is required?",
"options": [
"Gaussian NB handles automatically",
"Remove prior probabilities",
"Ignore unseen words",
"Apply Laplace smoothing"
],
"correctAnswerIndex": 3,
"explanation": "Smoothing ensures unseen words do not produce zero posterior probability."
},
{
"id": 60,
"questionText": "Scenario: You want probabilities instead of class labels. Naive Bayes output?",
"options": [
"Only prior probability",
"Only class label",
"Posterior probability for each class",
"Only likelihood"
],
"correctAnswerIndex": 2,
"explanation": "NB computes posterior probabilities, which can be used directly or thresholded for classification."
},
{
"id": 61,
"questionText": "Scenario: Words co-occur frequently within a class. Effect?",
"options": [
"Class probabilities unaffected",
"NB ignores co-occurrence",
"Independence assumption violated; may reduce accuracy",
"Prediction improves automatically"
],
"correctAnswerIndex": 2,
"explanation": "Correlated features violate NB assumption; may bias predictions."
},
{
"id": 62,
"questionText": "Scenario: Multiclass NB with 10 classes. How to predict?",
"options": [
"Compute posterior for each class; choose maximum",
"Average class probabilities",
"Random class selection",
"Use only first class"
],
"correctAnswerIndex": 0,
"explanation": "Posterior probabilities guide selection of most probable class."
},
{
"id": 63,
"questionText": "Scenario: Some features have very low variance. Effect on Gaussian NB?",
"options": [
"Model ignores feature automatically",
"Posterior probability increases",
"May have little impact; small variance reduces feature importance",
"Feature dominates prediction"
],
"correctAnswerIndex": 2,
"explanation": "Low-variance features contribute less to posterior probability."
},
{
"id": 64,
"questionText": "Scenario: Sparse categorical features with many unseen values. What helps?",
"options": [
"Randomly shuffle features",
"Ignore rare categories",
"Smoothing and proper encoding",
"Use Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Smoothing and encoding unseen categories allow proper posterior computation."
},
{
"id": 65,
"questionText": "Scenario: Words occur in multiple classes with similar frequency. Effect?",
"options": [
"Model ignores feature automatically",
"Feature provides little discriminative power",
"Posterior probabilities increase",
"Feature dominates prediction"
],
"correctAnswerIndex": 1,
"explanation": "Non-informative features do not help classification."
},
{
"id": 66,
"questionText": "Scenario: Features are scaled differently. Effect on Gaussian NB?",
"options": [
"Feature scaling ignored",
"NB unaffected",
"Scaling impacts Gaussian NB since variance and mean are computed per feature",
"Posterior remains exact"
],
"correctAnswerIndex": 2,
"explanation": "Scaling changes mean/variance; proper preprocessing ensures meaningful probabilities."
},
{
"id": 67,
"questionText": "Scenario: Class conditional distributions overlap. Accuracy?",
"options": [
"NB ignores overlap",
"Reduced due to similar likelihoods",
"Increases automatically",
"Posterior probabilities exact"
],
"correctAnswerIndex": 1,
"explanation": "Overlap reduces discriminative power, increasing misclassification."
},
{
"id": 68,
"questionText": "Scenario: Combining NB with feature selection. Effect?",
"options": [
"Reduces accuracy",
"Prior probabilities change",
"Reduces noise and improves accuracy",
"Ignored features dominate"
],
"correctAnswerIndex": 2,
"explanation": "Selecting important features improves model generalization."
},
{
"id": 69,
"questionText": "Scenario: Naive Bayes for movie genre prediction. Some features missing. Strategy?",
"options": [
"Gaussian NB only",
"Ignore data row",
"Randomly replace",
"Treat missing as separate category or impute"
],
"correctAnswerIndex": 3,
"explanation": "Missing categorical features are handled as separate category or imputed to compute posterior."
},
{
"id": 70,
"questionText": "Scenario: Rare feature appears in all classes equally. Impact?",
"options": [
"Feature contributes little to classification",
"Feature dominates prediction",
"Posterior probability increases",
"NB ignores automatically"
],
"correctAnswerIndex": 0,
"explanation": "Features with equal class frequency have minimal discriminative value."
},
{
"id": 71,
"questionText": "Scenario: You have highly imbalanced classes. What is a good strategy with Naive Bayes?",
"options": [
"Increase Laplace smoothing arbitrarily",
"Use only majority class",
"Ignore imbalance",
"Adjust class priors according to class frequencies"
],
"correctAnswerIndex": 3,
"explanation": "Adjusting class priors ensures the model accounts for imbalance in predictions."
},
{
"id": 72,
"questionText": "Scenario: Two features are strongly correlated. What is the effect on Naive Bayes?",
"options": [
"NB automatically decorrelates features",
"Independence assumption violated; may reduce accuracy",
"Posterior remains exact",
"Accuracy improves"
],
"correctAnswerIndex": 1,
"explanation": "Naive Bayes assumes independence. Correlated features may bias predictions."
},
{
"id": 73,
"questionText": "Scenario: You are predicting rare disease presence. Most patients are healthy. Which is critical?",
"options": [
"Class priors and threshold adjustment",
"Use Gaussian NB for all",
"Ignore rare class",
"Increase feature counts"
],
"correctAnswerIndex": 0,
"explanation": "Rare class predictions require careful handling of priors and decision thresholds."
},
{
"id": 74,
"questionText": "Scenario: Multi-class text classification with many rare words. Strategy?",
"options": [
"Ignore rare words",
"Use Laplace smoothing and possibly TF-IDF",
"Use Gaussian NB",
"Shuffle features"
],
"correctAnswerIndex": 1,
"explanation": "Smoothing and weighting rare words prevents zero probabilities and improves generalization."
},
{
"id": 75,
"questionText": "Scenario: Continuous features are skewed. What improves Gaussian NB?",
"options": [
"Use Bernoulli NB instead",
"Ignore skewness",
"Log or Box-Cox transformation to approximate Gaussian distribution",
"Normalize 0–1"
],
"correctAnswerIndex": 2,
"explanation": "Transforming skewed features closer to Gaussian improves model assumptions and accuracy."
},
{
"id": 76,
"questionText": "Scenario: Text classification. Some words appear in every class equally. Effect?",
"options": [
"Dominates predictions",
"NB ignores automatically",
"Little discriminative value; may be removed",
"Posterior increases"
],
"correctAnswerIndex": 2,
"explanation": "Features with equal class frequency do not help differentiate classes."
},
{
"id": 77,
"questionText": "Scenario: Combining continuous and categorical features in one dataset. Strategy?",
"options": [
"Use hybrid NB (Gaussian for continuous, Multinomial/Bernoulli for categorical)",
"Convert all to counts",
"Ignore one type",
"Use Gaussian NB only"
],
"correctAnswerIndex": 0,
"explanation": "Hybrid NB allows proper handling of mixed feature types."
},
{
"id": 78,
"questionText": "Scenario: Naive Bayes applied on streaming data with changing distributions. Strategy?",
"options": [
"Use Gaussian NB only",
"Ignore distribution change",
"Randomly drop old data",
"Retrain periodically or use incremental NB"
],
"correctAnswerIndex": 3,
"explanation": "Incremental learning or periodic retraining adapts to distribution shifts in streaming data."
},
{
"id": 79,
"questionText": "Scenario: High-dimensional sparse data. What optimization helps?",
"options": [
"Sparse representation and feature selection",
"Shuffle features",
"Increase Laplace alpha only",
"Use raw dense matrix"
],
"correctAnswerIndex": 0,
"explanation": "Sparse storage and feature selection reduce computation and memory use."
},
{
"id": 80,
"questionText": "Scenario: Words co-occur frequently within a class. Effect?",
"options": [
"Violates independence; may bias predictions",
"Improves accuracy automatically",
"NB ignores co-occurrence",
"Posterior unchanged"
],
"correctAnswerIndex": 0,
"explanation": "Correlated features violate NB assumption; predictions may be biased."
},
{
"id": 81,
"questionText": "Scenario: Large vocabulary with many zero-count features. How to handle?",
"options": [
"Use Laplace smoothing",
"Remove zeros arbitrarily",
"Use Gaussian NB",
"Ignore rare features"
],
"correctAnswerIndex": 0,
"explanation": "Smoothing ensures zero-count features do not yield zero probability."
},
{
"id": 82,
"questionText": "Scenario: Test data has unseen feature categories. Solution?",
"options": [
"Apply Laplace smoothing or treat as unknown category",
"Randomly assign values",
"Use Gaussian NB",
"Ignore unseen categories"
],
"correctAnswerIndex": 0,
"explanation": "Smoothing allows unseen categories to be incorporated safely."
},
{
"id": 83,
"questionText": "Scenario: Overlapping class distributions. Naive Bayes accuracy?",
"options": [
"NB ignores overlap",
"Posterior exact",
"Improves automatically",
"Reduced due to similar likelihoods"
],
"correctAnswerIndex": 3,
"explanation": "Overlap reduces discriminative power, increasing misclassification risk."
},
{
"id": 84,
"questionText": "Scenario: NB output shows posterior probabilities 0.51 vs 0.49. Interpretation?",
"options": [
"Prediction is exact",
"Model is uncertain; threshold adjustment may help",
"Ignore probabilities",
"Choose lower class"
],
"correctAnswerIndex": 1,
"explanation": "Close probabilities indicate uncertainty; thresholds or confidence measures can improve decision-making."
},
{
"id": 85,
"questionText": "Scenario: Gaussian NB feature has extremely low variance. Effect?",
"options": [
"Posterior increases",
"Feature dominates prediction",
"Ignored automatically",
"Feature contributes little; may be ignored"
],
"correctAnswerIndex": 3,
"explanation": "Low-variance features have minimal impact on posterior probability."
},
{
"id": 86,
"questionText": "Scenario: Multi-class NB with 15 classes. Prediction method?",
"options": [
"Average probabilities",
"Use only first class",
"Compute posterior for each class and choose maximum",
"Choose randomly"
],
"correctAnswerIndex": 2,
"explanation": "The class with the highest posterior probability is selected."
},
{
"id": 87,
"questionText": "Scenario: NB applied on mixed numeric and categorical features. Preprocessing?",
"options": [
"Ignore one feature type",
"Normalize all",
"Gaussian for numeric, Multinomial/Bernoulli for categorical",
"Convert numeric to binary"
],
"correctAnswerIndex": 2,
"explanation": "Proper variant selection ensures correct probability calculation."
},
{
"id": 88,
"questionText": "Scenario: Rare features appear in training but not testing. How to handle?",
"options": [
"Apply smoothing to prevent zero probability",
"Randomly assign probabilities",
"Ignore rare features",
"Use Gaussian NB"
],
"correctAnswerIndex": 0,
"explanation": "Smoothing prevents zero posterior for rare or unseen features."
},
{
"id": 89,
"questionText": "Scenario: Continuous features heavily skewed. Best approach?",
"options": [
"Convert to binary",
"Use only categorical NB",
"Log-transform to approximate Gaussian",
"Ignore skewness"
],
"correctAnswerIndex": 2,
"explanation": "Transforming skewed continuous features improves Gaussian NB assumptions."
},
{
"id": 90,
"questionText": "Scenario: Text classification with highly frequent words like 'the'. What should you do?",
"options": [
"Increase Laplace alpha",
"Remove stop words",
"Keep all words",
"Randomly shuffle"
],
"correctAnswerIndex": 1,
"explanation": "Stop-word removal prevents common non-informative words from dominating probabilities."
},
{
"id": 91,
"questionText": "Scenario: NB used on streaming data with evolving distribution. What helps?",
"options": [
"Ignore drift",
"Use Gaussian NB only",
"Incremental NB or periodic retraining",
"Discard old data"
],
"correctAnswerIndex": 2,
"explanation": "Incremental learning adapts the model to changing feature distributions."
},
{
"id": 92,
"questionText": "Scenario: Words appear together in many documents (correlation). Effect?",
"options": [
"NB ignores correlation",
"Posterior unaffected",
"Violates independence; may reduce accuracy",
"Improves accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Correlated features violate the conditional independence assumption."
},
{
"id": 93,
"questionText": "Scenario: Multi-class NB. One class has very few examples. Strategy?",
"options": [
"Use priors and smoothing to handle small classes",
"Duplicate small class",
"Ignore small class",
"Remove features"
],
"correctAnswerIndex": 0,
"explanation": "Small classes require careful handling of priors and smoothing to avoid misclassification."
},
{
"id": 94,
"questionText": "Scenario: Mixed sparse and dense features. Optimization?",
"options": [
"Use sparse representation for sparse features",
"Convert all to dense",
"Ignore sparse features",
"Use only Gaussian NB"
],
"correctAnswerIndex": 0,
"explanation": "Sparse storage reduces memory and computation costs."
},
{
"id": 95,
"questionText": "Scenario: NB misclassifies some classes consistently. Probable cause?",
"options": [
"Posterior probabilities are exact",
"Model ignores priors",
"Independence assumption violated or poor feature selection",
"Smoothing too high"
],
"correctAnswerIndex": 2,
"explanation": "Feature correlation or irrelevant features can bias predictions."
},
{
"id": 96,
"questionText": "Scenario: Gaussian NB on features with large range differences. What is required?",
"options": [
"Keep raw values",
"Apply Laplace smoothing",
"Random shuffling",
"Standardize or normalize features"
],
"correctAnswerIndex": 3,
"explanation": "Feature scaling ensures Gaussian parameters are meaningful."
},
{
"id": 97,
"questionText": "Scenario: NB for sentiment analysis with short documents. Strategy?",
"options": [
"Poisson NB",
"Use Multinomial NB with TF-IDF or counts",
"Use Gaussian NB",
"Bernoulli NB with raw counts"
],
"correctAnswerIndex": 1,
"explanation": "Short text benefits from count-based representation."
},
{
"id": 98,
"questionText": "Scenario: Feature occurs frequently in all classes. Effect?",
"options": [
"NB ignores automatically",
"Posterior probability increases",
"Provides little discriminative power; may be removed",
"Dominates prediction"
],
"correctAnswerIndex": 2,
"explanation": "Non-informative features do not help classification."
},
{
"id": 99,
"questionText": "Scenario: NB applied on multi-lingual text. Strategy?",
"options": [
"Ignore language differences",
"Merge all text blindly",
"Separate feature sets per language or use language-independent features",
"Use Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Language-specific preprocessing ensures meaningful feature extraction."
},
{
"id": 100,
"questionText": "Scenario: You want to explain which features influenced prediction. Which NB property helps?",
"options": [
"Only prior matters",
"Conditional probabilities show feature contributions",
"Posterior probabilities ignored",
"Model is black-box"
],
"correctAnswerIndex": 1,
"explanation": "Conditional probabilities indicate how each feature contributes to the posterior probability."
}
]
}