{
"title": "Apriori Algorithm Mastery: 100 MCQs",
"description": "A complete collection of 100 multiple-choice questions covering the Apriori algorithm β from basic concepts and definitions to medium-level implementation details and hard scenario-based applications in association rule mining.",
"questions": [
{
"id": 1,
"questionText": "What is the primary purpose of the Apriori algorithm?",
"options": [
"To find linear relationships between continuous variables.",
"To classify data into predefined categories.",
"To reduce the dimensionality of data.",
"To identify frequent itemsets and generate association rules."
],
"correctAnswerIndex": 3,
"explanation": "The Apriori algorithm is used in association rule mining to discover frequent itemsets from transactional datasets and generate rules that describe how items co-occur."
},
{
"id": 2,
"questionText": "The Apriori algorithm is mainly used in which domain?",
"options": [
"Regression Analysis",
"Clustering",
"Market Basket Analysis",
"Image Classification"
],
"correctAnswerIndex": 2,
"explanation": "Apriori is most commonly used in Market Basket Analysis to identify patterns in customer purchase behavior, such as 'If a person buys bread, they are likely to buy butter'."
},
{
"id": 3,
"questionText": "In Apriori, what does 'support' measure?",
"options": [
"The total number of items in a transaction.",
"The probability that a rule is correct.",
"The correlation between two attributes.",
"The frequency of occurrence of an itemset in the dataset."
],
"correctAnswerIndex": 3,
"explanation": "Support measures how frequently an itemset appears in the dataset, helping to identify item combinations that occur often enough to be considered interesting."
},
{
"id": 4,
"questionText": "What is 'confidence' in Apriori rule mining?",
"options": [
"A measure of how frequently items appear together.",
"The probability that a transaction containing X also contains Y.",
"The total number of transactions in the dataset.",
"The likelihood that a rule is incorrect."
],
"correctAnswerIndex": 1,
"explanation": "Confidence measures the reliability of a rule β the percentage of transactions containing X that also contain Y, for a rule X β Y."
},
{
"id": 5,
"questionText": "Which of the following statements best describes the Apriori property?",
"options": [
"Both (2) and (3)",
"All supersets of a frequent itemset must also be frequent.",
"All subsets of a frequent itemset must also be frequent.",
"All supersets of an infrequent itemset must be infrequent."
],
"correctAnswerIndex": 0,
"explanation": "The Apriori property states that all non-empty subsets of a frequent itemset are also frequent, and conversely, any superset of an infrequent itemset must be infrequent."
},
{
"id": 6,
"questionText": "What does the 'minimum support threshold' control in Apriori?",
"options": [
"The minimum frequency required for an itemset to be considered frequent.",
"The accuracy of association rules.",
"The maximum number of items allowed per transaction.",
"The minimum confidence of a rule."
],
"correctAnswerIndex": 0,
"explanation": "The minimum support threshold helps eliminate infrequent itemsets by setting a lower limit on how often an itemset must appear in the dataset to be considered frequent."
},
{
"id": 7,
"questionText": "Which data structure is most commonly used to store itemsets in the Apriori algorithm?",
"options": [
"Trees",
"Stacks",
"Linked lists",
"Hash tables"
],
"correctAnswerIndex": 3,
"explanation": "Hash tables are often used for efficient counting and storage of candidate itemsets during the support counting phase of the Apriori algorithm."
},
{
"id": 8,
"questionText": "Which of the following can cause Apriori to perform slowly on large datasets?",
"options": [
"It generates a large number of candidate itemsets.",
"It uses recursive tree pruning.",
"It requires labeled data.",
"It ignores item frequency thresholds."
],
"correctAnswerIndex": 0,
"explanation": "Apriori can become computationally expensive on large datasets because it must generate and test many candidate itemsets at each iteration."
},
{
"id": 9,
"questionText": "Which of these best describes 'association rule mining'?",
"options": [
"Reducing data dimensions using PCA.",
"Grouping similar data points into clusters.",
"Finding correlations among items in transactional data.",
"Predicting continuous outcomes using regression."
],
"correctAnswerIndex": 2,
"explanation": "Association rule mining uncovers interesting relationships or correlations among items in transactional or relational datasets."
},
{
"id": 10,
"questionText": "Which algorithm improvement focuses on reducing candidate generation compared to Apriori?",
"options": [
"FP-Growth",
"K-Means",
"Naive Bayes",
"Linear Regression"
],
"correctAnswerIndex": 0,
"explanation": "The FP-Growth algorithm improves upon Apriori by eliminating the costly candidate generation process, using a compressed tree structure (FP-Tree) instead."
},
{
"id": 11,
"questionText": "Who introduced the Apriori algorithm?",
"options": [
"Yann LeCun and Yoshua Bengio",
"Andrew Ng and Geoffrey Hinton",
"Rakesh Agrawal and Ramakrishnan Srikant",
"Ian Goodfellow and Richard Sutton"
],
"correctAnswerIndex": 2,
"explanation": "The Apriori algorithm was proposed by Rakesh Agrawal and Ramakrishnan Srikant in 1994 for mining frequent itemsets and association rules from large transactional databases."
},
{
"id": 12,
"questionText": "What kind of dataset is Apriori typically applied to?",
"options": [
"Continuous numerical datasets",
"Transactional datasets",
"Time-series datasets",
"Labeled datasets for classification"
],
"correctAnswerIndex": 1,
"explanation": "Apriori works on transactional datasets, such as market basket data, where each transaction contains a set of items purchased together."
},
{
"id": 13,
"questionText": "Which of the following best represents an 'itemset'?",
"options": [
"A list of all customers",
"A probability distribution",
"A collection of items that occur together",
"A single item in a transaction"
],
"correctAnswerIndex": 2,
"explanation": "An itemset is a collection of one or more items that appear together in a transaction. For example, {bread, butter, milk} is a 3-itemset."
},
{
"id": 14,
"questionText": "In Apriori, what is meant by a 'frequent itemset'?",
"options": [
"An itemset that has the highest lift value",
"An itemset that meets or exceeds the minimum support threshold",
"An itemset that appears only once",
"An itemset that rarely appears in transactions"
],
"correctAnswerIndex": 1,
"explanation": "A frequent itemset is one that occurs frequently enough in the dataset to satisfy the minimum support threshold."
},
{
"id": 15,
"questionText": "What is the output of the Apriori algorithm?",
"options": [
"A list of association rules with their support and confidence values",
"A set of clusters with centroids",
"A regression line equation",
"A confusion matrix"
],
"correctAnswerIndex": 0,
"explanation": "The Apriori algorithm outputs a set of association rules along with their corresponding support, confidence, and lift measures."
},
{
"id": 16,
"questionText": "What is the primary challenge Apriori faces with large datasets?",
"options": [
"High memory and computational cost due to candidate generation",
"Lack of interpretability of rules",
"It requires supervised learning labels",
"Inability to handle continuous variables"
],
"correctAnswerIndex": 0,
"explanation": "Apriori can become inefficient on large datasets because it generates and scans a huge number of candidate itemsets, consuming time and memory."
},
{
"id": 17,
"questionText": "Apriori algorithm uses which approach to find frequent itemsets?",
"options": [
"Bottom-up approach",
"Top-down approach",
"Random sampling",
"Divide and conquer"
],
"correctAnswerIndex": 0,
"explanation": "Apriori follows a bottom-up approach, where frequent subsets are extended one item at a time to form larger itemsets, as long as those subsets remain frequent."
},
{
"id": 18,
"questionText": "What does the 'Apriori' name signify?",
"options": [
"It refers to the priority order of transactions.",
"It comes from the word 'prioritize'.",
"It means it was developed first among other algorithms.",
"It means the algorithm uses prior knowledge of frequent itemset properties."
],
"correctAnswerIndex": 3,
"explanation": "The term 'Apriori' refers to the use of prior knowledge β specifically, the property that all subsets of a frequent itemset must also be frequent."
},
{
"id": 19,
"questionText": "What is the typical input format for Apriori?",
"options": [
"A transactional dataset where each transaction is a list of items",
"A time-series with timestamps",
"A table of numeric values",
"A matrix of continuous features"
],
"correctAnswerIndex": 0,
"explanation": "Apriori expects a dataset where each record represents a transaction containing a list of items purchased or present together."
},
{
"id": 20,
"questionText": "What type of learning does Apriori belong to?",
"options": [
"Semi-supervised learning",
"Unsupervised learning",
"Reinforcement learning",
"Supervised learning"
],
"correctAnswerIndex": 1,
"explanation": "Apriori is an unsupervised learning algorithm because it discovers hidden relationships or associations without labeled output variables."
},
{
"id": 21,
"questionText": "Which metric helps to measure the strength of association rules beyond support and confidence?",
"options": [
"Lift",
"Precision",
"Entropy",
"Recall"
],
"correctAnswerIndex": 0,
"explanation": "Lift measures how much more often X and Y occur together than expected if they were statistically independent, helping to identify strong rules."
},
{
"id": 22,
"questionText": "Which of the following statements about 'lift' is correct?",
"options": [
"Lift = 0 means perfect correlation.",
"Lift = 1 means X and Y are independent.",
"Lift < 1 means X and Y occur more often together.",
"Lift > 1 means X and Y are negatively correlated."
],
"correctAnswerIndex": 1,
"explanation": "A lift of 1 means that the occurrence of X has no effect on Y (they are independent). Lift > 1 suggests a positive association, and Lift < 1 indicates a negative one."
},
{
"id": 23,
"questionText": "What is the main reason for pruning in Apriori?",
"options": [
"To increase the confidence of rules.",
"To generate more itemsets.",
"To remove infrequent itemsets early and reduce computation.",
"To handle missing data."
],
"correctAnswerIndex": 2,
"explanation": "Pruning eliminates infrequent itemsets early based on the Apriori property, reducing unnecessary computations and improving efficiency."
},
{
"id": 24,
"questionText": "What is a candidate itemset?",
"options": [
"A transaction containing all items.",
"A confirmed frequent itemset.",
"A rule with high confidence.",
"A potential itemset that may become frequent after support testing."
],
"correctAnswerIndex": 3,
"explanation": "Candidate itemsets are those that may become frequent, pending support count validation against the minimum support threshold."
},
{
"id": 25,
"questionText": "In Apriori, what does the step 'Join' refer to?",
"options": [
"Merging transactions with similar IDs.",
"Combining rules with similar confidence.",
"Joining datasets based on keys.",
"Combining smaller frequent itemsets to form larger candidate itemsets."
],
"correctAnswerIndex": 3,
"explanation": "The 'Join' step merges smaller frequent itemsets (k-itemsets) to generate larger candidate itemsets (k+1-itemsets)."
},
{
"id": 26,
"questionText": "Which of these measures how often an itemset appears in transactions that contain another itemset?",
"options": [
"Confidence",
"Lift",
"Correlation",
"Support"
],
"correctAnswerIndex": 0,
"explanation": "Confidence measures the conditional probability that a transaction containing one itemset also contains another."
},
{
"id": 27,
"questionText": "What is the stopping condition for Aprioriβs iteration process?",
"options": [
"When confidence falls below 0.5.",
"When lift becomes 1.",
"When all itemsets are tested once.",
"When no new frequent itemsets can be generated."
],
"correctAnswerIndex": 3,
"explanation": "The Apriori process stops when no further frequent itemsets can be generated in the next iteration."
},
{
"id": 28,
"questionText": "What does a high confidence value indicate in a rule X β Y?",
"options": [
"Y often appears without X.",
"X and Y are negatively correlated.",
"X and Y rarely appear together.",
"Transactions containing X are likely to also contain Y."
],
"correctAnswerIndex": 3,
"explanation": "A high confidence value means that if X appears, Y is very likely to appear in the same transaction β indicating a strong association."
},
{
"id": 29,
"questionText": "If an itemsetβs support is below the threshold, what happens?",
"options": [
"It increases the confidence value.",
"It is pruned from further consideration.",
"It is considered frequent.",
"It is forced into the rule set."
],
"correctAnswerIndex": 1,
"explanation": "Itemsets that do not meet the minimum support threshold are pruned from further iterations because they are considered infrequent."
},
{
"id": 30,
"questionText": "Which of the following combinations of metrics is most commonly used in Apriori?",
"options": [
"Mean, Variance, and Standard Deviation",
"Support, Confidence, and Lift",
"Entropy, Gini Index, and Information Gain",
"Precision, Recall, and F1-score"
],
"correctAnswerIndex": 1,
"explanation": "The Apriori algorithm uses Support, Confidence, and Lift as its core metrics for evaluating and filtering association rules."
},
{
"id": 31,
"questionText": "In Apriori, what happens during the 'Prune' step?",
"options": [
"Transactions are combined together.",
"Candidate itemsets that contain infrequent subsets are removed.",
"New candidate itemsets are created.",
"All itemsets are deleted."
],
"correctAnswerIndex": 1,
"explanation": "In the 'Prune' step, Apriori removes candidate itemsets that have infrequent subsets, reducing the number of itemsets that need to be tested in the next iteration."
},
{
"id": 32,
"questionText": "What is the significance of 'k' in Aprioriβs k-itemset?",
"options": [
"It represents the number of items in each itemset.",
"It represents the number of candidate generations.",
"It represents the total number of transactions.",
"It represents the number of association rules."
],
"correctAnswerIndex": 0,
"explanation": "The value 'k' denotes the size of an itemset. For example, a 2-itemset contains 2 items, and a 3-itemset contains 3 items."
},
{
"id": 33,
"questionText": "What is the purpose of generating L1 in Apriori?",
"options": [
"To determine the lift values.",
"To initialize the confidence of rules.",
"To find all 1-itemsets that are frequent.",
"To prune infrequent 2-itemsets."
],
"correctAnswerIndex": 2,
"explanation": "L1 represents all 1-itemsets that meet the minimum support threshold, serving as the starting point for generating larger frequent itemsets."
},
{
"id": 34,
"questionText": "How does Apriori decide whether to include a candidate itemset in the next level?",
"options": [
"By random selection.",
"By checking if its subsets are frequent and support β₯ minimum threshold.",
"By comparing lift with previous rules.",
"By checking if its confidence > 50%."
],
"correctAnswerIndex": 1,
"explanation": "Apriori includes a candidate itemset in the next iteration only if all its subsets are frequent and its support meets the threshold."
},
{
"id": 35,
"questionText": "Which of the following is true about 'Support Count'?",
"options": [
"It is the number of items in the dataset.",
"It measures the accuracy of rules.",
"It is always between 0 and 1.",
"It is the number of transactions containing an itemset."
],
"correctAnswerIndex": 3,
"explanation": "Support count refers to the actual count of transactions that contain a particular itemset before converting it into a support ratio."
},
{
"id": 36,
"questionText": "If the support of {milk, bread} is 0.3, what does it mean?",
"options": [
"Bread appears in 30% of transactions.",
"Milk and bread appear together in 30% of all transactions.",
"They are independent items.",
"Milk appears in 30% of transactions."
],
"correctAnswerIndex": 1,
"explanation": "A support of 0.3 means that milk and bread are bought together in 30% of the total transactions."
},
{
"id": 37,
"questionText": "If confidence(XβY) = 0.8, what does it indicate?",
"options": [
"Y occurs in 80% of all transactions.",
"X occurs in 80% of all transactions.",
"80% of transactions with X also contain Y.",
"The lift is 0.8."
],
"correctAnswerIndex": 2,
"explanation": "Confidence of 0.8 indicates that in 80% of the cases where X appears, Y also appears, showing a strong directional relationship."
},
{
"id": 38,
"questionText": "Which of the following formulas represents 'Confidence' correctly?",
"options": [
"Confidence(XβY) = Support(X) / Support(Y)",
"Confidence(XβY) = Support(X βͺ Y) / Support(X)",
"Confidence(XβY) = Support(Y) / Support(X βͺ Y)",
"Confidence(XβY) = Support(X βͺ Y) Γ Support(Y)"
],
"correctAnswerIndex": 1,
"explanation": "Confidence measures the probability that Y occurs in a transaction given that X occurs, calculated as Support(XβͺY) / Support(X)."
},
{
"id": 39,
"questionText": "Which measure identifies how much more likely items co-occur than if they were independent?",
"options": [
"Coverage",
"Support",
"Lift",
"Confidence"
],
"correctAnswerIndex": 2,
"explanation": "Lift evaluates how much more often X and Y occur together than expected if they were independent. Lift = Support(XβͺY) / (Support(X) Γ Support(Y))."
},
{
"id": 40,
"questionText": "What does a lift value greater than 1 imply?",
"options": [
"X causes Y to occur.",
"X and Y are independent.",
"X and Y never occur together.",
"X and Y occur together more often than expected."
],
"correctAnswerIndex": 3,
"explanation": "A lift > 1 implies a positive correlation between X and Y, meaning they occur together more frequently than expected by chance."
},
{
"id": 41,
"questionText": "What is the relationship between 'support' and 'confidence'?",
"options": [
"Support measures accuracy; confidence measures frequency.",
"Support measures co-occurrence; confidence measures conditional probability.",
"Support is always greater than confidence.",
"They are identical metrics."
],
"correctAnswerIndex": 1,
"explanation": "Support measures how often X and Y appear together overall, while confidence measures how often Y appears given that X appears."
},
{
"id": 42,
"questionText": "If Support(X) = 0.5, Support(Y) = 0.4, and Support(XβͺY) = 0.3, what is the confidence of XβY?",
"options": [
"1.33",
"0.6",
"0.9",
"0.75"
],
"correctAnswerIndex": 3,
"explanation": "Confidence = Support(XβͺY) / Support(X) = 0.3 / 0.4 = 0.75, meaning 75% of transactions with X also contain Y."
},
{
"id": 43,
"questionText": "Which of the following is not a limitation of the Apriori algorithm?",
"options": [
"It can generate a large number of candidate itemsets.",
"It works only with categorical data.",
"It can efficiently handle large-scale continuous data.",
"It requires multiple database scans."
],
"correctAnswerIndex": 2,
"explanation": "Apriori is not efficient for large-scale continuous data; it is primarily designed for discrete, categorical datasets."
},
{
"id": 44,
"questionText": "How does increasing the minimum support threshold affect Aprioriβs results?",
"options": [
"More itemsets are found.",
"Fewer itemsets are found.",
"Rules become less confident.",
"Support values increase automatically."
],
"correctAnswerIndex": 1,
"explanation": "Raising the minimum support threshold filters out less frequent itemsets, reducing the total number of generated itemsets."
},
{
"id": 45,
"questionText": "Which of the following helps improve Aprioriβs performance?",
"options": [
"Avoiding pruning.",
"Increasing dataset size.",
"Reducing minimum support threshold.",
"Using the Apriori property for pruning."
],
"correctAnswerIndex": 3,
"explanation": "Using the Apriori property allows early pruning of infrequent subsets, significantly improving computational efficiency."
},
{
"id": 46,
"questionText": "Why is candidate generation computationally expensive in Apriori?",
"options": [
"Because it has a single database scan.",
"Because it depends on regression coefficients.",
"Because it uses unsupervised learning.",
"Because it must check every possible combination of items."
],
"correctAnswerIndex": 3,
"explanation": "Candidate generation involves forming and testing all possible item combinations, which grows exponentially with dataset size."
},
{
"id": 47,
"questionText": "In Apriori, which technique can be used to reduce database scans?",
"options": [
"Backpropagation",
"Linear regression",
"Hash-based counting",
"Decision tree pruning"
],
"correctAnswerIndex": 2,
"explanation": "Hash-based counting maps itemsets into hash buckets to reduce the need for repeated database scans and improve efficiency."
},
{
"id": 48,
"questionText": "The time complexity of Apriori primarily depends on:",
"options": [
"Number of clusters",
"Number of items and transactions",
"Learning rate",
"Number of labels"
],
"correctAnswerIndex": 1,
"explanation": "The time complexity of Apriori is driven by both the number of unique items (which affects combinations) and total transactions."
},
{
"id": 49,
"questionText": "What happens if the dataset is very sparse?",
"options": [
"Support thresholds increase automatically.",
"All itemsets become frequent.",
"Lift becomes 0.",
"Few frequent itemsets are found."
],
"correctAnswerIndex": 3,
"explanation": "Sparse datasets have fewer common co-occurrences, resulting in very few itemsets that meet the minimum support threshold."
},
{
"id": 50,
"questionText": "Which variant of Apriori improves speed by reducing candidate sets?",
"options": [
"AprioriFast",
"AprioriLite",
"AprioriHybrid",
"AprioriTid"
],
"correctAnswerIndex": 3,
"explanation": "AprioriTid keeps track of candidate sets in memory instead of re-reading the entire database, improving speed in later iterations."
},
{
"id": 51,
"questionText": "What is a 'rule generation' phase in Apriori?",
"options": [
"Generating association rules from frequent itemsets.",
"Counting item frequency.",
"Creating the initial L1 itemset.",
"Generating candidate itemsets."
],
"correctAnswerIndex": 0,
"explanation": "After frequent itemsets are discovered, the rule generation phase derives association rules from them based on confidence thresholds."
},
{
"id": 52,
"questionText": "In which step does Apriori calculate 'support count' for each candidate?",
"options": [
"During database scan phase",
"During pruning phase",
"During rule validation phase",
"During rule generation phase"
],
"correctAnswerIndex": 0,
"explanation": "Support counts for each candidate itemset are calculated during database scans to determine which itemsets are frequent."
},
{
"id": 53,
"questionText": "If Support(XβͺY) = 0.15 and Support(X) = 0.3, what is Confidence(XβY)?",
"options": [
"0.15",
"0.45",
"0.5",
"2"
],
"correctAnswerIndex": 2,
"explanation": "Confidence = Support(XβͺY) / Support(X) = 0.15 / 0.3 = 0.5, meaning 50% of transactions containing X also contain Y."
},
{
"id": 54,
"questionText": "Apriori performs best when:",
"options": [
"Dataset is dense and small.",
"Minimum support is very low.",
"Dataset is continuous.",
"Dataset is sparse and large."
],
"correctAnswerIndex": 0,
"explanation": "Apriori works best on dense and smaller datasets where frequent itemsets appear often and candidate generation is manageable."
},
{
"id": 55,
"questionText": "Which metric helps identify rules that are misleading due to common items?",
"options": [
"Lift",
"Leverage",
"Support",
"Conviction"
],
"correctAnswerIndex": 1,
"explanation": "Leverage helps detect misleading rules by comparing the observed co-occurrence of X and Y to what would be expected if they were independent."
},
{
"id": 56,
"questionText": "What does a confidence of 1.0 mean?",
"options": [
"X and Y never appear together.",
"X occurs twice as often as Y.",
"Support is zero.",
"Whenever X occurs, Y always occurs."
],
"correctAnswerIndex": 3,
"explanation": "A confidence of 1.0 indicates a perfect rule: every transaction containing X also contains Y."
},
{
"id": 57,
"questionText": "Which of the following can cause redundant rules in Apriori?",
"options": [
"Hash-based counting.",
"Strict pruning.",
"High lift values.",
"Low support and confidence thresholds."
],
"correctAnswerIndex": 3,
"explanation": "Low thresholds lead to many weak and overlapping rules, causing redundancy in the final rule set."
},
{
"id": 58,
"questionText": "What is 'downward closure property' also known as?",
"options": [
"Frequent closure rule",
"Pruning law",
"Confidence rule",
"Apriori property"
],
"correctAnswerIndex": 3,
"explanation": "The downward closure property, or Apriori property, states that all subsets of a frequent itemset must also be frequent."
},
{
"id": 59,
"questionText": "What is the formula for Lift(XβY)?",
"options": [
"Lift = Support(XβͺY) / (Support(X) Γ Support(Y))",
"Lift = Support(Y) / Support(XβͺY)",
"Lift = Confidence Γ Support",
"Lift = Support(X) / Support(XβͺY)"
],
"correctAnswerIndex": 0,
"explanation": "Lift compares observed co-occurrence with expected independence, calculated as Support(XβͺY) divided by Support(X) Γ Support(Y)."
},
{
"id": 60,
"questionText": "What happens if Lift = 1?",
"options": [
"X and Y are positively correlated.",
"X and Y are independent.",
"Rule is invalid.",
"X and Y are negatively correlated."
],
"correctAnswerIndex": 1,
"explanation": "When Lift = 1, X and Y occur together exactly as often as expected under independence, showing no association."
},
{
"id": 61,
"questionText": "What type of data transformation is required before using Apriori?",
"options": [
"Time-series lag transformation.",
"Normalization of continuous attributes.",
"Standardization of numeric values.",
"Binary encoding of items in transactions."
],
"correctAnswerIndex": 3,
"explanation": "Apriori typically works on binary-encoded transactional data, where each item is represented as 1 (present) or 0 (absent)."
},
{
"id": 62,
"questionText": "What is the purpose of minimum confidence threshold?",
"options": [
"To filter rules that are not reliable enough.",
"To reduce lift values.",
"To filter itemsets that occur too frequently.",
"To limit the dataset size."
],
"correctAnswerIndex": 0,
"explanation": "The minimum confidence threshold ensures only rules with sufficient predictive reliability are kept."
},
{
"id": 63,
"questionText": "What happens if both minimum support and confidence thresholds are set very high?",
"options": [
"Algorithm will fail.",
"Few or no rules will be generated.",
"Too many redundant rules will be generated.",
"Lift will always be zero."
],
"correctAnswerIndex": 1,
"explanation": "High thresholds filter out most itemsets and rules, resulting in very few discovered associations."
},
{
"id": 64,
"questionText": "Which of the following represents a strong rule?",
"options": [
"Low support and high lift",
"High support and low confidence",
"Low confidence and high support",
"High support and high confidence"
],
"correctAnswerIndex": 3,
"explanation": "Strong rules exhibit both high support (frequent co-occurrence) and high confidence (high reliability)."
},
{
"id": 65,
"questionText": "What type of relationship does Apriori primarily find?",
"options": [
"Functional relationships in regression",
"Associative relationships between items",
"Causal relationships between features",
"Hierarchical relationships between classes"
],
"correctAnswerIndex": 1,
"explanation": "Apriori focuses on associative relationships β discovering which items tend to appear together, not causal or predictive links."
},
{
"id": 66,
"questionText": "What happens if the minimum confidence threshold is set too high in the Apriori algorithm?",
"options": [
"The number of frequent itemsets will increase drastically.",
"The algorithm may generate too many redundant rules.",
"It will have no effect on the rules generated.",
"Many strong but infrequent rules may be ignored."
],
"correctAnswerIndex": 3,
"explanation": "Setting a very high confidence threshold can cause the algorithm to miss potentially interesting rules that have moderate confidence but high support, thus reducing overall insight."
},
{
"id": 67,
"questionText": "Why does the Apriori algorithm use an iterative approach?",
"options": [
"To merge multiple datasets together before rule generation.",
"To reduce the computational cost of support counting.",
"To progressively build larger frequent itemsets from smaller ones.",
"To randomly sample the dataset multiple times."
],
"correctAnswerIndex": 2,
"explanation": "Apriori uses a level-wise iterative approach: it first finds frequent 1-itemsets, then uses them to generate 2-itemsets, and so on, until no more frequent itemsets can be generated."
},
{
"id": 68,
"questionText": "Which of the following best defines the term 'strong association rule'?",
"options": [
"A rule that appears in every transaction.",
"A rule that satisfies both minimum support and minimum confidence thresholds.",
"A rule that contains the largest number of items.",
"A rule that has the highest lift value."
],
"correctAnswerIndex": 1,
"explanation": "A strong association rule is one that meets both user-defined thresholds for minimum support and minimum confidence, ensuring that it is both frequent and reliable."
},
{
"id": 69,
"questionText": "In Apriori, why is candidate pruning necessary after generating Ck (candidate itemsets of size k)?",
"options": [
"To remove itemsets that contain infrequent subsets.",
"To increase the support value of remaining itemsets.",
"To eliminate itemsets with high lift values.",
"To reduce the number of transactions in the dataset."
],
"correctAnswerIndex": 0,
"explanation": "After generating candidate itemsets (Ck), Apriori prunes those whose subsets are not frequent. This is based on the Apriori property, which ensures computational efficiency."
},
{
"id": 70,
"questionText": "If the Apriori algorithm produces too many frequent itemsets, what adjustment should you make?",
"options": [
"Add more transactions to the dataset.",
"Decrease the minimum confidence threshold.",
"Increase the minimum support threshold.",
"Use a smaller dataset."
],
"correctAnswerIndex": 2,
"explanation": "When too many frequent itemsets are found, it indicates that the minimum support threshold is too low. Raising it helps reduce the number of itemsets to a more manageable set."
},
{
"id": 71,
"questionText": "You are analyzing grocery transactions using Apriori. If 'milk β bread' has high confidence but low lift, what does that imply?",
"options": [
"Milk and bread are rarely bought together.",
"The support value for the rule must be very high.",
"The rule has both high confidence and high significance.",
"Milk and bread co-occur frequently, but their association is not stronger than random chance."
],
"correctAnswerIndex": 3,
"explanation": "High confidence with low lift suggests that while milk and bread often appear together, this occurrence is mostly due to their high individual frequencies, not because they are strongly associated beyond random expectation."
},
{
"id": 72,
"questionText": "A supermarket uses Apriori and finds the rule {diapers} β {beer} with high confidence. What is the most likely business action?",
"options": [
"Increase the price of diapers only.",
"Place diapers and beer close together to increase joint sales.",
"Remove beer from the store.",
"Reduce the number of beer brands."
],
"correctAnswerIndex": 1,
"explanation": "A high-confidence rule indicates a strong co-purchase tendency. Placing these items closer encourages impulse buying, leveraging the discovered association."
},
{
"id": 73,
"questionText": "A rule {bread, butter} β {jam} has low support but very high confidence. What does this mean?",
"options": [
"The combination occurs rarely but is very reliable when it does.",
"The rule is meaningless because support must be high.",
"Bread and butter are independent of jam.",
"Jam is more popular than bread or butter."
],
"correctAnswerIndex": 0,
"explanation": "Low support with high confidence indicates that while few transactions contain all items, whenever bread and butter occur together, jam almost always appears too."
},
{
"id": 74,
"questionText": "A retailer increases the minimum support threshold in Apriori. What will likely happen?",
"options": [
"The lift of each rule will increase.",
"More frequent itemsets will be generated.",
"The confidence values of rules will increase.",
"Fewer frequent itemsets will be generated."
],
"correctAnswerIndex": 3,
"explanation": "Increasing the minimum support threshold filters out itemsets that occur less frequently, thus reducing the number of frequent itemsets generated."
},
{
"id": 75,
"questionText": "You discover the rule {pasta} β {tomato sauce} with lift = 2. What does this indicate?",
"options": [
"The rule has twice the confidence of support.",
"Pasta and tomato sauce are twice as likely to be bought together than by chance.",
"The dataset contains exactly twice as many pasta transactions as tomato sauce transactions.",
"The association between pasta and tomato sauce is weak."
],
"correctAnswerIndex": 1,
"explanation": "Lift = 2 means the joint occurrence of pasta and tomato sauce is twice what would be expected if they were independent β a strong positive association."
},
{
"id": 76,
"questionText": "If Apriori finds {milk, bread} β {butter} but not {bread, butter} β {milk}, what does it suggest?",
"options": [
"The algorithm failed to meet the minimum support threshold.",
"The rules are directional; confidence depends on the antecedent and consequent.",
"The dataset must contain errors.",
"The items are mutually exclusive."
],
"correctAnswerIndex": 1,
"explanation": "Association rules are directional; {A β B} may have different confidence than {B β A}. The relationship depends on how often the antecedent implies the consequent."
},
{
"id": 77,
"questionText": "In a dataset, lift = 1. What does this indicate about the association between items?",
"options": [
"The rule is invalid.",
"The items are independent of each other.",
"The items occur in every transaction.",
"The items are perfectly associated."
],
"correctAnswerIndex": 1,
"explanation": "A lift of 1 means there is no association between items β their co-occurrence is purely due to chance, indicating independence."
},
{
"id": 78,
"questionText": "A large e-commerce dataset causes Apriori to run extremely slowly. What is the best alternative algorithm?",
"options": [
"FP-Growth",
"Naive Bayes",
"Decision Trees",
"K-Means"
],
"correctAnswerIndex": 0,
"explanation": "FP-Growth is preferred for large datasets as it avoids generating candidate itemsets, using an FP-Tree for compact storage and faster computation."
},
{
"id": 79,
"questionText": "If support(A) = 0.4, support(B) = 0.5, and support(A βͺ B) = 0.2, what is the confidence of the rule A β B?",
"options": [
"0.25",
"0.2",
"0.5",
"0.4"
],
"correctAnswerIndex": 2,
"explanation": "Confidence(A β B) = support(A βͺ B) / support(A) = 0.2 / 0.4 = 0.5."
},
{
"id": 80,
"questionText": "A rule has confidence = 0.8 and lift = 1. What does this mean?",
"options": [
"The support value must be low.",
"The rule is reliable but not useful, as the items occur independently.",
"The rule is both reliable and highly associated.",
"The dataset is too sparse."
],
"correctAnswerIndex": 1,
"explanation": "Confidence = 0.8 means 80% reliability, but lift = 1 indicates independence. So while the rule seems strong, it offers no real association insight."
},
{
"id": 81,
"questionText": "If an itemset passes the support threshold but fails the confidence threshold, what does it imply?",
"options": [
"It must have very high lift.",
"It cannot appear in any association rule.",
"It appears frequently but does not strongly imply other items.",
"It should be removed from all transactions."
],
"correctAnswerIndex": 2,
"explanation": "Frequent itemsets may still fail to form strong rules if the consequent is not reliably present β they are frequent but not predictive."
},
{
"id": 82,
"questionText": "A rule {tea} β {cookies} has high confidence but very low support. Should it be trusted?",
"options": [
"Yes, because confidence is more important than support.",
"No, because it occurs too rarely to be meaningful.",
"Yes, because lift will always be high when confidence is high.",
"No, because tea and cookies cannot be related."
],
"correctAnswerIndex": 1,
"explanation": "Low support means the rule is based on too few examples to generalize well, even if confidence is high. Support ensures statistical significance."
},
{
"id": 83,
"questionText": "In Apriori, what happens if two items always occur together in every transaction?",
"options": [
"Their lift will be greater than 1.",
"Their confidence and lift will both be maximum possible.",
"They will be pruned as infrequent.",
"Their confidence will be zero."
],
"correctAnswerIndex": 1,
"explanation": "If two items always co-occur, confidence = 1 and lift = 1 / support(B), meaning maximum confidence and strong association."
},
{
"id": 84,
"questionText": "A dataset has millions of transactions but few unique items. What is the expected performance of Apriori?",
"options": [
"It will perform efficiently due to low item variety.",
"It will need a higher lift threshold.",
"It will fail to find any rules.",
"It will slow down due to many candidate sets."
],
"correctAnswerIndex": 0,
"explanation": "With fewer unique items, the number of possible combinations is small, allowing Apriori to perform efficiently even with large transaction counts."
},
{
"id": 85,
"questionText": "What is one common post-processing step after generating association rules using Apriori?",
"options": [
"Running regression on frequent itemsets.",
"Adding random noise to the dataset.",
"Recomputing support for every rule.",
"Filtering redundant or weak rules based on lift and confidence."
],
"correctAnswerIndex": 3,
"explanation": "After generating many rules, redundancy filtering helps remove overlapping or weak associations, retaining only the most informative rules."
},
{
"id": 86,
"questionText": "A store finds the rule {chips} β {salsa} with support = 0.4, confidence = 0.9, lift = 3. What does this imply?",
"options": [
"Salsa is rarely bought without chips.",
"The items occur independently.",
"Chips and salsa are strongly associated; the rule is valuable.",
"The rule is weak because support is too high."
],
"correctAnswerIndex": 2,
"explanation": "High lift (3) means chips and salsa are three times more likely to be bought together than by chance, showing a strong actionable association."
},
{
"id": 87,
"questionText": "Which situation would cause Apriori to miss a valid rule?",
"options": [
"Having too few transactions.",
"Setting the support threshold too high.",
"Setting the confidence threshold too low.",
"Using continuous variables instead of categorical."
],
"correctAnswerIndex": 1,
"explanation": "A high support threshold may eliminate meaningful but less frequent patterns, causing the algorithm to miss valid associations."
},
{
"id": 88,
"questionText": "How can Apriori be adapted for streaming data or large-scale environments?",
"options": [
"By increasing the support threshold until no rules remain.",
"By limiting rule generation to single-item antecedents.",
"By using incremental or parallelized versions of Apriori.",
"By converting data into clusters first."
],
"correctAnswerIndex": 2,
"explanation": "For scalability, Apriori can be implemented incrementally or in parallel (e.g., in MapReduce) to handle continuous or large-volume data streams."
},
{
"id": 89,
"questionText": "Which of the following indicates a misleading rule in Apriori?",
"options": [
"Low confidence but high support.",
"High support and high lift.",
"Low support and low confidence.",
"High confidence but low lift."
],
"correctAnswerIndex": 3,
"explanation": "A high-confidence, low-lift rule appears strong but shows no actual association beyond random occurrence β it's misleading."
},
{
"id": 90,
"questionText": "In a retail dataset, {pen} β {paper} has confidence = 0.9 and lift = 2. What does this mean?",
"options": [
"Pen and paper are unrelated.",
"The dataset is too small to draw conclusions.",
"Customers who buy pens are twice as likely to buy paper as random chance suggests.",
"The rule has very low reliability."
],
"correctAnswerIndex": 2,
"explanation": "A lift of 2 indicates a strong positive relationship β pen buyers are twice as likely to also buy paper compared to random probability."
},
{
"id": 91,
"questionText": "If Apriori is applied to non-transactional data, what preprocessing step is necessary?",
"options": [
"Normalizing continuous variables.",
"Adding missing categorical variables.",
"Transforming it into a binary transactional format.",
"Applying k-means clustering."
],
"correctAnswerIndex": 2,
"explanation": "Apriori requires categorical transactional data. Non-transactional data must first be converted into a binary form where each item is 0/1 per record."
},
{
"id": 92,
"questionText": "A rule has high support and low confidence. What does this indicate?",
"options": [
"The items rarely co-occur.",
"The lift must be very high.",
"The rule is strong and reliable.",
"The items appear frequently but are not strongly dependent on each other."
],
"correctAnswerIndex": 3,
"explanation": "High support with low confidence means the items appear often individually, but one does not necessarily imply the other."
},
{
"id": 93,
"questionText": "In Apriori, if we want to generate longer rules, what adjustment helps?",
"options": [
"Increasing the confidence threshold.",
"Lowering the minimum support threshold.",
"Reducing the dataset size.",
"Increasing the lift threshold."
],
"correctAnswerIndex": 1,
"explanation": "Reducing the minimum support threshold allows more itemsets to qualify as frequent, leading to longer potential rules."
},
{
"id": 94,
"questionText": "Why is Apriori considered a 'bottom-up' approach?",
"options": [
"It builds larger frequent itemsets from smaller ones iteratively.",
"It compares association rules before counting support.",
"It starts with the largest itemsets and prunes downward.",
"It analyzes the dataset top-down using trees."
],
"correctAnswerIndex": 0,
"explanation": "Apriori begins with single-item itemsets and incrementally grows them by combining frequent ones, making it a bottom-up process."
},
{
"id": 95,
"questionText": "A dataset contains 1 million transactions and 10,000 items. What is the biggest performance bottleneck for Apriori?",
"options": [
"Rule visualization.",
"Confidence calculation.",
"Candidate generation and support counting.",
"Lift computation."
],
"correctAnswerIndex": 2,
"explanation": "The major performance issue lies in generating and counting vast numbers of candidate itemsets, which grows exponentially with item count."
},
{
"id": 96,
"questionText": "What is the best strategy when Apriori produces too many redundant rules?",
"options": [
"Increase transaction size.",
"Decrease minimum confidence.",
"Apply rule post-filtering using lift or conviction.",
"Remove all 2-item rules."
],
"correctAnswerIndex": 2,
"explanation": "Post-processing filters based on metrics like lift or conviction help retain only unique, meaningful rules, reducing redundancy."
},
{
"id": 97,
"questionText": "What is the relationship between lift and independence?",
"options": [
"Lift > 1 means items are independent.",
"Lift = 0 means items are independent.",
"Lift < 1 means items are perfectly correlated.",
"Lift = 1 means items are independent."
],
"correctAnswerIndex": 3,
"explanation": "A lift value of exactly 1 indicates statistical independence; values greater than 1 show positive correlation, and less than 1 show negative correlation."
},
{
"id": 98,
"questionText": "What does a negative correlation between two items imply in association rule mining?",
"options": [
"They have high support.",
"They are mutually dependent.",
"They always appear together.",
"They are less likely to occur together than by chance."
],
"correctAnswerIndex": 3,
"explanation": "Negative correlation means the occurrence of one item reduces the likelihood of the other appearing β a lift value below 1 reflects this."
},
{
"id": 99,
"questionText": "In a medical dataset, Apriori finds {fever, cough} β {flu} with lift = 3.5. What does this mean?",
"options": [
"The rule is statistically insignificant.",
"The dataset must have errors.",
"Patients with fever and cough are 3.5 times more likely to have flu compared to random chance.",
"Fever and cough are independent of flu."
],
"correctAnswerIndex": 2,
"explanation": "A lift of 3.5 indicates a strong positive association β fever and cough together strongly suggest the presence of flu."
},
{
"id": 100,
"questionText": "In a real-world deployment, why might Apriori-generated rules fail to perform well over time?",
"options": [
"The rules are stored in memory incorrectly.",
"Apriori uses probabilistic sampling.",
"Customer behavior and item associations may change.",
"Support and confidence are permanent metrics."
],
"correctAnswerIndex": 2,
"explanation": "Association rules can become outdated as trends shift. Continuous retraining or adaptive algorithms are needed to reflect evolving patterns."
}
]
}
|