{
"title": "Apriori Algorithm Mastery: 100 MCQs",
"description": "A complete collection of 100 multiple-choice questions covering the Apriori algorithm β from basic concepts and definitions to medium-level implementation details and hard scenario-based applications in association rule mining.",
"questions": [
{
"id": 1,
"questionText": "What is the primary purpose of the Apriori algorithm?",
"options": [
"To find linear relationships between continuous variables.",
"To classify data into predefined categories.",
"To reduce the dimensionality of data.",
"To identify frequent itemsets and generate association rules."
],
"correctAnswerIndex": 3,
"explanation": "The Apriori algorithm is used in association rule mining to discover frequent itemsets from transactional datasets and generate rules that describe how items co-occur."
},
{
"id": 2,
"questionText": "The Apriori algorithm is mainly used in which domain?",
"options": [
"Regression Analysis",
"Clustering",
"Market Basket Analysis",
"Image Classification"
],
"correctAnswerIndex": 2,
"explanation": "Apriori is most commonly used in Market Basket Analysis to identify patterns in customer purchase behavior, such as 'If a person buys bread, they are likely to buy butter'."
},
{
"id": 3,
"questionText": "In Apriori, what does 'support' measure?",
"options": [
"The total number of items in a transaction.",
"The probability that a rule is correct.",
"The correlation between two attributes.",
"The frequency of occurrence of an itemset in the dataset."
],
"correctAnswerIndex": 3,
"explanation": "Support measures how frequently an itemset appears in the dataset, helping to identify item combinations that occur often enough to be considered interesting."
},
{
"id": 4,
"questionText": "What is 'confidence' in Apriori rule mining?",
"options": [
"A measure of how frequently items appear together.",
"The probability that a transaction containing X also contains Y.",
"The total number of transactions in the dataset.",
"The likelihood that a rule is incorrect."
],
"correctAnswerIndex": 1,
"explanation": "Confidence measures the reliability of a rule β the percentage of transactions containing X that also contain Y, for a rule X β Y."
},
{
"id": 5,
"questionText": "Which of the following statements best describes the Apriori property?",
"options": [
"Both (2) and (3)",
"All supersets of a frequent itemset must also be frequent.",
"All subsets of a frequent itemset must also be frequent.",
"All supersets of an infrequent itemset must be infrequent."
],
"correctAnswerIndex": 0,
"explanation": "The Apriori property states that all non-empty subsets of a frequent itemset are also frequent, and conversely, any superset of an infrequent itemset must be infrequent."
},
{
"id": 6,
"questionText": "What does the 'minimum support threshold' control in Apriori?",
"options": [
"The minimum frequency required for an itemset to be considered frequent.",
"The accuracy of association rules.",
"The maximum number of items allowed per transaction.",
"The minimum confidence of a rule."
],
"correctAnswerIndex": 0,
"explanation": "The minimum support threshold helps eliminate infrequent itemsets by setting a lower limit on how often an itemset must appear in the dataset to be considered frequent."
},
{
"id": 7,
"questionText": "Which data structure is most commonly used to store itemsets in the Apriori algorithm?",
"options": [
"Trees",
"Stacks",
"Linked lists",
"Hash tables"
],
"correctAnswerIndex": 3,
"explanation": "Hash tables are often used for efficient counting and storage of candidate itemsets during the support counting phase of the Apriori algorithm."
},
{
"id": 8,
"questionText": "Which of the following can cause Apriori to perform slowly on large datasets?",
"options": [
"It generates a large number of candidate itemsets.",
"It uses recursive tree pruning.",
"It requires labeled data.",
"It ignores item frequency thresholds."
],
"correctAnswerIndex": 0,
"explanation": "Apriori can become computationally expensive on large datasets because it must generate and test many candidate itemsets at each iteration."
},
{
"id": 9,
"questionText": "Which of these best describes 'association rule mining'?",
"options": [
"Reducing data dimensions using PCA.",
"Grouping similar data points into clusters.",
"Finding correlations among items in transactional data.",
"Predicting continuous outcomes using regression."
],
"correctAnswerIndex": 2,
"explanation": "Association rule mining uncovers interesting relationships or correlations among items in transactional or relational datasets."
},
{
"id": 10,
"questionText": "Which algorithm improvement focuses on reducing candidate generation compared to Apriori?",
"options": [
"FP-Growth",
"K-Means",
"Naive Bayes",
"Linear Regression"
],
"correctAnswerIndex": 0,
"explanation": "The FP-Growth algorithm improves upon Apriori by eliminating the costly candidate generation process, using a compressed tree structure (FP-Tree) instead."
},
{
"id": 11,
"questionText": "Who introduced the Apriori algorithm?",
"options": [
"Yann LeCun and Yoshua Bengio",
"Andrew Ng and Geoffrey Hinton",
"Rakesh Agrawal and Ramakrishnan Srikant",
"Ian Goodfellow and Richard Sutton"
],
"correctAnswerIndex": 2,
"explanation": "The Apriori algorithm was proposed by Rakesh Agrawal and Ramakrishnan Srikant in 1994 for mining frequent itemsets and association rules from large transactional databases."
},
{
"id": 12,
"questionText": "What kind of dataset is Apriori typically applied to?",
"options": [
"Continuous numerical datasets",
"Transactional datasets",
"Time-series datasets",
"Labeled datasets for classification"
],
"correctAnswerIndex": 1,
"explanation": "Apriori works on transactional datasets, such as market basket data, where each transaction contains a set of items purchased together."
},
{
"id": 13,
"questionText": "Which of the following best represents an 'itemset'?",
"options": [
"A list of all customers",
"A probability distribution",
"A collection of items that occur together",
"A single item in a transaction"
],
"correctAnswerIndex": 2,
"explanation": "An itemset is a collection of one or more items that appear together in a transaction. For example, {bread, butter, milk} is a 3-itemset."
},
{
"id": 14,
"questionText": "In Apriori, what is meant by a 'frequent itemset'?",
"options": [
"An itemset that has the highest lift value",
"An itemset that meets or exceeds the minimum support threshold",
"An itemset that appears only once",
"An itemset that rarely appears in transactions"
],
"correctAnswerIndex": 1,
"explanation": "A frequent itemset is one that occurs frequently enough in the dataset to satisfy the minimum support threshold."
},
{
"id": 15,
"questionText": "What is the output of the Apriori algorithm?",
"options": [
"A list of association rules with their support and confidence values",
"A set of clusters with centroids",
"A regression line equation",
"A confusion matrix"
],
"correctAnswerIndex": 0,
"explanation": "The Apriori algorithm outputs a set of association rules along with their corresponding support, confidence, and lift measures."
},
{
"id": 16,
"questionText": "What is the primary challenge Apriori faces with large datasets?",
"options": [
"High memory and computational cost due to candidate generation",
"Lack of interpretability of rules",
"It requires supervised learning labels",
"Inability to handle continuous variables"
],
"correctAnswerIndex": 0,
"explanation": "Apriori can become inefficient on large datasets because it generates and scans a huge number of candidate itemsets, consuming time and memory."
},
{
"id": 17,
"questionText": "Apriori algorithm uses which approach to find frequent itemsets?",
"options": [
"Bottom-up approach",
"Top-down approach",
"Random sampling",
"Divide and conquer"
],
"correctAnswerIndex": 0,
"explanation": "Apriori follows a bottom-up approach, where frequent subsets are extended one item at a time to form larger itemsets, as long as those subsets remain frequent."
},
{
"id": 18,
"questionText": "What does the 'Apriori' name signify?",
"options": [
"It refers to the priority order of transactions.",
"It comes from the word 'prioritize'.",
"It means it was developed first among other algorithms.",
"It means the algorithm uses prior knowledge of frequent itemset properties."
],
"correctAnswerIndex": 3,
"explanation": "The term 'Apriori' refers to the use of prior knowledge β specifically, the property that all subsets of a frequent itemset must also be frequent."
},
{
"id": 19,
"questionText": "What is the typical input format for Apriori?",
"options": [
"A transactional dataset where each transaction is a list of items",
"A time-series with timestamps",
"A table of numeric values",
"A matrix of continuous features"
],
"correctAnswerIndex": 0,
"explanation": "Apriori expects a dataset where each record represents a transaction containing a list of items purchased or present together."
},
{
"id": 20,
"questionText": "What type of learning does Apriori belong to?",
"options": [
"Semi-supervised learning",
"Unsupervised learning",
"Reinforcement learning",
"Supervised learning"
],
"correctAnswerIndex": 1,
"explanation": "Apriori is an unsupervised learning algorithm because it discovers hidden relationships or associations without labeled output variables."
},
{
"id": 21,
"questionText": "Which metric helps to measure the strength of association rules beyond support and confidence?",
"options": [
"Lift",
"Precision",
"Entropy",
"Recall"
],
"correctAnswerIndex": 0,
"explanation": "Lift measures how much more often X and Y occur together than expected if they were statistically independent, helping to identify strong rules."
},
{
"id": 22,
"questionText": "Which of the following statements about 'lift' is correct?",
"options": [
"Lift = 0 means perfect correlation.",
"Lift = 1 means X and Y are independent.",
"Lift < 1 means X and Y occur more often together.",
"Lift > 1 means X and Y are negatively correlated."
],
"correctAnswerIndex": 1,
"explanation": "A lift of 1 means that the occurrence of X has no effect on Y (they are independent). Lift > 1 suggests a positive association, and Lift < 1 indicates a negative one."
},
{
"id": 23,
"questionText": "What is the main reason for pruning in Apriori?",
"options": [
"To increase the confidence of rules.",
"To generate more itemsets.",
"To remove infrequent itemsets early and reduce computation.",
"To handle missing data."
],
"correctAnswerIndex": 2,
"explanation": "Pruning eliminates infrequent itemsets early based on the Apriori property, reducing unnecessary computations and improving efficiency."
},
{
"id": 24,
"questionText": "What is a candidate itemset?",
"options": [
"A transaction containing all items.",
"A confirmed frequent itemset.",
"A rule with high confidence.",
"A potential itemset that may become frequent after support testing."
],
"correctAnswerIndex": 3,
"explanation": "Candidate itemsets are those that may become frequent, pending support count validation against the minimum support threshold."
},
{
"id": 25,
"questionText": "In Apriori, what does the step 'Join' refer to?",
"options": [
"Merging transactions with similar IDs.",
"Combining rules with similar confidence.",
"Joining datasets based on keys.",
"Combining smaller frequent itemsets to form larger candidate itemsets."
],
"correctAnswerIndex": 3,
"explanation": "The 'Join' step merges smaller frequent itemsets (k-itemsets) to generate larger candidate itemsets (k+1-itemsets)."
},
{
"id": 26,
"questionText": "Which of these measures how often an itemset appears in transactions that contain another itemset?",
"options": [
"Confidence",
"Lift",
"Correlation",
"Support"
],
"correctAnswerIndex": 0,
"explanation": "Confidence measures the conditional probability that a transaction containing one itemset also contains another."
},
{
"id": 27,
"questionText": "What is the stopping condition for Aprioriβs iteration process?",
"options": [
"When confidence falls below 0.5.",
"When lift becomes 1.",
"When all itemsets are tested once.",
"When no new frequent itemsets can be generated."
],
"correctAnswerIndex": 3,
"explanation": "The Apriori process stops when no further frequent itemsets can be generated in the next iteration."
},
{
"id": 28,
"questionText": "What does a high confidence value indicate in a rule X β Y?",
"options": [
"Y often appears without X.",
"X and Y are negatively correlated.",
"X and Y rarely appear together.",
"Transactions containing X are likely to also contain Y."
],
"correctAnswerIndex": 3,
"explanation": "A high confidence value means that if X appears, Y is very likely to appear in the same transaction β indicating a strong association."
},
{
"id": 29,
"questionText": "If an itemsetβs support is below the threshold, what happens?",
"options": [
"It increases the confidence value.",
"It is pruned from further consideration.",
"It is considered frequent.",
"It is forced into the rule set."
],
"correctAnswerIndex": 1,
"explanation": "Itemsets that do not meet the minimum support threshold are pruned from further iterations because they are considered infrequent."
},
{
"id": 30,
"questionText": "Which of the following combinations of metrics is most commonly used in Apriori?",
"options": [
"Mean, Variance, and Standard Deviation",
"Support, Confidence, and Lift",
"Entropy, Gini Index, and Information Gain",
"Precision, Recall, and F1-score"
],
"correctAnswerIndex": 1,
"explanation": "The Apriori algorithm uses Support, Confidence, and Lift as its core metrics for evaluating and filtering association rules."
},
{
"id": 31,
"questionText": "In Apriori, what happens during the 'Prune' step?",
"options": [
"Transactions are combined together.",
"Candidate itemsets that contain infrequent subsets are removed.",
"New candidate itemsets are created.",
"All itemsets are deleted."
],
"correctAnswerIndex": 1,
"explanation": "In the 'Prune' step, Apriori removes candidate itemsets that have infrequent subsets, reducing the number of itemsets that need to be tested in the next iteration."
},
{
"id": 32,
"questionText": "What is the significance of 'k' in Aprioriβs k-itemset?",
"options": [
"It represents the number of items in each itemset.",
"It represents the number of candidate generations.",
"It represents the total number of transactions.",
"It represents the number of association rules."
],
"correctAnswerIndex": 0,
"explanation": "The value 'k' denotes the size of an itemset. For example, a 2-itemset contains 2 items, and a 3-itemset contains 3 items."
},
{
"id": 33,
"questionText": "What is the purpose of generating L1 in Apriori?",
"options": [
"To determine the lift values.",
"To initialize the confidence of rules.",
"To find all 1-itemsets that are frequent.",
"To prune infrequent 2-itemsets."
],
"correctAnswerIndex": 2,
"explanation": "L1 represents all 1-itemsets that meet the minimum support threshold, serving as the starting point for generating larger frequent itemsets."
},
{
"id": 34,
"questionText": "How does Apriori decide whether to include a candidate itemset in the next level?",
"options": [
"By random selection.",
"By checking if its subsets are frequent and support β₯ minimum threshold.",
"By comparing lift with previous rules.",
"By checking if its confidence > 50%."
],
"correctAnswerIndex": 1,
"explanation": "Apriori includes a candidate itemset in the next iteration only if all its subsets are frequent and its support meets the threshold."
},
{
"id": 35,
"questionText": "Which of the following is true about 'Support Count'?",
"options": [
"It is the number of items in the dataset.",
"It measures the accuracy of rules.",
"It is always between 0 and 1.",
"It is the number of transactions containing an itemset."
],
"correctAnswerIndex": 3,
"explanation": "Support count refers to the actual count of transactions that contain a particular itemset before converting it into a support ratio."
},
{
"id": 36,
"questionText": "If the support of {milk, bread} is 0.3, what does it mean?",
"options": [
"Bread appears in 30% of transactions.",
"Milk and bread appear together in 30% of all transactions.",
"They are independent items.",
"Milk appears in 30% of transactions."
],
"correctAnswerIndex": 1,
"explanation": "A support of 0.3 means that milk and bread are bought together in 30% of the total transactions."
},
{
"id": 37,
"questionText": "If confidence(XβY) = 0.8, what does it indicate?",
"options": [
"Y occurs in 80% of all transactions.",
"X occurs in 80% of all transactions.",
"80% of transactions with X also contain Y.",
"The lift is 0.8."
],
"correctAnswerIndex": 2,
"explanation": "Confidence of 0.8 indicates that in 80% of the cases where X appears, Y also appears, showing a strong directional relationship."
},
{
"id": 38,
"questionText": "Which of the following formulas represents 'Confidence' correctly?",
"options": [
"Confidence(XβY) = Support(X) / Support(Y)",
"Confidence(XβY) = Support(X βͺ Y) / Support(X)",
"Confidence(XβY) = Support(Y) / Support(X βͺ Y)",
"Confidence(XβY) = Support(X βͺ Y) Γ Support(Y)"
],
"correctAnswerIndex": 1,
"explanation": "Confidence measures the probability that Y occurs in a transaction given that X occurs, calculated as Support(XβͺY) / Support(X)."
},
{
"id": 39,
"questionText": "Which measure identifies how much more likely items co-occur than if they were independent?",
"options": [
"Coverage",
"Support",
"Lift",
"Confidence"
],
"correctAnswerIndex": 2,
"explanation": "Lift evaluates how much more often X and Y occur together than expected if they were independent. Lift = Support(XβͺY) / (Support(X) Γ Support(Y))."
},
{
"id": 40,
"questionText": "What does a lift value greater than 1 imply?",
"options": [
"X causes Y to occur.",
"X and Y are independent.",
"X and Y never occur together.",
"X and Y occur together more often than expected."
],
"correctAnswerIndex": 3,
"explanation": "A lift > 1 implies a positive correlation between X and Y, meaning they occur together more frequently than expected by chance."
},
{
"id": 41,
"questionText": "What is the relationship between 'support' and 'confidence'?",
"options": [
"Support measures accuracy; confidence measures frequency.",
"Support measures co-occurrence; confidence measures conditional probability.",
"Support is always greater than confidence.",
"They are identical metrics."
],
"correctAnswerIndex": 1,
"explanation": "Support measures how often X and Y appear together overall, while confidence measures how often Y appears given that X appears."
},
{
"id": 42,
"questionText": "If Support(X) = 0.5, Support(Y) = 0.4, and Support(XβͺY) = 0.3, what is the confidence of XβY?",
"options": [
"1.33",
"0.6",
"0.9",
"0.75"
],
"correctAnswerIndex": 3,
"explanation": "Confidence = Support(XβͺY) / Support(X) = 0.3 / 0.4 = 0.75, meaning 75% of transactions with X also contain Y."
},
{
"id": 43,
"questionText": "Which of the following is not a limitation of the Apriori algorithm?",
"options": [
"It can generate a large number of candidate itemsets.",
"It works only with categorical data.",
"It can efficiently handle large-scale continuous data.",
"It requires multiple database scans."
],
"correctAnswerIndex": 2,
"explanation": "Apriori is not efficient for large-scale continuous data; it is primarily designed for discrete, categorical datasets."
},
{
"id": 44,
"questionText": "How does increasing the minimum support threshold affect Aprioriβs results?",
"options": [
"More itemsets are found.",
"Fewer itemsets are found.",
"Rules become less confident.",
"Support values increase automatically."
],
"correctAnswerIndex": 1,
"explanation": "Raising the minimum support threshold filters out less frequent itemsets, reducing the total number of generated itemsets."
},
{
"id": 45,
"questionText": "Which of the following helps improve Aprioriβs performance?",
"options": [
"Avoiding pruning.",
"Increasing dataset size.",
"Reducing minimum support threshold.",
"Using the Apriori property for pruning."
],
"correctAnswerIndex": 3,
"explanation": "Using the Apriori property allows early pruning of infrequent subsets, significantly improving computational efficiency."
},
{
"id": 46,
"questionText": "Why is candidate generation computationally expensive in Apriori?",
"options": [
"Because it has a single database scan.",
"Because it depends on regression coefficients.",
"Because it uses unsupervised learning.",
"Because it must check every possible combination of items."
],
"correctAnswerIndex": 3,
"explanation": "Candidate generation involves forming and testing all possible item combinations, which grows exponentially with dataset size."
},
{
"id": 47,
"questionText": "In Apriori, which technique can be used to reduce database scans?",
"options": [
"Backpropagation",
"Linear regression",
"Hash-based counting",
"Decision tree pruning"
],
"correctAnswerIndex": 2,
"explanation": "Hash-based counting maps itemsets into hash buckets to reduce the need for repeated database scans and improve efficiency."
},
{
"id": 48,
"questionText": "The time complexity of Apriori primarily depends on:",
"options": [
"Number of clusters",
"Number of items and transactions",
"Learning rate",
"Number of labels"
],
"correctAnswerIndex": 1,
"explanation": "The time complexity of Apriori is driven by both the number of unique items (which affects combinations) and total transactions."
},
{
"id": 49,
"questionText": "What happens if the dataset is very sparse?",
"options": [
"Support thresholds increase automatically.",
"All itemsets become frequent.",
"Lift becomes 0.",
"Few frequent itemsets are found."
],
"correctAnswerIndex": 3,
"explanation": "Sparse datasets have fewer common co-occurrences, resulting in very few itemsets that meet the minimum support threshold."
},
{
"id": 50,
"questionText": "Which variant of Apriori improves speed by reducing candidate sets?",
"options": [
"AprioriFast",
"AprioriLite",
"AprioriHybrid",
"AprioriTid"
],
"correctAnswerIndex": 3,
"explanation": "AprioriTid keeps track of candidate sets in memory instead of re-reading the entire database, improving speed in later iterations."
},
{
"id": 51,
"questionText": "What is a 'rule generation' phase in Apriori?",
"options": [
"Generating association rules from frequent itemsets.",
"Counting item frequency.",
"Creating the initial L1 itemset.",
"Generating candidate itemsets."
],
"correctAnswerIndex": 0,
"explanation": "After frequent itemsets are discovered, the rule generation phase derives association rules from them based on confidence thresholds."
},
{
"id": 52,
"questionText": "In which step does Apriori calculate 'support count' for each candidate?",
"options": [
"During database scan phase",
"During pruning phase",
"During rule validation phase",
"During rule generation phase"
],
"correctAnswerIndex": 0,
"explanation": "Support counts for each candidate itemset are calculated during database scans to determine which itemsets are frequent."
},
{
"id": 53,
"questionText": "If Support(XβͺY) = 0.15 and Support(X) = 0.3, what is Confidence(XβY)?",
"options": [
"0.15",
"0.45",
"0.5",
"2"
],
"correctAnswerIndex": 2,
"explanation": "Confidence = Support(XβͺY) / Support(X) = 0.15 / 0.3 = 0.5, meaning 50% of transactions containing X also contain Y."
},
{
"id": 54,
"questionText": "Apriori performs best when:",
"options": [
"Dataset is dense and small.",
"Minimum support is very low.",
"Dataset is continuous.",
"Dataset is sparse and large."
],
"correctAnswerIndex": 0,
"explanation": "Apriori works best on dense and smaller datasets where frequent itemsets appear often and candidate generation is manageable."
},
{
"id": 55,
"questionText": "Which metric helps identify rules that are misleading due to common items?",
"options": [
"Lift",
"Leverage",
"Support",
"Conviction"
],
"correctAnswerIndex": 1,
"explanation": "Leverage helps detect misleading rules by comparing the observed co-occurrence of X and Y to what would be expected if they were independent."
},
{
"id": 56,
"questionText": "What does a confidence of 1.0 mean?",
"options": [
"X and Y never appear together.",
"X occurs twice as often as Y.",
"Support is zero.",
"Whenever X occurs, Y always occurs."
],
"correctAnswerIndex": 3,
"explanation": "A confidence of 1.0 indicates a perfect rule: every transaction containing X also contains Y."
},
{
"id": 57,
"questionText": "Which of the following can cause redundant rules in Apriori?",
"options": [
"Hash-based counting.",
"Strict pruning.",
"High lift values.",
"Low support and confidence thresholds."
],
"correctAnswerIndex": 3,
"explanation": "Low thresholds lead to many weak and overlapping rules, causing redundancy in the final rule set."
},
{
"id": 58,
"questionText": "What is 'downward closure property' also known as?",
"options": [
"Frequent closure rule",
"Pruning law",
"Confidence rule",
"Apriori property"
],
"correctAnswerIndex": 3,
"explanation": "The downward closure property, or Apriori property, states that all subsets of a frequent itemset must also be frequent."
},
{
"id": 59,
"questionText": "What is the formula for Lift(XβY)?",
"options": [
"Lift = Support(XβͺY) / (Support(X) Γ Support(Y))",
"Lift = Support(Y) / Support(XβͺY)",
"Lift = Confidence Γ Support",
"Lift = Support(X) / Support(XβͺY)"
],
"correctAnswerIndex": 0,
"explanation": "Lift compares observed co-occurrence with expected independence, calculated as Support(XβͺY) divided by Support(X) Γ Support(Y)."
},
{
"id": 60,
"questionText": "What happens if Lift = 1?",
"options": [
"X and Y are positively correlated.",
"X and Y are independent.",
"Rule is invalid.",
"X and Y are negatively correlated."
],
"correctAnswerIndex": 1,
"explanation": "When Lift = 1, X and Y occur together exactly as often as expected under independence, showing no association."
},
{
"id": 61,
"questionText": "What type of data transformation is required before using Apriori?",
"options": [
"Time-series lag transformation.",
"Normalization of continuous attributes.",
"Standardization of numeric values.",
"Binary encoding of items in transactions."
],
"correctAnswerIndex": 3,
"explanation": "Apriori typically works on binary-encoded transactional data, where each item is represented as 1 (present) or 0 (absent)."
},
{
"id": 62,
"questionText": "What is the purpose of minimum confidence threshold?",
"options": [
"To filter rules that are not reliable enough.",
"To reduce lift values.",
"To filter itemsets that occur too frequently.",
"To limit the dataset size."
],
"correctAnswerIndex": 0,
"explanation": "The minimum confidence threshold ensures only rules with sufficient predictive reliability are kept."
},
{
"id": 63,
"questionText": "What happens if both minimum support and confidence thresholds are set very high?",
"options": [
"Algorithm will fail.",
"Few or no rules will be generated.",
"Too many redundant rules will be generated.",
"Lift will always be zero."
],
"correctAnswerIndex": 1,
"explanation": "High thresholds filter out most itemsets and rules, resulting in very few discovered associations."
},
{
"id": 64,
"questionText": "Which of the following represents a strong rule?",
"options": [
"Low support and high lift",
"High support and low confidence",
"Low confidence and high support",
"High support and high confidence"
],
"correctAnswerIndex": 3,
"explanation": "Strong rules exhibit both high support (frequent co-occurrence) and high confidence (high reliability)."
},
{
"id": 65,
"questionText": "What type of relationship does Apriori primarily find?",
"options": [
"Functional relationships in regression",
"Associative relationships between items",
"Causal relationships between features",
"Hierarchical relationships between classes"
],
"correctAnswerIndex": 1,
"explanation": "Apriori focuses on associative relationships β discovering which items tend to appear together, not causal or predictive links."
},
{
"id": 66,
"questionText": "What happens if the minimum confidence threshold is set too high in the Apriori algorithm?",
"options": [
"The number of frequent itemsets will increase drastically.",
"The algorithm may generate too many redundant rules.",
"It will have no effect on the rules generated.",
"Many strong but infrequent rules may be ignored."
],
"correctAnswerIndex": 3,
"explanation": "Setting a very high confidence threshold can cause the algorithm to miss potentially interesting rules that have moderate confidence but high support, thus reducing overall insight."
},
{
"id": 67,
"questionText": "Why does the Apriori algorithm use an iterative approach?",
"options": [
"To merge multiple datasets together before rule generation.",
"To reduce the computational cost of support counting.",
"To progressively build larger frequent itemsets from smaller ones.",
"To randomly sample the dataset multiple times."
],
"correctAnswerIndex": 2,
"explanation": "Apriori uses a level-wise iterative approach: it first finds frequent 1-itemsets, then uses them to generate 2-itemsets, and so on, until no more frequent itemsets can be generated."
},
{
"id": 68,
"questionText": "Which of the following best defines the term 'strong association rule'?",
"options": [
"A rule that appears in every transaction.",
"A rule that satisfies both minimum support and minimum confidence thresholds.",
"A rule that contains the largest number of items.",
"A rule that has the highest lift value."
],
"correctAnswerIndex": 1,
"explanation": "A strong association rule is one that meets both user-defined thresholds for minimum support and minimum confidence, ensuring that it is both frequent and reliable."
},
{
"id": 69,
"questionText": "In Apriori, why is candidate pruning necessary after generating Ck (candidate itemsets of size k)?",
"options": [
"To remove itemsets that contain infrequent subsets.",
"To increase the support value of remaining itemsets.",
"To eliminate itemsets with high lift values.",
"To reduce the number of transactions in the dataset."
],
"correctAnswerIndex": 0,
"explanation": "After generating candidate itemsets (Ck), Apriori prunes those whose subsets are not frequent. This is based on the Apriori property, which ensures computational efficiency."
},
{
"id": 70,
"questionText": "If the Apriori algorithm produces too many frequent itemsets, what adjustment should you make?",
"options": [
"Add more transactions to the dataset.",
"Decrease the minimum confidence threshold.",
"Increase the minimum support threshold.",
"Use a smaller dataset."
],
"correctAnswerIndex": 2,
"explanation": "When too many frequent itemsets are found, it indicates that the minimum support threshold is too low. Raising it helps reduce the number of itemsets to a more manageable set."
},
{
"id": 71,
"questionText": "You are analyzing grocery transactions using Apriori. If 'milk β bread' has high confidence but low lift, what does that imply?",
"options": [
"Milk and bread are rarely bought together.",
"The support value for the rule must be very high.",
"The rule has both high confidence and high significance.",
"Milk and bread co-occur frequently, but their association is not stronger than random chance."
],
"correctAnswerIndex": 3,
"explanation": "High confidence with low lift suggests that while milk and bread often appear together, this occurrence is mostly due to their high individual frequencies, not because they are strongly associated beyond random expectation."
},
{
"id": 72,
"questionText": "A supermarket uses Apriori and finds the rule {diapers} β {beer} with high confidence. What is the most likely business action?",
"options": [
"Increase the price of diapers only.",
"Place diapers and beer close together to increase joint sales.",
"Remove beer from the store.",
"Reduce the number of beer brands."
],
"correctAnswerIndex": 1,
"explanation": "A high-confidence rule indicates a strong co-purchase tendency. Placing these items closer encourages impulse buying, leveraging the discovered association."
},
{
"id": 73,
"questionText": "A rule {bread, butter} β {jam} has low support but very high confidence. What does this mean?",
"options": [
"The combination occurs rarely but is very reliable when it does.",
"The rule is meaningless because support must be high.",
"Bread and butter are independent of jam.",
"Jam is more popular than bread or butter."
],
"correctAnswerIndex": 0,
"explanation": "Low support with high confidence indicates that while few transactions contain all items, whenever bread and butter occur together, jam almost always appears too."
},
{
"id": 74,
"questionText": "A retailer increases the minimum support threshold in Apriori. What will likely happen?",
"options": [
"The lift of each rule will increase.",
"More frequent itemsets will be generated.",
"The confidence values of rules will increase.",
"Fewer frequent itemsets will be generated."
],
"correctAnswerIndex": 3,
"explanation": "Increasing the minimum support threshold filters out itemsets that occur less frequently, thus reducing the number of frequent itemsets generated."
},
{
"id": 75,
"questionText": "You discover the rule {pasta} β {tomato sauce} with lift = 2. What does this indicate?",
"options": [
"The rule has twice the confidence of support.",
"Pasta and tomato sauce are twice as likely to be bought together than by chance.",
"The dataset contains exactly twice as many pasta transactions as tomato sauce transactions.",
"The association between pasta and tomato sauce is weak."
],
"correctAnswerIndex": 1,
"explanation": "Lift = 2 means the joint occurrence of pasta and tomato sauce is twice what would be expected if they were independent β a strong positive association."
},
{
"id": 76,
"questionText": "If Apriori finds {milk, bread} β {butter} but not {bread, butter} β {milk}, what does it suggest?",
"options": [
"The algorithm failed to meet the minimum support threshold.",
"The rules are directional; confidence depends on the antecedent and consequent.",
"The dataset must contain errors.",
"The items are mutually exclusive."
],
"correctAnswerIndex": 1,
"explanation": "Association rules are directional; {A β B} may have different confidence than {B β A}. The relationship depends on how often the antecedent implies the consequent."
},
{
"id": 77,
"questionText": "In a dataset, lift = 1. What does this indicate about the association between items?",
"options": [
"The rule is invalid.",
"The items are independent of each other.",
"The items occur in every transaction.",
"The items are perfectly associated."
],
"correctAnswerIndex": 1,
"explanation": "A lift of 1 means there is no association between items β their co-occurrence is purely due to chance, indicating independence."
},
{
"id": 78,
"questionText": "A large e-commerce dataset causes Apriori to run extremely slowly. What is the best alternative algorithm?",
"options": [
"FP-Growth",
"Naive Bayes",
"Decision Trees",
"K-Means"
],
"correctAnswerIndex": 0,
"explanation": "FP-Growth is preferred for large datasets as it avoids generating candidate itemsets, using an FP-Tree for compact storage and faster computation."
},
{
"id": 79,
"questionText": "If support(A) = 0.4, support(B) = 0.5, and support(A βͺ B) = 0.2, what is the confidence of the rule A β B?",
"options": [
"0.25",
"0.2",
"0.5",
"0.4"
],
"correctAnswerIndex": 2,
"explanation": "Confidence(A β B) = support(A βͺ B) / support(A) = 0.2 / 0.4 = 0.5."
},
{
"id": 80,
"questionText": "A rule has confidence = 0.8 and lift = 1. What does this mean?",
"options": [
"The support value must be low.",
"The rule is reliable but not useful, as the items occur independently.",
"The rule is both reliable and highly associated.",
"The dataset is too sparse."
],
"correctAnswerIndex": 1,
"explanation": "Confidence = 0.8 means 80% reliability, but lift = 1 indicates independence. So while the rule seems strong, it offers no real association insight."
},
{
"id": 81,
"questionText": "If an itemset passes the support threshold but fails the confidence threshold, what does it imply?",
"options": [
"It must have very high lift.",
"It cannot appear in any association rule.",
"It appears frequently but does not strongly imply other items.",
"It should be removed from all transactions."
],
"correctAnswerIndex": 2,
"explanation": "Frequent itemsets may still fail to form strong rules if the consequent is not reliably present β they are frequent but not predictive."
},
{
"id": 82,
"questionText": "A rule {tea} β {cookies} has high confidence but very low support. Should it be trusted?",
"options": [
"Yes, because confidence is more important than support.",
"No, because it occurs too rarely to be meaningful.",
"Yes, because lift will always be high when confidence is high.",
"No, because tea and cookies cannot be related."
],
"correctAnswerIndex": 1,
"explanation": "Low support means the rule is based on too few examples to generalize well, even if confidence is high. Support ensures statistical significance."
},
{
"id": 83,
"questionText": "In Apriori, what happens if two items always occur together in every transaction?",
"options": [
"Their lift will be greater than 1.",
"Their confidence and lift will both be maximum possible.",
"They will be pruned as infrequent.",
"Their confidence will be zero."
],
"correctAnswerIndex": 1,
"explanation": "If two items always co-occur, confidence = 1 and lift = 1 / support(B), meaning maximum confidence and strong association."
},
{
"id": 84,
"questionText": "A dataset has millions of transactions but few unique items. What is the expected performance of Apriori?",
"options": [
"It will perform efficiently due to low item variety.",
"It will need a higher lift threshold.",
"It will fail to find any rules.",
"It will slow down due to many candidate sets."
],
"correctAnswerIndex": 0,
"explanation": "With fewer unique items, the number of possible combinations is small, allowing Apriori to perform efficiently even with large transaction counts."
},
{
"id": 85,
"questionText": "What is one common post-processing step after generating association rules using Apriori?",
"options": [
"Running regression on frequent itemsets.",
"Adding random noise to the dataset.",
"Recomputing support for every rule.",
"Filtering redundant or weak rules based on lift and confidence."
],
"correctAnswerIndex": 3,
"explanation": "After generating many rules, redundancy filtering helps remove overlapping or weak associations, retaining only the most informative rules."
},
{
"id": 86,
"questionText": "A store finds the rule {chips} β {salsa} with support = 0.4, confidence = 0.9, lift = 3. What does this imply?",
"options": [
"Salsa is rarely bought without chips.",
"The items occur independently.",
"Chips and salsa are strongly associated; the rule is valuable.",
"The rule is weak because support is too high."
],
"correctAnswerIndex": 2,
"explanation": "High lift (3) means chips and salsa are three times more likely to be bought together than by chance, showing a strong actionable association."
},
{
"id": 87,
"questionText": "Which situation would cause Apriori to miss a valid rule?",
"options": [
"Having too few transactions.",
"Setting the support threshold too high.",
"Setting the confidence threshold too low.",
"Using continuous variables instead of categorical."
],
"correctAnswerIndex": 1,
"explanation": "A high support threshold may eliminate meaningful but less frequent patterns, causing the algorithm to miss valid associations."
},
{
"id": 88,
"questionText": "How can Apriori be adapted for streaming data or large-scale environments?",
"options": [
"By increasing the support threshold until no rules remain.",
"By limiting rule generation to single-item antecedents.",
"By using incremental or parallelized versions of Apriori.",
"By converting data into clusters first."
],
"correctAnswerIndex": 2,
"explanation": "For scalability, Apriori can be implemented incrementally or in parallel (e.g., in MapReduce) to handle continuous or large-volume data streams."
},
{
"id": 89,
"questionText": "Which of the following indicates a misleading rule in Apriori?",
"options": [
"Low confidence but high support.",
"High support and high lift.",
"Low support and low confidence.",
"High confidence but low lift."
],
"correctAnswerIndex": 3,
"explanation": "A high-confidence, low-lift rule appears strong but shows no actual association beyond random occurrence β it's misleading."
},
{
"id": 90,
"questionText": "In a retail dataset, {pen} β {paper} has confidence = 0.9 and lift = 2. What does this mean?",
"options": [
"Pen and paper are unrelated.",
"The dataset is too small to draw conclusions.",
"Customers who buy pens are twice as likely to buy paper as random chance suggests.",
"The rule has very low reliability."
],
"correctAnswerIndex": 2,
"explanation": "A lift of 2 indicates a strong positive relationship β pen buyers are twice as likely to also buy paper compared to random probability."
},
{
"id": 91,
"questionText": "If Apriori is applied to non-transactional data, what preprocessing step is necessary?",
"options": [
"Normalizing continuous variables.",
"Adding missing categorical variables.",
"Transforming it into a binary transactional format.",
"Applying k-means clustering."
],
"correctAnswerIndex": 2,
"explanation": "Apriori requires categorical transactional data. Non-transactional data must first be converted into a binary form where each item is 0/1 per record."
},
{
"id": 92,
"questionText": "A rule has high support and low confidence. What does this indicate?",
"options": [
"The items rarely co-occur.",
"The lift must be very high.",
"The rule is strong and reliable.",
"The items appear frequently but are not strongly dependent on each other."
],
"correctAnswerIndex": 3,
"explanation": "High support with low confidence means the items appear often individually, but one does not necessarily imply the other."
},
{
"id": 93,
"questionText": "In Apriori, if we want to generate longer rules, what adjustment helps?",
"options": [
"Increasing the confidence threshold.",
"Lowering the minimum support threshold.",
"Reducing the dataset size.",
"Increasing the lift threshold."
],
"correctAnswerIndex": 1,
"explanation": "Reducing the minimum support threshold allows more itemsets to qualify as frequent, leading to longer potential rules."
},
{
"id": 94,
"questionText": "Why is Apriori considered a 'bottom-up' approach?",
"options": [
"It builds larger frequent itemsets from smaller ones iteratively.",
"It compares association rules before counting support.",
"It starts with the largest itemsets and prunes downward.",
"It analyzes the dataset top-down using trees."
],
"correctAnswerIndex": 0,
"explanation": "Apriori begins with single-item itemsets and incrementally grows them by combining frequent ones, making it a bottom-up process."
},
{
"id": 95,
"questionText": "A dataset contains 1 million transactions and 10,000 items. What is the biggest performance bottleneck for Apriori?",
"options": [
"Rule visualization.",
"Confidence calculation.",
"Candidate generation and support counting.",
"Lift computation."
],
"correctAnswerIndex": 2,
"explanation": "The major performance issue lies in generating and counting vast numbers of candidate itemsets, which grows exponentially with item count."
},
{
"id": 96,
"questionText": "What is the best strategy when Apriori produces too many redundant rules?",
"options": [
"Increase transaction size.",
"Decrease minimum confidence.",
"Apply rule post-filtering using lift or conviction.",
"Remove all 2-item rules."
],
"correctAnswerIndex": 2,
"explanation": "Post-processing filters based on metrics like lift or conviction help retain only unique, meaningful rules, reducing redundancy."
},
{
"id": 97,
"questionText": "What is the relationship between lift and independence?",
"options": [
"Lift > 1 means items are independent.",
"Lift = 0 means items are independent.",
"Lift < 1 means items are perfectly correlated.",
"Lift = 1 means items are independent."
],
"correctAnswerIndex": 3,
"explanation": "A lift value of exactly 1 indicates statistical independence; values greater than 1 show positive correlation, and less than 1 show negative correlation."
},
{
"id": 98,
"questionText": "What does a negative correlation between two items imply in association rule mining?",
"options": [
"They have high support.",
"They are mutually dependent.",
"They always appear together.",
"They are less likely to occur together than by chance."
],
"correctAnswerIndex": 3,
"explanation": "Negative correlation means the occurrence of one item reduces the likelihood of the other appearing β a lift value below 1 reflects this."
},
{
"id": 99,
"questionText": "In a medical dataset, Apriori finds {fever, cough} β {flu} with lift = 3.5. What does this mean?",
"options": [
"The rule is statistically insignificant.",
"The dataset must have errors.",
"Patients with fever and cough are 3.5 times more likely to have flu compared to random chance.",
"Fever and cough are independent of flu."
],
"correctAnswerIndex": 2,
"explanation": "A lift of 3.5 indicates a strong positive association β fever and cough together strongly suggest the presence of flu."
},
{
"id": 100,
"questionText": "In a real-world deployment, why might Apriori-generated rules fail to perform well over time?",
"options": [
"The rules are stored in memory incorrectly.",
"Apriori uses probabilistic sampling.",
"Customer behavior and item associations may change.",
"Support and confidence are permanent metrics."
],
"correctAnswerIndex": 2,
"explanation": "Association rules can become outdated as trends shift. Continuous retraining or adaptive algorithms are needed to reflect evolving patterns."
}
]
}
|