{ "title": "Hierarchical Clustering Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Hierarchical Clustering, covering fundamental concepts, linkage methods, dendrogram interpretation, and practical applications.", "questions": [ { "id": 1, "questionText": "What is the primary goal of hierarchical clustering?", "options": [ "Reduce dimensionality", "Predict a dependent variable", "Build a hierarchy of clusters without specifying the number of clusters upfront", "Partition data into a fixed number of clusters" ], "correctAnswerIndex": 2, "explanation": "Hierarchical clustering creates a tree-like structure (dendrogram) representing nested clusters without requiring a pre-specified number of clusters." }, { "id": 2, "questionText": "Which of the following is NOT a linkage criterion in hierarchical clustering?", "options": [ "Single linkage", "Random linkage", "Complete linkage", "Average linkage" ], "correctAnswerIndex": 1, "explanation": "Single, complete, and average linkage are common methods. Random linkage is not used in hierarchical clustering." }, { "id": 3, "questionText": "In agglomerative hierarchical clustering, what happens at each step?", "options": [ "Clusters are removed", "Two closest clusters are merged", "Clusters are split randomly", "All clusters merge at once" ], "correctAnswerIndex": 1, "explanation": "Agglomerative clustering starts with individual points and iteratively merges the closest clusters until a single cluster remains." }, { "id": 4, "questionText": "In divisive hierarchical clustering, the process begins with:", "options": [ "Each point as its own cluster", "A single cluster containing all data points", "Clusters with equal size", "Random clusters" ], "correctAnswerIndex": 1, "explanation": "Divisive clustering starts with all points in one cluster and recursively splits clusters into smaller ones." }, { "id": 5, "questionText": "What does a dendrogram represent?", "options": [ "A tree showing the nested arrangement of clusters", "A bar chart of cluster sizes", "A line chart of errors", "A scatter plot of points" ], "correctAnswerIndex": 0, "explanation": "A dendrogram visualizes the hierarchical relationships among clusters, showing which clusters merge at each step." }, { "id": 6, "questionText": "Scenario: You want to cluster customers by purchase behavior using hierarchical clustering. Which method is suitable for identifying outliers?", "options": [ "Average linkage", "Complete linkage", "Centroid linkage", "Single linkage" ], "correctAnswerIndex": 3, "explanation": "Single linkage is sensitive to outliers and can help identify them as isolated points." }, { "id": 7, "questionText": "Scenario: You notice chaining effect in hierarchical clustering. What is likely used?", "options": [ "Ward’s method", "Complete linkage", "Average linkage", "Single linkage" ], "correctAnswerIndex": 3, "explanation": "Single linkage can produce a chaining effect where clusters merge sequentially through close points, forming elongated clusters." }, { "id": 8, "questionText": "Scenario: Using complete linkage. Effect?", "options": [ "Clusters elongated", "Clusters are compact and spherical", "Noise detected automatically", "All points merged at first step" ], "correctAnswerIndex": 1, "explanation": "Complete linkage merges clusters based on the maximum distance between points, favoring compact and well-separated clusters." }, { "id": 9, "questionText": "Scenario: Using average linkage. 
Benefit?", "options": [ "Balances sensitivity to outliers and cluster compactness", "Merges clusters randomly", "Maximizes cluster diameter", "Ignores distance" ], "correctAnswerIndex": 0, "explanation": "Average linkage merges clusters based on the average pairwise distance, balancing chaining and compactness." }, { "id": 10, "questionText": "Scenario: Ward’s method is preferred when:", "options": [ "Handling categorical data only", "Minimizing total within-cluster variance", "Maximizing cluster separation arbitrarily", "Creating elongated clusters" ], "correctAnswerIndex": 1, "explanation": "Ward’s method merges clusters to minimize the increase in total within-cluster variance, resulting in compact clusters." }, { "id": 11, "questionText": "Scenario: You have 2D points forming elongated clusters. Which linkage is prone to chaining?", "options": [ "Average linkage", "Complete linkage", "Ward’s method", "Single linkage" ], "correctAnswerIndex": 3, "explanation": "Single linkage tends to form chains by linking points sequentially, which can elongate clusters." }, { "id": 12, "questionText": "Scenario: You want compact clusters. Which linkage is better?", "options": [ "Complete linkage", "Single linkage", "Average linkage", "Centroid linkage" ], "correctAnswerIndex": 0, "explanation": "Complete linkage merges clusters based on the maximum distance, producing more compact clusters." }, { "id": 13, "questionText": "Scenario: Large dataset, hierarchical clustering is slow. Solution?", "options": [ "Use a sample or approximate methods", "Increase minPts", "Ignore some clusters", "Use K-Means instead" ], "correctAnswerIndex": 0, "explanation": "Hierarchical clustering has O(n²) complexity; sampling or approximate linkage speeds up computation." }, { "id": 14, "questionText": "Scenario: Dendrogram shows long vertical lines. Interpretation?", "options": [ "All clusters merge early", "Clusters merge at higher distances; well-separated clusters", "Clusters are compact", "Noise points" ], "correctAnswerIndex": 1, "explanation": "Long vertical lines in a dendrogram indicate that clusters are merged at large distances, showing separation." }, { "id": 15, "questionText": "Scenario: Hierarchical clustering on text embeddings. Challenge?", "options": [ "Clusters are always spherical", "High-dimensional distances may be less meaningful", "Noise ignored", "All points clustered perfectly" ], "correctAnswerIndex": 1, "explanation": "High-dimensional embeddings can make distance measures less effective, affecting cluster quality." }, { "id": 16, "questionText": "Scenario: You cut dendrogram at a certain height. Effect?", "options": [ "Removes noise automatically", "All clusters merged", "Defines number of clusters at that level", "Clustering fails" ], "correctAnswerIndex": 2, "explanation": "Cutting the dendrogram horizontally defines clusters by merging points below the cut line." }, { "id": 17, "questionText": "Scenario: Divisive hierarchical clustering applied to gene data. Advantage?", "options": [ "Noise merged", "Merges clusters sequentially", "All clusters detected at once", "Captures large clusters and splits iteratively" ], "correctAnswerIndex": 3, "explanation": "Divisive clustering starts with all points and splits them, which can reveal large patterns in genomic data." }, { "id": 18, "questionText": "Scenario: Agglomerative clustering on customer segmentation. 
Limitation?", "options": [ "Requires number of clusters", "Cannot handle numeric data", "Computationally expensive for large datasets", "All clusters spherical" ], "correctAnswerIndex": 2, "explanation": "Agglomerative clustering has O(n²) or O(n³) complexity, making it slow for large datasets." }, { "id": 19, "questionText": "Scenario: Using Manhattan distance instead of Euclidean. Effect?", "options": [ "All clusters identical", "Noise increases", "Clustering fails", "Cluster shapes may change; sensitivity to axis-aligned differences" ], "correctAnswerIndex": 3, "explanation": "Distance metric affects cluster formation. Manhattan distance may produce more grid-aligned clusters." }, { "id": 20, "questionText": "Scenario: Clustering image segments using Ward’s method. Advantage?", "options": [ "Minimizes within-cluster variance for compact regions", "Clusters must be circular", "Noise detected automatically", "Maximizes between-cluster variance arbitrarily" ], "correctAnswerIndex": 0, "explanation": "Ward’s method merges clusters to minimize variance, producing compact and meaningful image segments." }, { "id": 21, "questionText": "Scenario: Hierarchical clustering produces dendrogram with many short merges. Interpretation?", "options": [ "Clusters are far apart", "Algorithm fails", "Clusters are close; low distances between merges", "Noise points" ], "correctAnswerIndex": 2, "explanation": "Short vertical lines indicate clusters that merge at low distances, suggesting similarity." }, { "id": 22, "questionText": "Scenario: Combining hierarchical and K-Means. Approach?", "options": [ "Use dendrogram to determine K, then apply K-Means", "Merge clusters randomly", "K-Means replaces hierarchical clustering", "Ignore hierarchical output" ], "correctAnswerIndex": 0, "explanation": "Hierarchical clustering can guide selection of K for K-Means to improve results." }, { "id": 23, "questionText": "Scenario: Hierarchical clustering for market segmentation. Limitation?", "options": [ "Noise merged automatically", "Clusters must be pre-defined", "Not suitable for very large customer databases", "Cannot handle numeric features" ], "correctAnswerIndex": 2, "explanation": "Due to computational complexity, hierarchical clustering struggles with very large datasets." }, { "id": 24, "questionText": "Scenario: Single linkage produces chaining effect. Solution?", "options": [ "Increase data dimensionality", "Use complete or average linkage", "Ignore chaining", "Decrease number of clusters" ], "correctAnswerIndex": 1, "explanation": "Complete or average linkage reduces chaining by considering maximum or average distances between clusters." }, { "id": 25, "questionText": "Scenario: Hierarchical clustering applied to time-series patterns. Challenge?", "options": [ "Need to extract meaningful features or distance measures first", "Algorithm works on raw time stamps", "All sequences assigned automatically", "Noise ignored" ], "correctAnswerIndex": 0, "explanation": "Time-series often require preprocessing or feature extraction for meaningful clustering." }, { "id": 26, "questionText": "Scenario: Agglomerative clustering using Euclidean distance on GPS coordinates. Limitation?", "options": [ "Clusters always perfect", "Does not account for Earth's curvature; may need haversine distance", "All clusters spherical", "Noise automatically detected" ], "correctAnswerIndex": 1, "explanation": "Euclidean distance may misrepresent actual distances on the globe; haversine or geodesic distance is better." 
}, { "id": 27, "questionText": "Scenario: Using dendrogram to detect outliers. Approach?", "options": [ "Noise ignored", "Leaves merging at low distances", "All clusters equal size", "Identify leaves that merge at high distances" ], "correctAnswerIndex": 3, "explanation": "Points that merge last at high distances may be considered outliers." }, { "id": 28, "questionText": "Scenario: Hierarchical clustering on document embeddings. Benefit?", "options": [ "Requires fixed number of clusters", "Noise automatically ignored", "All points assigned to single cluster", "Detects thematic clusters and subtopics without predefined K" ], "correctAnswerIndex": 3, "explanation": "Hierarchical clustering captures nested structure and subtopics in document embeddings." }, { "id": 29, "questionText": "Scenario: Divisive clustering is rarely used in practice. Why?", "options": [ "Computationally expensive", "Cannot handle categorical data", "Fails on numeric data", "Noise ignored" ], "correctAnswerIndex": 0, "explanation": "Divisive clustering requires evaluating all splits at each step, making it slower than agglomerative methods." }, { "id": 30, "questionText": "Scenario: Combining hierarchical clustering with DBSCAN. Use case?", "options": [ "Merge clusters randomly", "Noise ignored", "Use DBSCAN to detect dense regions and hierarchical clustering to refine structure", "Clusters must be spherical" ], "correctAnswerIndex": 2, "explanation": "Combining methods can improve clustering by capturing dense clusters with DBSCAN and hierarchical structure." }, { "id": 31, "questionText": "Scenario: Agglomerative clustering produces two clusters that merge only at a very high distance. Interpretation?", "options": [ "Noise points", "Algorithm failed", "Clusters are similar and close", "Clusters are well-separated and distinct" ], "correctAnswerIndex": 3, "explanation": "High merging distance in a dendrogram indicates clusters are distinct and separated." }, { "id": 32, "questionText": "Scenario: Using Ward’s method with Euclidean distance. Advantage?", "options": [ "Noise automatically removed", "Produces compact, spherical clusters minimizing variance", "Clusters must be elongated", "All clusters merge randomly" ], "correctAnswerIndex": 1, "explanation": "Ward’s method minimizes total within-cluster variance, yielding compact clusters." }, { "id": 33, "questionText": "Scenario: Hierarchical clustering on customer purchase history with categorical data. Approach?", "options": [ "Use Euclidean distance directly", "Ignore categorical features", "Random linkage only", "Use suitable distance metric like Gower distance" ], "correctAnswerIndex": 3, "explanation": "Categorical data requires a proper distance measure, such as Gower distance, for meaningful clustering." }, { "id": 34, "questionText": "Scenario: Large dataset with millions of points. Hierarchical clustering limitation?", "options": [ "Computational complexity becomes prohibitive", "Produces perfect clusters", "Clusters always spherical", "Noise automatically detected" ], "correctAnswerIndex": 0, "explanation": "Hierarchical clustering scales poorly (O(n²) or worse), making it unsuitable for very large datasets." }, { "id": 35, "questionText": "Scenario: Clustering time-series data with hierarchical clustering. 
Preprocessing needed?", "options": [ "Extract features or use distance measures like DTW", "Randomly sample points", "Use only single linkage", "No preprocessing required" ], "correctAnswerIndex": 0, "explanation": "Time-series data often requires feature extraction or distance measures like Dynamic Time Warping for meaningful clustering." }, { "id": 36, "questionText": "Scenario: Dendrogram with multiple small clusters merging at low distances. Interpretation?", "options": [ "Noise points", "Data has widely separated clusters", "Data has many similar, tight clusters", "Algorithm failed" ], "correctAnswerIndex": 2, "explanation": "Clusters merging at low distances indicate closely packed points forming tight clusters." }, { "id": 37, "questionText": "Scenario: Using hierarchical clustering for image segmentation. Advantage?", "options": [ "Captures nested structure of segments without predefining number", "Noise ignored automatically", "Clusters must be circular", "Requires fixed number of segments" ], "correctAnswerIndex": 0, "explanation": "Hierarchical clustering can reveal nested structures in images, useful for segmentation." }, { "id": 38, "questionText": "Scenario: High-dimensional feature space for hierarchical clustering. Challenge?", "options": [ "Noise ignored", "All clusters compact", "Distances may become less meaningful; consider dimensionality reduction", "All clusters detected perfectly" ], "correctAnswerIndex": 2, "explanation": "In high dimensions, distances lose meaning (curse of dimensionality), affecting cluster quality." }, { "id": 39, "questionText": "Scenario: Dendrogram shows one cluster merging far above others. Interpretation?", "options": [ "Cluster is highly dense", "Noise ignored", "Cluster merged early", "Cluster is an outlier relative to others" ], "correctAnswerIndex": 3, "explanation": "A cluster merging at high distance is far from other clusters, possibly an outlier." }, { "id": 40, "questionText": "Scenario: Hierarchical clustering with average linkage. Advantage?", "options": [ "Maximizes cluster diameter arbitrarily", "Clusters merge randomly", "Noise removed automatically", "Balances sensitivity to chaining and compactness" ], "correctAnswerIndex": 3, "explanation": "Average linkage considers average distances between clusters, balancing chaining effect and cluster compactness." }, { "id": 41, "questionText": "Scenario: Divisive hierarchical clustering rarely used. Reason?", "options": [ "Cannot handle categorical features", "Computationally expensive due to evaluating all splits", "Fails on numeric data", "Noise ignored automatically" ], "correctAnswerIndex": 1, "explanation": "Divisive clustering evaluates multiple splits at each level, making it slower than agglomerative clustering." }, { "id": 42, "questionText": "Scenario: Hierarchical clustering on streaming data. Limitation?", "options": [ "Automatically updates clusters", "Clusters merge randomly", "Not incremental; requires recomputation", "Noise detected automatically" ], "correctAnswerIndex": 2, "explanation": "Standard hierarchical clustering is static and does not support incremental updates for streaming data." }, { "id": 43, "questionText": "Scenario: Hierarchical clustering using Manhattan distance. Effect?", "options": [ "Produces axis-aligned clusters; shapes may differ", "Noise detected automatically", "No effect on clusters", "Algorithm fails" ], "correctAnswerIndex": 0, "explanation": "Distance metric affects clustering; Manhattan distance may create axis-aligned clusters." 
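, "codeExample": "# Illustrative sketch, not part of the original question: comparing Euclidean and\n# Manhattan (cityblock) metrics in SciPy's agglomerative linkage on toy data.\nimport numpy as np\nfrom scipy.cluster.hierarchy import linkage\n\nrng = np.random.default_rng(0)\nX = rng.random((20, 2))  # toy 2-D points\nZ_euclidean = linkage(X, method='average', metric='euclidean')\nZ_manhattan = linkage(X, method='average', metric='cityblock')  # Manhattan distance\n# The two merge orders can differ, changing the resulting cluster shapes."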
}, { "id": 44, "questionText": "Scenario: Hierarchical clustering combined with K-Means. Use case?", "options": [ "Use dendrogram to select optimal K for K-Means", "Merge clusters randomly", "Clusters must be circular", "Noise ignored" ], "correctAnswerIndex": 0, "explanation": "Hierarchical clustering can guide K selection for K-Means for improved clustering quality." }, { "id": 45, "questionText": "Scenario: Choosing number of clusters from dendrogram. Approach?", "options": [ "Count leaves", "Cut dendrogram at a height where clusters merge", "Use arbitrary number", "Select maximum distance" ], "correctAnswerIndex": 1, "explanation": "Cutting dendrogram horizontally defines clusters at a chosen distance level." }, { "id": 46, "questionText": "Scenario: Hierarchical clustering with mixed numeric and categorical data. Solution?", "options": [ "Use Gower distance for similarity computation", "Use Euclidean distance only", "Ignore categorical features", "Merge randomly" ], "correctAnswerIndex": 0, "explanation": "Gower distance handles mixed data types for hierarchical clustering." }, { "id": 47, "questionText": "Scenario: Clustering irregularly shaped spatial regions. Which linkage works best?", "options": [ "Complete linkage", "Average or single linkage", "Centroid linkage", "Ward’s method" ], "correctAnswerIndex": 1, "explanation": "Average or single linkage can capture irregular shapes, whereas complete and Ward’s method favor compact clusters." }, { "id": 48, "questionText": "Scenario: Large dendrogram with many clusters. Visualization solution?", "options": [ "Plot full dendrogram always", "Use K-Means instead", "Use truncated dendrogram or heatmap visualization", "Ignore dendrogram" ], "correctAnswerIndex": 2, "explanation": "Truncated dendrograms or heatmaps simplify visualization for large datasets." }, { "id": 49, "questionText": "Scenario: Hierarchical clustering on social network users. Benefit?", "options": [ "All points merged initially", "Reveals nested community structure without predefining cluster count", "Requires fixed number of clusters", "Noise ignored automatically" ], "correctAnswerIndex": 1, "explanation": "Hierarchical clustering uncovers nested structures like communities in social networks." }, { "id": 50, "questionText": "Scenario: Hierarchical clustering applied to product features. Challenge?", "options": [ "All features spherical", "Noise removed automatically", "Clusters detected perfectly", "High-dimensionality can affect distance metrics; consider PCA" ], "correctAnswerIndex": 3, "explanation": "Dimensionality reduction helps improve distance calculations and clustering quality." }, { "id": 51, "questionText": "Scenario: Hierarchical clustering produces elongated clusters using single linkage. Effect?", "options": [ "Chaining effect occurs; clusters may be less compact", "Noise automatically removed", "Clusters merge randomly", "Clusters remain spherical" ], "correctAnswerIndex": 0, "explanation": "Single linkage merges based on the closest points, which can create elongated chains of points." }, { "id": 52, "questionText": "Scenario: Using complete linkage on 2D spatial data. Effect?", "options": [ "Clusters merge randomly", "Produces compact clusters; less sensitive to outliers", "Chains clusters together", "Automatically detects noise" ], "correctAnswerIndex": 1, "explanation": "Complete linkage considers the maximum distance between clusters, yielding compact, tight clusters." 
}, { "id": 53, "questionText": "Scenario: Using average linkage for text document clustering. Advantage?", "options": [ "Produces elongated clusters", "Noise ignored automatically", "Balances sensitivity to outliers and cluster compactness", "Clusters must be pre-defined" ], "correctAnswerIndex": 2, "explanation": "Average linkage merges clusters based on average pairwise distance, balancing chaining and compactness." }, { "id": 54, "questionText": "Scenario: Hierarchical clustering for image segmentation. Best linkage for compact regions?", "options": [ "Centroid linkage", "Ward’s method", "Average linkage", "Single linkage" ], "correctAnswerIndex": 1, "explanation": "Ward’s method minimizes within-cluster variance, producing compact and meaningful segments." }, { "id": 55, "questionText": "Scenario: Hierarchical clustering with high-dimensional embeddings. Challenge?", "options": [ "Distances lose meaning; consider dimensionality reduction", "All clusters compact", "Noise ignored", "Clusters always detected perfectly" ], "correctAnswerIndex": 0, "explanation": "High-dimensional data can reduce the effectiveness of distance measures; PCA or t-SNE helps." }, { "id": 56, "questionText": "Scenario: Dendrogram shows one leaf merging at a very high distance. Interpretation?", "options": [ "Point is noise", "Point is similar to others", "Algorithm failed", "Point is an outlier" ], "correctAnswerIndex": 3, "explanation": "A leaf merging at high distance indicates it is far from other clusters, likely an outlier." }, { "id": 57, "questionText": "Scenario: Agglomerative clustering on social network data. Benefit?", "options": [ "Noise ignored automatically", "All points merged initially", "Detects nested communities without predefined K", "Requires fixed cluster count" ], "correctAnswerIndex": 2, "explanation": "Hierarchical clustering uncovers nested community structures in networks." }, { "id": 58, "questionText": "Scenario: Divisive clustering rarely used. Reason?", "options": [ "Fails on numeric data", "Computationally expensive due to evaluating all splits", "Noise ignored automatically", "Cannot handle categorical data" ], "correctAnswerIndex": 1, "explanation": "Divisive clustering evaluates multiple splits at each level, making it slower than agglomerative clustering." }, { "id": 59, "questionText": "Scenario: Hierarchical clustering on streaming data. Limitation?", "options": [ "Noise detected automatically", "Clusters merge randomly", "Not incremental; requires recomputation", "Automatically updates clusters" ], "correctAnswerIndex": 2, "explanation": "Standard hierarchical clustering is static and does not support incremental updates for streaming data." }, { "id": 60, "questionText": "Scenario: Combining hierarchical clustering with K-Means. Purpose?", "options": [ "Clusters must be spherical", "Merge clusters randomly", "Noise ignored", "Use dendrogram to select optimal K for K-Means" ], "correctAnswerIndex": 3, "explanation": "Hierarchical clustering can guide K selection for K-Means for improved clustering quality." }, { "id": 61, "questionText": "Scenario: Hierarchical clustering using Manhattan distance. Effect?", "options": [ "Noise detected automatically", "Algorithm fails", "Produces axis-aligned clusters; shapes may differ", "No effect on clusters" ], "correctAnswerIndex": 2, "explanation": "Distance metric affects clustering; Manhattan distance may create axis-aligned clusters." }, { "id": 62, "questionText": "Scenario: Hierarchical clustering on gene expression data. 
Advantage?", "options": [ "Noise ignored automatically", "Clusters merge randomly", "Requires predefined K", "Captures nested patterns in expression without specifying K" ], "correctAnswerIndex": 3, "explanation": "Hierarchical clustering can reveal hierarchical gene expression patterns in biological data." }, { "id": 63, "questionText": "Scenario: High-dimensional hierarchical clustering. Best practice?", "options": [ "Increase number of clusters randomly", "Reduce dimensions first (PCA, t-SNE) to improve distance metrics", "Ignore distance metrics", "Use raw high-dimensional distances" ], "correctAnswerIndex": 1, "explanation": "Reducing dimensionality improves distance computations and clustering quality in high-dimensional spaces." }, { "id": 64, "questionText": "Scenario: Hierarchical clustering dendrogram is too large. Visualization solution?", "options": [ "Use truncated dendrogram or heatmap", "Plot full dendrogram", "Use K-Means instead", "Ignore dendrogram" ], "correctAnswerIndex": 0, "explanation": "Truncated dendrograms or heatmaps help visualize large hierarchical structures." }, { "id": 65, "questionText": "Scenario: Clustering customer purchase patterns. Hierarchical advantage?", "options": [ "All points merged initially", "Noise ignored automatically", "Requires fixed cluster count", "Reveals subgroups without predefining number of clusters" ], "correctAnswerIndex": 3, "explanation": "Hierarchical clustering captures nested customer behavior subgroups without predefining K." }, { "id": 66, "questionText": "Scenario: Hierarchical clustering shows many small merges at low distances. Interpretation?", "options": [ "Data has many tight, similar clusters", "Noise detected automatically", "Algorithm failed", "Clusters are far apart" ], "correctAnswerIndex": 0, "explanation": "Short merges indicate many tightly grouped small clusters." }, { "id": 67, "questionText": "Scenario: Using dendrogram to detect outliers. Approach?", "options": [ "Identify leaves merging at very high distances", "Ignore dendrogram", "Count total clusters", "Leaves merging at low distances" ], "correctAnswerIndex": 0, "explanation": "Points merging at high distances may be considered outliers in hierarchical clustering." }, { "id": 68, "questionText": "Scenario: Combining hierarchical clustering with DBSCAN. Use case?", "options": [ "Clusters must be spherical", "Noise ignored", "Use DBSCAN to detect dense regions and hierarchical clustering to refine structure", "Merge clusters randomly" ], "correctAnswerIndex": 2, "explanation": "Combining methods improves clustering by leveraging DBSCAN's density-based grouping and hierarchical structure." }, { "id": 69, "questionText": "Scenario: Divisive clustering applied to social network communities. Limitation?", "options": [ "Clusters must be spherical", "All clusters detected automatically", "Computationally expensive for large networks", "Noise ignored" ], "correctAnswerIndex": 2, "explanation": "Divisive clustering evaluates all splits recursively, making it costly for large networks." }, { "id": 70, "questionText": "Scenario: Hierarchical clustering for anomaly detection. Approach?", "options": [ "Clusters merge randomly", "Points merging at high distances may indicate anomalies", "All points clustered normally", "Noise ignored automatically" ], "correctAnswerIndex": 1, "explanation": "Anomalous points tend to merge late at high distances in the dendrogram." }, { "id": 71, "questionText": "Scenario: Hierarchical clustering of customer reviews. 
Benefit?", "options": [ "Noise ignored automatically", "Reveals nested sentiment clusters and subtopics", "Requires fixed number of clusters", "All reviews merged initially" ], "correctAnswerIndex": 1, "explanation": "Hierarchical clustering captures nested sentiment and topic subgroups in text data." }, { "id": 72, "questionText": "Scenario: High-dimensional embeddings lead to poor clustering. Solution?", "options": [ "Ignore high-dimensionality", "Use Manhattan distance only", "Increase number of clusters randomly", "Apply dimensionality reduction like PCA or t-SNE" ], "correctAnswerIndex": 3, "explanation": "Dimensionality reduction improves distance measurement and clustering quality in high-dimensional spaces." }, { "id": 73, "questionText": "Scenario: Ward’s method vs single linkage. Difference?", "options": [ "Single linkage more compact", "Ward’s produces elongated chains", "Both produce identical clusters", "Ward’s minimizes variance; single linkage may chain" ], "correctAnswerIndex": 3, "explanation": "Ward’s focuses on variance, yielding compact clusters, whereas single linkage can chain clusters." }, { "id": 74, "questionText": "Scenario: Hierarchical clustering on GPS locations. Challenge?", "options": [ "Euclidean distance may misrepresent Earth distances; use haversine", "All clusters spherical", "Noise automatically ignored", "Clusters always detected correctly" ], "correctAnswerIndex": 0, "explanation": "GPS data requires geodesic distance metrics for accurate clustering." }, { "id": 75, "questionText": "Scenario: Divisive vs agglomerative clustering. Advantage of agglomerative?", "options": [ "Better for streaming data", "Handles categorical data automatically", "Less computationally expensive and more widely used", "Detects larger clusters first" ], "correctAnswerIndex": 2, "explanation": "Agglomerative clustering is more practical and commonly used due to lower computational cost." }, { "id": 76, "questionText": "Scenario: Hierarchical clustering of product categories. Use of dendrogram?", "options": [ "Identify hierarchical relationships and subcategories", "All categories merged", "Randomly assign clusters", "Ignore dendrogram" ], "correctAnswerIndex": 0, "explanation": "Dendrograms help visualize nested relationships among product categories." }, { "id": 77, "questionText": "Scenario: Hierarchical clustering on noisy data. Limitation?", "options": [ "All clusters detected perfectly", "Clusters always compact", "Noise may distort cluster merges; consider preprocessing", "Noise automatically removed" ], "correctAnswerIndex": 2, "explanation": "Noise can affect distances and merging decisions; preprocessing or outlier removal is advised." }, { "id": 78, "questionText": "Scenario: Hierarchical clustering using average linkage. Advantage?", "options": [ "Noise ignored", "Produces elongated clusters", "All clusters merge randomly", "Balances chaining and compactness" ], "correctAnswerIndex": 3, "explanation": "Average linkage considers average distances between clusters, reducing extreme chaining." }, { "id": 79, "questionText": "Scenario: Hierarchical clustering dendrogram shows large vertical distances. Interpretation?", "options": [ "Clusters are tight", "Clusters are well-separated", "Algorithm failed", "Noise ignored" ], "correctAnswerIndex": 1, "explanation": "Large vertical distances indicate clusters are far apart before merging." }, { "id": 80, "questionText": "Scenario: Combining hierarchical clustering and DBSCAN. 
Benefit?", "options": [ "Clusters must be spherical", "Noise ignored automatically", "Clusters merge randomly", "Captures dense regions and hierarchical structure together" ], "correctAnswerIndex": 3, "explanation": "Combining methods improves detection of dense clusters and nested hierarchical relationships." }, { "id": 81, "questionText": "Scenario: Hierarchical clustering of high-dimensional embeddings. Best practice?", "options": [ "Reduce dimensions first; visualize using dendrogram or heatmap", "Merge clusters randomly", "Use raw high-dimensional distances", "Ignore visualization" ], "correctAnswerIndex": 0, "explanation": "Dimensionality reduction improves clustering and makes dendrograms interpretable." }, { "id": 82, "questionText": "Scenario: Hierarchical clustering for anomaly detection. Approach?", "options": [ "Noise ignored automatically", "All points clustered normally", "Points merging at high distances may indicate anomalies", "Clusters merge randomly" ], "correctAnswerIndex": 2, "explanation": "Outliers tend to merge last at high distances in the dendrogram." }, { "id": 83, "questionText": "Scenario: High-dimensional sparse data for hierarchical clustering. Challenge?", "options": [ "Noise ignored", "Clusters always compact", "Algorithm fails automatically", "Distances may be misleading; consider feature selection or dimensionality reduction" ], "correctAnswerIndex": 3, "explanation": "Sparse high-dimensional data requires careful preprocessing to ensure meaningful distances." }, { "id": 84, "questionText": "Scenario: Hierarchical clustering for customer segmentation. Dendrogram use?", "options": [ "Identify nested subgroups for targeted marketing", "Noise ignored automatically", "Randomly assign clusters", "Merge clusters arbitrarily" ], "correctAnswerIndex": 0, "explanation": "Dendrogram shows nested subgroups, useful for marketing strategies." }, { "id": 85, "questionText": "Scenario: Ward’s method vs complete linkage. Key difference?", "options": [ "Ward’s produces elongated chains", "Both identical", "Ward’s minimizes variance; complete linkage uses max distance", "Complete linkage minimizes variance" ], "correctAnswerIndex": 2, "explanation": "Ward’s focuses on variance, while complete linkage merges based on maximum distance between clusters." }, { "id": 86, "questionText": "Scenario: Hierarchical clustering for time-series data. Challenge?", "options": [ "Noise ignored automatically", "Clusters always spherical", "Require meaningful distance measures like DTW or feature extraction", "No preprocessing needed" ], "correctAnswerIndex": 2, "explanation": "Time-series require specialized distance measures or feature extraction for meaningful clustering." }, { "id": 87, "questionText": "Scenario: Dendrogram cut defines number of clusters. How?", "options": [ "Count leaves", "Clusters merge randomly", "Use maximum distance arbitrarily", "Cut at horizontal line to define clusters below that height" ], "correctAnswerIndex": 3, "explanation": "Cutting dendrogram horizontally defines clusters at a chosen distance level." }, { "id": 88, "questionText": "Scenario: Hierarchical clustering on mixed data types. Solution?", "options": [ "Ignore categorical features", "Use Gower distance for similarity computation", "Merge clusters randomly", "Use Euclidean distance only" ], "correctAnswerIndex": 1, "explanation": "Gower distance accommodates mixed numeric and categorical features." }, { "id": 89, "questionText": "Scenario: Clustering geospatial data. 
Challenge?", "options": [ "Clusters merge randomly", "Use Euclidean directly", "Noise ignored automatically", "Use appropriate distance metric (haversine) for Earth coordinates" ], "correctAnswerIndex": 3, "explanation": "Euclidean distance may misrepresent distances on the globe; haversine is preferred." }, { "id": 90, "questionText": "Scenario: Hierarchical clustering for customer reviews. Benefit?", "options": [ "Reveals nested sentiment clusters without predefining K", "All points merged initially", "Requires fixed K", "Noise ignored automatically" ], "correctAnswerIndex": 0, "explanation": "Hierarchical clustering uncovers nested structures in text data like sentiment or topics." }, { "id": 91, "questionText": "Scenario: Hierarchical clustering shows long vertical merges in dendrogram. Interpretation?", "options": [ "Clusters are compact", "Algorithm failed", "Clusters are distinct and separated", "Noise ignored" ], "correctAnswerIndex": 2, "explanation": "Long vertical merges indicate clusters merge at high distances, showing separation." }, { "id": 92, "questionText": "Scenario: Agglomerative clustering with high-dimensional data. Solution?", "options": [ "Reduce dimensionality (PCA, t-SNE) before clustering", "Ignore dimensions", "Increase cluster count randomly", "Use raw distances" ], "correctAnswerIndex": 0, "explanation": "Dimensionality reduction improves distance calculations and cluster interpretability." }, { "id": 93, "questionText": "Scenario: Single linkage leads to chaining. Solution?", "options": [ "Increase clusters arbitrarily", "Ignore chaining", "Merge randomly", "Use complete or average linkage to reduce chaining" ], "correctAnswerIndex": 3, "explanation": "Complete or average linkage considers max or average distances, reducing elongated chains." }, { "id": 94, "questionText": "Scenario: Hierarchical clustering for image analysis. Ward’s method advantage?", "options": [ "Clusters merge randomly", "Noise detected automatically", "Minimizes within-cluster variance; compact segments", "Produces elongated clusters" ], "correctAnswerIndex": 2, "explanation": "Ward’s method creates compact clusters, suitable for image segmentation." }, { "id": 95, "questionText": "Scenario: Hierarchical clustering for anomaly detection. Key indicator?", "options": [ "All points clustered normally", "Clusters merge randomly", "Points merging at high distance are potential anomalies", "Noise ignored" ], "correctAnswerIndex": 2, "explanation": "Late-merging points at high distances indicate potential outliers." }, { "id": 96, "questionText": "Scenario: Divisive clustering is expensive. Alternative?", "options": [ "Agglomerative clustering is more practical", "Ignore hierarchy", "DBSCAN is slower", "K-Means cannot be used" ], "correctAnswerIndex": 0, "explanation": "Agglomerative clustering is faster and more widely used in practice." }, { "id": 97, "questionText": "Scenario: Hierarchical clustering dendrogram truncated for visualization. Benefit?", "options": [ "All clusters ignored", "Easier interpretation for large datasets", "Clusters merge randomly", "Noise removed automatically" ], "correctAnswerIndex": 1, "explanation": "Truncated dendrograms simplify visualization of large hierarchical structures." }, { "id": 98, "questionText": "Scenario: Hierarchical clustering on customer feedback. 
Benefit?", "options": [ "All reviews merged initially", "Requires fixed K", "Noise ignored automatically", "Identifies nested themes and sentiments without predefined K" ], "correctAnswerIndex": 3, "explanation": "Hierarchical clustering uncovers nested topics and sentiments in textual data." }, { "id": 99, "questionText": "Scenario: Combining hierarchical clustering and DBSCAN. Advantage?", "options": [ "Noise ignored automatically", "Clusters must be spherical", "Clusters merge randomly", "DBSCAN captures dense clusters; hierarchical reveals nested structure" ], "correctAnswerIndex": 3, "explanation": "Combining both methods leverages density detection and hierarchical structure analysis." }, { "id": 100, "questionText": "Scenario: High-dimensional hierarchical clustering. Key step?", "options": [ "Dimensionality reduction improves clustering performance and visualization", "Ignore distance metrics", "Use raw distances directly", "Merge clusters randomly" ], "correctAnswerIndex": 0, "explanation": "Reducing dimensionality ensures meaningful distances and interpretable clusters in high-dimensional data." } ] }