{ "title": "t-SNE Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions on t-Distributed Stochastic Neighbor Embedding (t-SNE), covering basic theory, medium-level conceptual understanding, and hard scenario-based applications.", "questions": [ { "id": 1, "questionText": "What does t-SNE stand for?", "options": [ "t-Scaled Neighbor Estimation", "t-Distributed Stochastic Neighbor Embedding", "Tensor Stochastic Network Embedding", "Total Stochastic Neural Embedding" ], "correctAnswerIndex": 1, "explanation": "t-SNE stands for t-Distributed Stochastic Neighbor Embedding, a technique for dimensionality reduction and visualization of high-dimensional data." }, { "id": 2, "questionText": "What is the primary purpose of t-SNE?", "options": [ "Normalizing data features", "Generating synthetic data", "Reducing dimensionality for visualization", "Classifying high-dimensional data" ], "correctAnswerIndex": 2, "explanation": "t-SNE is primarily used to reduce high-dimensional data into 2D or 3D for visualization while preserving local structure." }, { "id": 3, "questionText": "t-SNE is particularly good at preserving which type of data structure?", "options": [ "Linear relationships", "Global distances", "Local neighborhood structure", "Class labels" ], "correctAnswerIndex": 2, "explanation": "t-SNE focuses on preserving local similarities, meaning points that are close in high-dimensional space remain close in low-dimensional space." }, { "id": 4, "questionText": "What is the usual output dimension for t-SNE visualization?", "options": [ "1D", "10D", "2D or 3D", "5D" ], "correctAnswerIndex": 2, "explanation": "t-SNE is typically used to reduce data to 2D or 3D for easy visualization." 
}, { "id": 5, "questionText": "Which probability distribution is used in the low-dimensional space of t-SNE?", "options": [ "Student’s t-distribution", "Gaussian distribution", "Binomial distribution", "Uniform distribution" ], "correctAnswerIndex": 0, "explanation": "t-SNE uses a Student’s t-distribution with one degree of freedom in the low-dimensional space to model pairwise similarities and prevent crowding." }, { "id": 6, "questionText": "Which cost function does t-SNE minimize?", "options": [ "Cross-Entropy", "Euclidean distance", "Mean Squared Error", "Kullback-Leibler divergence" ], "correctAnswerIndex": 3, "explanation": "t-SNE minimizes the Kullback-Leibler divergence between high-dimensional and low-dimensional probability distributions." }, { "id": 7, "questionText": "In t-SNE, what is 'perplexity'?", "options": [ "Effective number of neighbors considered", "Learning rate for gradient descent", "A measure of dataset size", "Number of output dimensions" ], "correctAnswerIndex": 0, "explanation": "Perplexity controls how many neighbors influence the calculation of similarities; it acts like a smooth measure of the number of neighbors." }, { "id": 8, "questionText": "t-SNE is best suited for which type of relationships?", "options": [ "Only categorical", "Linear", "Only continuous labels", "Non-linear" ], "correctAnswerIndex": 3, "explanation": "t-SNE captures non-linear relationships that linear methods like PCA may miss." }, { "id": 9, "questionText": "Which step is part of the t-SNE algorithm?", "options": [ "Clustering data into fixed bins", "Computing pairwise similarities in high-dimensional space", "Normalizing labels only", "Sorting features alphabetically" ], "correctAnswerIndex": 1, "explanation": "t-SNE first computes pairwise similarities between all points in the high-dimensional space." 
}, { "id": 10, "questionText": "t-SNE initialization in low-dimensional space is usually:", "options": [ "Zero matrix", "Random", "Label-based ordering", "PCA projection" ], "correctAnswerIndex": 1, "explanation": "t-SNE typically starts with random placement of points in the low-dimensional space." }, { "id": 11, "questionText": "Which of these is a limitation of t-SNE?", "options": [ "Requires categorical labels", "Does not work on numeric data", "Does not scale well to very large datasets", "Cannot handle linear relationships" ], "correctAnswerIndex": 2, "explanation": "t-SNE can be computationally expensive and memory-intensive for large datasets." }, { "id": 12, "questionText": "Which t-SNE hyperparameter affects convergence speed?", "options": [ "Learning rate", "Perplexity", "KL divergence", "Number of features" ], "correctAnswerIndex": 0, "explanation": "The learning rate determines the step size in gradient descent optimization of the t-SNE cost function." }, { "id": 13, "questionText": "t-SNE is mainly used for:", "options": [ "Prediction", "Clustering as a main algorithm", "Classification", "Dimensionality reduction for visualization" ], "correctAnswerIndex": 3, "explanation": "t-SNE reduces dimensionality to visualize complex high-dimensional data effectively." }, { "id": 14, "questionText": "Which of these statements about t-SNE is correct?", "options": [ "It only works on 2D input", "It preserves global distances exactly", "It produces deterministic results", "It preserves local neighborhood structure" ], "correctAnswerIndex": 3, "explanation": "t-SNE focuses on maintaining local structure; global distances may be distorted." 
}, { "id": 15, "questionText": "t-SNE reduces crowding in low-dimensional space using:", "options": [ "Euclidean distance in high dimension only", "Gaussian kernel in high dimension, Student’s t-distribution in low dimension", "Uniform distance mapping", "PCA initialization only" ], "correctAnswerIndex": 1, "explanation": "Using t-distribution in low dimension with heavy tails helps spread out points to avoid crowding." }, { "id": 16, "questionText": "t-SNE’s output can vary between runs due to:", "options": [ "Random initialization", "Gradient descent step size", "Data normalization", "Perplexity only" ], "correctAnswerIndex": 0, "explanation": "Random initialization in low-dimensional space can lead to different local minima in optimization." }, { "id": 17, "questionText": "Which of these is true about t-SNE and PCA?", "options": [ "t-SNE is deterministic like PCA", "Both capture only linear structures", "PCA is better for visualization", "t-SNE captures non-linear structure; PCA is linear" ], "correctAnswerIndex": 3, "explanation": "t-SNE can capture complex non-linear relationships, whereas PCA preserves only linear variance." }, { "id": 18, "questionText": "t-SNE is not suitable for:", "options": [ "Non-linear data", "Small datasets", "Large-scale datasets without optimization", "2D visualization" ], "correctAnswerIndex": 2, "explanation": "t-SNE has high computational and memory cost for very large datasets." }, { "id": 19, "questionText": "Which t-SNE hyperparameter influences the balance between local and global structure?", "options": [ "Learning rate", "Perplexity", "Output dimension", "KL divergence" ], "correctAnswerIndex": 1, "explanation": "Perplexity acts as a smooth measure of the number of neighbors, balancing local vs. slightly broader structures." 
}, { "id": 20, "questionText": "t-SNE is stochastic because:", "options": [ "It uses KL divergence", "It uses random initialization and gradient descent", "It uses linear mapping", "It uses PCA first" ], "correctAnswerIndex": 1, "explanation": "The combination of random initialization and stochastic optimization leads to variability in results." }, { "id": 21, "questionText": "Which of these datasets is most appropriate for t-SNE?", "options": [ "Low-dimensional 2D data only", "Empty datasets", "Categorical data without encoding", "High-dimensional numeric data for visualization" ], "correctAnswerIndex": 3, "explanation": "t-SNE is designed to visualize high-dimensional data by projecting it to 2D or 3D." }, { "id": 22, "questionText": "t-SNE helps in which task indirectly?", "options": [ "Label encoding", "Understanding clusters or patterns", "Making predictions", "Model regularization" ], "correctAnswerIndex": 1, "explanation": "While t-SNE does not perform clustering, it can help visually identify clusters or patterns." }, { "id": 23, "questionText": "Why does t-SNE use Student’s t-distribution in low dimensions?", "options": [ "To handle the 'crowding problem' by allowing heavy tails", "To increase perplexity", "To linearize data", "To simplify computation" ], "correctAnswerIndex": 0, "explanation": "Heavy-tailed t-distribution spreads out points in low-dimensional space, avoiding crowding." }, { "id": 24, "questionText": "t-SNE is sensitive to which of the following?", "options": [ "Number of labels only", "Dataset size irrelevant", "Hyperparameters (perplexity, learning rate) and initialization", "Output dimension only" ], "correctAnswerIndex": 2, "explanation": "Small changes in parameters or random initialization can significantly affect t-SNE results." 
}, { "id": 25, "questionText": "t-SNE preserves which type of distance?", "options": [ "Global Euclidean distance", "Local pairwise similarity", "Cosine distance", "Manhattan distance" ], "correctAnswerIndex": 1, "explanation": "t-SNE preserves pairwise similarities among neighbors rather than absolute global distances." }, { "id": 26, "questionText": "Which of these is a recommended practice before t-SNE?", "options": [ "Removing labels", "Standardizing or normalizing features", "Shuffling the dataset randomly", "Discretizing continuous features" ], "correctAnswerIndex": 1, "explanation": "Feature scaling ensures no single feature dominates pairwise distance calculations." }, { "id": 27, "questionText": "t-SNE is mainly used in which field?", "options": [ "Optimization of hyperparameters", "Data visualization, exploratory data analysis", "Regression", "Prediction" ], "correctAnswerIndex": 1, "explanation": "t-SNE helps visualize high-dimensional data in 2D or 3D for analysis and pattern detection." }, { "id": 28, "questionText": "t-SNE is different from PCA because:", "options": [ "It is non-linear and focuses on local similarities", "It reduces to a single principal component", "It preserves global linear variance", "It always gives deterministic results" ], "correctAnswerIndex": 0, "explanation": "Unlike PCA, t-SNE focuses on preserving local structure and can capture complex non-linear relationships." }, { "id": 29, "questionText": "Which of these can be used to accelerate t-SNE on large datasets?", "options": [ "Reduce iterations to 1", "Increase perplexity to maximum", "Use raw data without scaling", "Barnes-Hut approximation or FIt-SNE" ], "correctAnswerIndex": 3, "explanation": "Barnes-Hut t-SNE and FIt-SNE optimize computation for larger datasets." 
}, { "id": 30, "questionText": "t-SNE is primarily a ______ technique.", "options": [ "Clustering algorithm", "Regression", "Classification", "Visualization and dimensionality reduction" ], "correctAnswerIndex": 3, "explanation": "t-SNE is mainly used to reduce dimensionality of data for visualization purposes." }, { "id": 31, "questionText": "t-SNE uses which similarity measure in high-dimensional space?", "options": [ "Conditional probability based on Gaussian distribution", "Hamming distance", "Manhattan distance", "Cosine similarity" ], "correctAnswerIndex": 0, "explanation": "t-SNE converts pairwise distances into conditional probabilities using a Gaussian distribution to represent similarity in high-dimensional space." }, { "id": 32, "questionText": "Scenario: You increase t-SNE perplexity from 5 to 50. Likely effect?", "options": [ "KL divergence becomes zero", "Clusters appear tighter and more separated", "Clusters merge, representing broader neighborhood", "Visualization fails" ], "correctAnswerIndex": 2, "explanation": "Higher perplexity considers more neighbors, leading to a broader view of local structure and sometimes merging of clusters." }, { "id": 33, "questionText": "t-SNE output varies between runs due to:", "options": [ "Variance scaling", "Perplexity normalization", "Gradient descent randomness and initialization", "Feature selection" ], "correctAnswerIndex": 2, "explanation": "Random initialization combined with gradient descent on a non-convex cost function can converge to different local minima, leading to different results in different runs." }, { "id": 34, "questionText": "Scenario: You apply t-SNE to 1000-dimensional word embeddings. 
Best practice?", "options": [ "Normalize only labels", "Discard half of the words randomly", "Optionally perform PCA first to reduce dimensions before t-SNE", "Apply t-SNE directly without scaling" ], "correctAnswerIndex": 2, "explanation": "Using PCA first reduces noise and computation while retaining most variance, improving t-SNE performance on high-dimensional embeddings." }, { "id": 35, "questionText": "t-SNE is sensitive to which hyperparameters?", "options": [ "Perplexity, learning rate, number of iterations", "Data type", "Number of output labels only", "PCA components only" ], "correctAnswerIndex": 0, "explanation": "Perplexity, learning rate, and iterations significantly influence the optimization and visualization outcome." }, { "id": 36, "questionText": "Scenario: t-SNE shows distorted global distances. Reason?", "options": [ "t-SNE focuses on preserving local structure, not global distances", "Data not normalized", "Algorithm failed", "Number of components is wrong" ], "correctAnswerIndex": 0, "explanation": "t-SNE prioritizes local similarity preservation; global distances may be distorted in low-dimensional visualization." }, { "id": 37, "questionText": "t-SNE uses which distribution in low-dimensional space to compute similarities?", "options": [ "Student’s t-distribution", "Poisson", "Uniform", "Gaussian" ], "correctAnswerIndex": 0, "explanation": "A heavy-tailed Student’s t-distribution is used to avoid crowding in low-dimensional embeddings." }, { "id": 38, "questionText": "Scenario: t-SNE applied to small dataset, clusters overlap in 2D. Possible reason?", "options": [ "Learning rate too small", "All of these", "Data scaled incorrectly", "Perplexity too high" ], "correctAnswerIndex": 1, "explanation": "Perplexity, learning rate, and feature scaling all affect t-SNE output; poor tuning can cause cluster overlap." 
}, { "id": 39, "questionText": "t-SNE reduces dimensionality mainly for:", "options": [ "Prediction accuracy", "Label generation", "Visualization of high-dimensional patterns", "Feature elimination" ], "correctAnswerIndex": 2, "explanation": "t-SNE helps visualize complex high-dimensional data by reducing it to 2D or 3D while preserving local structure." }, { "id": 40, "questionText": "Scenario: t-SNE shows similar points far apart. Likely cause?", "options": [ "Random initialization", "All of these", "Insufficient iterations", "Improper perplexity or learning rate" ], "correctAnswerIndex": 1, "explanation": "All these factors can distort local relationships in low-dimensional mapping." }, { "id": 41, "questionText": "Scenario: High-dimensional clusters not visible after t-SNE. Solution?", "options": [ "Use PCA for pre-reduction", "Tune perplexity and learning rate", "All of these", "Increase iterations" ], "correctAnswerIndex": 2, "explanation": "Proper hyperparameter tuning, PCA pre-reduction, and enough iterations improve cluster separation." }, { "id": 42, "questionText": "t-SNE optimization uses which method?", "options": [ "Gradient descent", "Random selection", "Eigen decomposition only", "Closed-form solution" ], "correctAnswerIndex": 0, "explanation": "t-SNE minimizes KL divergence using iterative gradient descent." }, { "id": 43, "questionText": "Scenario: You use t-SNE on image embeddings, clusters appear inconsistent. Recommendation?", "options": [ "Reduce iterations", "Use raw pixels without embeddings", "Change output dimension to 1D", "Repeat multiple runs and average or use PCA initialization" ], "correctAnswerIndex": 3, "explanation": "Due to randomness, multiple runs or PCA initialization can stabilize t-SNE visualization." }, { "id": 44, "questionText": "Scenario: t-SNE applied after PCA with 50 components. 
Benefit?", "options": [ "Generates labels", "Prevents convergence", "Distorts local structure", "Reduces noise and computation" ], "correctAnswerIndex": 3, "explanation": "PCA pre-reduction helps t-SNE handle high-dimensional data efficiently while preserving structure." }, { "id": 45, "questionText": "t-SNE is mainly affected by:", "options": [ "Number of labels", "Hyperparameters and data scaling", "Dataset name", "Feature type only" ], "correctAnswerIndex": 1, "explanation": "t-SNE results are sensitive to perplexity, learning rate, iterations, and proper feature scaling." }, { "id": 46, "questionText": "Scenario: t-SNE clusters different classes but distorts distances. Interpretation?", "options": [ "Data incorrect", "Local structure preserved; global distances may differ", "Output dimension wrong", "Algorithm failed" ], "correctAnswerIndex": 1, "explanation": "t-SNE emphasizes local neighbor relations, which can distort large-scale global distances." }, { "id": 47, "questionText": "t-SNE is not ideal for:", "options": [ "Exploring patterns", "Small datasets", "Extremely large datasets without optimization", "Visualizing embeddings" ], "correctAnswerIndex": 2, "explanation": "t-SNE has high computational cost for very large datasets, though optimized versions exist." }, { "id": 48, "questionText": "Scenario: Learning rate too high in t-SNE. Effect?", "options": [ "All of these", "Optimization diverges, poor visualization", "Slower convergence", "Better cluster separation" ], "correctAnswerIndex": 1, "explanation": "Excessively high learning rate can prevent gradient descent from converging, causing chaotic mapping." }, { "id": 49, "questionText": "Scenario: Low perplexity used on dense dataset. 
Effect?", "options": [ "Improves convergence", "All points overlap", "Merges clusters", "Overemphasizes very local structure, clusters may fragment" ], "correctAnswerIndex": 3, "explanation": "Low perplexity focuses on few neighbors, possibly fragmenting clusters that are globally coherent." }, { "id": 50, "questionText": "Scenario: t-SNE applied to gene expression data for visualization. Useful because?", "options": [ "Generates labels", "Predicts outcomes", "Reduces features for training", "Highlights local patterns and clusters of similar samples" ], "correctAnswerIndex": 3, "explanation": "t-SNE reveals underlying patterns in high-dimensional gene expression data." }, { "id": 51, "questionText": "Scenario: After applying t-SNE, some clusters appear elongated. Likely cause?", "options": [ "Perplexity or learning rate not optimal", "Insufficient iterations", "Random initialization", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Cluster shape distortions can result from improper hyperparameters, initialization, or insufficient optimization steps." }, { "id": 52, "questionText": "Scenario: t-SNE shows overlapping clusters for distinct classes. Recommended action?", "options": [ "Reduce dataset size", "Increase output dimensions beyond 3", "Use raw data without scaling", "Adjust perplexity or learning rate, or try PCA initialization" ], "correctAnswerIndex": 3, "explanation": "Hyperparameter tuning and proper initialization help better separate clusters in low-dimensional mapping." }, { "id": 53, "questionText": "t-SNE can be combined with PCA to:", "options": [ "Replace t-SNE entirely", "Increase perplexity automatically", "Reduce noise and dimensionality before t-SNE", "Generate class labels" ], "correctAnswerIndex": 2, "explanation": "Using PCA first reduces high-dimensional noise, improving t-SNE efficiency and visualization quality." }, { "id": 54, "questionText": "Scenario: t-SNE on image embeddings produces different plots on repeated runs. 
Reason?", "options": [ "Data scaling issues", "Random initialization and stochastic gradient descent", "Perplexity too low", "Output dimension is too small" ], "correctAnswerIndex": 1, "explanation": "Variability is due to random initialization and stochastic optimization inherent to t-SNE." }, { "id": 55, "questionText": "Scenario: t-SNE applied to word embeddings shows tight clusters merging. Likely reason?", "options": [ "KL divergence minimized", "Learning rate too low", "Perplexity too high, considering more neighbors", "Insufficient iterations" ], "correctAnswerIndex": 2, "explanation": "High perplexity broadens the neighborhood, causing close clusters to merge visually." }, { "id": 56, "questionText": "Scenario: Large dataset t-SNE visualization is slow. Solution?", "options": [ "Reduce perplexity to 1", "Increase learning rate to max", "Use Barnes-Hut t-SNE or FIt-SNE approximation", "Use raw data without normalization" ], "correctAnswerIndex": 2, "explanation": "Optimized t-SNE versions like Barnes-Hut or FIt-SNE reduce computation and memory cost for large datasets." }, { "id": 57, "questionText": "Scenario: t-SNE on 100-dimensional embeddings, output 2D. Why might global distances be inaccurate?", "options": [ "Incorrect PCA initialization", "t-SNE prioritizes local neighborhood preservation over global distances", "Random features selected", "Algorithm failed" ], "correctAnswerIndex": 1, "explanation": "t-SNE focuses on preserving local similarities; global distances may be distorted in low-dimensional space." }, { "id": 58, "questionText": "Scenario: t-SNE applied on noisy dataset. Recommended preprocessing?", "options": [ "Normalize or standardize features, optionally reduce noise with PCA", "Reduce output dimension to 1D", "Leave data raw", "Increase perplexity to max" ], "correctAnswerIndex": 0, "explanation": "Scaling and dimensionality reduction improve t-SNE’s ability to capture meaningful structure." 
}, { "id": 59, "questionText": "Scenario: Two similar clusters in high-dimensional space are far apart in t-SNE plot. Likely reason?", "options": [ "Data labeling issues", "Algorithm failure", "Insufficient perplexity or learning rate tuning", "Incorrect output dimension" ], "correctAnswerIndex": 2, "explanation": "Improper hyperparameters can distort low-dimensional mapping even if local structure is partially preserved." }, { "id": 60, "questionText": "t-SNE can indirectly help in which of these tasks?", "options": [ "Direct prediction", "Visual identification of clusters or patterns", "Label encoding", "Feature selection for regression" ], "correctAnswerIndex": 1, "explanation": "While not a clustering method itself, t-SNE helps visually identify clusters or patterns in high-dimensional data." }, { "id": 61, "questionText": "Scenario: You want to visualize 10,000 points with t-SNE but it is slow. Best practice?", "options": [ "Increase perplexity to 1000", "Use optimized versions like FIt-SNE or reduce dimensionality with PCA first", "Randomly remove half the points", "Reduce iterations to 10" ], "correctAnswerIndex": 1, "explanation": "Optimized algorithms or PCA pre-reduction improve t-SNE performance on large datasets." }, { "id": 62, "questionText": "Scenario: t-SNE applied on text embeddings shows random patterns. Likely cause?", "options": [ "Low-dimensional output", "Dataset too large", "Random initialization and inappropriate hyperparameters", "Data normalization applied" ], "correctAnswerIndex": 2, "explanation": "Random initialization combined with suboptimal perplexity or learning rate can produce unstable visualizations." }, { "id": 63, "questionText": "t-SNE is particularly useful when:", "options": [ "High-dimensional data visualization is needed", "Regression is required", "Prediction is the goal", "Clustering as a main task" ], "correctAnswerIndex": 0, "explanation": "t-SNE is designed for visualization of complex, high-dimensional datasets." 
}, { "id": 64, "questionText": "Scenario: t-SNE clusters appear overlapping even after PCA pre-reduction. Recommendation?", "options": [ "Reduce dataset size further", "Decrease output dimension to 1D", "Tune perplexity and learning rate, or increase iterations", "Switch to raw data" ], "correctAnswerIndex": 2, "explanation": "Hyperparameter tuning is key to achieving better separation in t-SNE visualizations." }, { "id": 65, "questionText": "Scenario: t-SNE visualization is chaotic. Possible reasons?", "options": [ "High learning rate, low perplexity, random initialization", "PCA used for pre-reduction", "Data normalization applied", "Output dimension too large" ], "correctAnswerIndex": 0, "explanation": "Improper hyperparameters and random initialization can produce poor or chaotic t-SNE plots." }, { "id": 66, "questionText": "Scenario: You reduce embeddings to 2D with t-SNE, but clusters not apparent. Next step?", "options": [ "Increase dataset size", "Change output to 1D", "Use raw data only", "Adjust perplexity, learning rate, or perform PCA first" ], "correctAnswerIndex": 3, "explanation": "Hyperparameter tuning and preprocessing like PCA can help reveal clusters in t-SNE plots." }, { "id": 67, "questionText": "Scenario: t-SNE applied to 300-dimensional image embeddings, some clusters scattered. Likely reason?", "options": [ "All of these", "Learning rate too low", "High-dimensional noise, consider PCA pre-reduction", "Perplexity too high" ], "correctAnswerIndex": 0, "explanation": "Noise and improper hyperparameters can scatter clusters; preprocessing and tuning are essential." 
}, { "id": 68, "questionText": "t-SNE preserves local distances by converting pairwise distances to:", "options": [ "Probabilities using Gaussian in high-d and t-distribution in low-d", "Manhattan distance only", "Euclidean distances only", "Cosine similarity only" ], "correctAnswerIndex": 0, "explanation": "Pairwise distances are converted to conditional probabilities in high-d, and Student’s t-distribution in low-d preserves local similarity." }, { "id": 69, "questionText": "Scenario: You run t-SNE multiple times and get slightly different plots. How to improve consistency?", "options": [ "Increase output dimension to 5D", "Decrease dataset size", "Normalize labels only", "Use PCA initialization and fix random seed" ], "correctAnswerIndex": 3, "explanation": "PCA initialization and fixing random seed reduce variability in t-SNE visualization." }, { "id": 70, "questionText": "Scenario: t-SNE produces compressed clusters in center. Likely cause?", "options": [ "Crowding problem in low-dimensional space", "Algorithm failure", "Learning rate too low", "Perplexity too high" ], "correctAnswerIndex": 0, "explanation": "The crowding problem arises because high-dimensional neighborhoods cannot be perfectly represented in low-dimensional space, causing compression." }, { "id": 71, "questionText": "Scenario: You apply t-SNE on 10,000 image embeddings and clusters appear noisy. Which is the best approach?", "options": [ "Use PCA to reduce dimensions before t-SNE and tune perplexity", "Increase learning rate to maximum", "Use raw pixel values directly", "Reduce output dimensions to 1D" ], "correctAnswerIndex": 0, "explanation": "PCA pre-reduction reduces noise and dimensionality, improving t-SNE visualization on large datasets." }, { "id": 72, "questionText": "Scenario: t-SNE on text embeddings shows overlapping topics. 
Likely cause?", "options": [ "Perplexity too low or high, or insufficient iterations", "Data normalization applied", "Output dimension too high", "Embedding size too small" ], "correctAnswerIndex": 0, "explanation": "Hyperparameter tuning is essential; low/high perplexity or insufficient iterations can cause overlapping clusters." }, { "id": 73, "questionText": "Scenario: Two clusters in high-dimensional space appear merged in t-SNE plot. What can you do?", "options": [ "Adjust perplexity, learning rate, or use PCA initialization", "Increase output dimension to 5D", "Normalize labels", "Reduce dataset size randomly" ], "correctAnswerIndex": 0, "explanation": "Proper hyperparameter tuning and PCA initialization can help separate clusters that appear merged in low-dimensional mapping." }, { "id": 74, "questionText": "Scenario: Running t-SNE on genomic data, you notice small clusters isolated. Reason?", "options": [ "Perplexity may be low, emphasizing very local neighborhoods", "High learning rate", "Output dimension too high", "Data normalization missing" ], "correctAnswerIndex": 0, "explanation": "Low perplexity focuses on very local neighborhoods, potentially isolating small clusters in visualization." }, { "id": 75, "questionText": "Scenario: t-SNE produces different visualizations on repeated runs. How to stabilize?", "options": [ "Use PCA initialization and fix random seed", "Reduce dataset size", "Increase output dimension beyond 3D", "Use raw data without scaling" ], "correctAnswerIndex": 0, "explanation": "PCA initialization and setting a fixed random seed reduce stochastic variation in t-SNE results." }, { "id": 76, "questionText": "Scenario: t-SNE on high-dimensional sensor data shows tight clusters but global distances are distorted. 
Interpretation?", "options": [ "Local structure preserved; global distances are not maintained", "Algorithm failed", "Data incorrectly scaled", "Output dimension wrong" ], "correctAnswerIndex": 0, "explanation": "t-SNE preserves local pairwise relationships; global distances can appear distorted in 2D/3D visualization." }, { "id": 77, "questionText": "Scenario: Clusters appear fragmented after t-SNE on customer embeddings. Likely reason?", "options": [ "Perplexity too low", "Learning rate too high", "Data normalization missing", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Low perplexity, high learning rate, or improper feature scaling can fragment clusters in t-SNE plots." }, { "id": 78, "questionText": "Scenario: Applying t-SNE to visualize embeddings after deep learning model training. Best preprocessing?", "options": [ "Normalize features and optionally use PCA to reduce dimensions", "Use raw embeddings directly", "Randomly shuffle dimensions", "Use first two features only" ], "correctAnswerIndex": 0, "explanation": "Normalization and PCA pre-reduction enhance t-SNE visualization quality for deep embeddings." }, { "id": 79, "questionText": "Scenario: Clusters appear compressed in center of t-SNE plot. Likely cause?", "options": [ "Crowding problem inherent to low-dimensional mapping", "Algorithm failed", "Perplexity too high", "Learning rate too low" ], "correctAnswerIndex": 0, "explanation": "Crowding problem occurs because high-dimensional neighborhoods cannot be perfectly represented in low dimensions, causing compression." }, { "id": 80, "questionText": "Scenario: After t-SNE, similar data points are far apart in 2D. Likely reason?", "options": [ "Hyperparameters not tuned correctly", "Data normalization failed", "Output dimension too high", "Labels missing" ], "correctAnswerIndex": 0, "explanation": "Incorrect perplexity, learning rate, or insufficient iterations can cause similar points to appear far apart." 
}, { "id": 81, "questionText": "Scenario: t-SNE on embeddings shows elongated clusters. Best action?", "options": [ "Adjust perplexity and learning rate, or increase iterations", "Reduce output dimension to 1D", "Use raw embeddings without scaling", "Remove random points" ], "correctAnswerIndex": 0, "explanation": "Cluster elongation often occurs due to suboptimal hyperparameters; tuning and more iterations can improve results." }, { "id": 82, "questionText": "Scenario: Visualizing 50,000 text embeddings with t-SNE is very slow. Solution?", "options": [ "Use FIt-SNE or Barnes-Hut t-SNE for faster computation", "Reduce perplexity to 1", "Use raw text instead of embeddings", "Decrease output dimensions to 1D" ], "correctAnswerIndex": 0, "explanation": "Optimized t-SNE implementations significantly speed up visualization of large datasets." }, { "id": 83, "questionText": "Scenario: t-SNE clusters overlap despite tuning. Next step?", "options": [ "Consider alternative dimensionality reduction methods like UMAP", "Reduce output dimensions further", "Use raw data without embeddings", "Remove labels" ], "correctAnswerIndex": 0, "explanation": "If t-SNE cannot separate clusters even after tuning, UMAP or other DR methods might better preserve structure." }, { "id": 84, "questionText": "Scenario: t-SNE on protein expression data shows some scattered clusters. Likely cause?", "options": [ "Noise in high-dimensional data", "Insufficient iterations", "Suboptimal hyperparameters", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Noise and suboptimal hyperparameters can cause scattered clusters; preprocessing and tuning help visualization." }, { "id": 85, "questionText": "Scenario: You use PCA to reduce 500D embeddings to 50D before applying t-SNE. 
Why?", "options": [ "Reduce computation and noise while retaining important variance", "Increase global distance preservation", "Generate labels automatically", "Visualize in 3D directly" ], "correctAnswerIndex": 0, "explanation": "PCA pre-reduction helps t-SNE efficiently process high-dimensional data while keeping meaningful structure." }, { "id": 86, "questionText": "Scenario: Clusters appear stretched along a single axis. Likely cause?", "options": [ "Perplexity too high or learning rate too low", "Algorithm failure", "Data normalization missing", "Incorrect output dimension" ], "correctAnswerIndex": 0, "explanation": "Improper hyperparameters can cause cluster elongation in low-dimensional embeddings." }, { "id": 87, "questionText": "Scenario: t-SNE applied to 300-dimensional embeddings of customer behavior. Output 2D. What can distort clusters?", "options": [ "Random initialization, hyperparameters, noisy features", "Data scaling applied", "Output dimension too high", "Labels missing" ], "correctAnswerIndex": 0, "explanation": "Cluster distortion occurs due to noise, initialization randomness, and hyperparameter settings." }, { "id": 88, "questionText": "Scenario: t-SNE output differs between runs. Best practice to make it consistent?", "options": [ "Fix random seed and use PCA initialization", "Use raw data directly", "Reduce output dimension to 1D", "Change KL divergence formula" ], "correctAnswerIndex": 0, "explanation": "Fixed random seed and PCA initialization reduce stochastic variation across runs." }, { "id": 89, "questionText": "Scenario: t-SNE applied to embeddings shows overlapping clusters, perplexity set to 5. Recommendation?", "options": [ "Increase perplexity to consider more neighbors", "Reduce learning rate", "Decrease output dimension to 1D", "Use raw high-dimensional features" ], "correctAnswerIndex": 0, "explanation": "Low perplexity can underrepresent neighborhood structure; increasing perplexity may separate clusters better." 
}, { "id": 90, "questionText": "Scenario: After t-SNE, similar embeddings appear scattered. Likely hyperparameter issue?", "options": [ "Learning rate too high or perplexity not optimal", "Output dimension too high", "Labels missing", "Data normalized incorrectly" ], "correctAnswerIndex": 0, "explanation": "Improper learning rate or perplexity can scatter similar points, reducing visualization quality." }, { "id": 91, "questionText": "Scenario: t-SNE shows different cluster sizes for similar data. Why?", "options": [ "Local density differences and crowding problem in low-dimensional space", "Algorithm failure", "Incorrect output dimension", "Data normalization missing" ], "correctAnswerIndex": 0, "explanation": "t-SNE adapts its similarity bandwidth to local density, so dense regions are expanded and sparse regions are contracted; combined with the crowding problem, this makes cluster sizes in the plot unreliable indicators of true cluster size." }, { "id": 92, "questionText": "Scenario: You want faster t-SNE on 100,000 points. Recommendation?", "options": [ "Use Barnes-Hut or FIt-SNE approximation", "Reduce output dimension to 1D", "Use raw data without scaling", "Decrease perplexity to 1" ], "correctAnswerIndex": 0, "explanation": "Optimized t-SNE versions reduce computation and memory for large datasets." }, { "id": 93, "questionText": "Scenario: t-SNE shows elongated clusters. Likely hyperparameter adjustment?", "options": [ "Adjust perplexity and learning rate, or increase iterations", "Reduce dataset size", "Use raw data", "Remove features randomly" ], "correctAnswerIndex": 0, "explanation": "Cluster elongation often occurs due to improper hyperparameters; tuning can improve visualization." }, { "id": 94, "questionText": "Scenario: t-SNE on embeddings shows isolated points far from clusters. 
Likely reason?", "options": [ "Outliers or low perplexity emphasizing local neighborhoods", "Algorithm failure", "Output dimension too high", "Data normalization missing" ], "correctAnswerIndex": 0, "explanation": "Outliers or very low perplexity can cause points to appear isolated in visualization." }, { "id": 95, "questionText": "Scenario: You want t-SNE results reproducible across runs. Steps?", "options": [ "Fix random seed, use PCA initialization, standardize features", "Increase output dimensions", "Reduce dataset size", "Use raw data" ], "correctAnswerIndex": 0, "explanation": "Reproducibility requires controlling randomness and preprocessing consistently." }, { "id": 96, "questionText": "Scenario: t-SNE applied to multi-class embeddings, some classes overlap. Best solution?", "options": [ "Tune perplexity, learning rate, or try PCA initialization", "Reduce number of classes", "Change output to 1D", "Use raw features without preprocessing" ], "correctAnswerIndex": 0, "explanation": "Hyperparameter tuning and PCA initialization often improve cluster separation for multi-class data." }, { "id": 97, "questionText": "Scenario: t-SNE visualization shows tight clusters compressed together. Likely cause?", "options": [ "Crowding problem and insufficient perplexity", "Algorithm failure", "Output dimension too high", "Data not normalized" ], "correctAnswerIndex": 0, "explanation": "Crowding problem causes clusters to compress in low-dimensional space, especially with suboptimal perplexity." }, { "id": 98, "questionText": "Scenario: You apply t-SNE on embeddings with high noise. Recommended step?", "options": [ "Denoise or reduce dimensionality with PCA before t-SNE", "Use raw embeddings", "Reduce output dimension to 1D", "Increase iterations without preprocessing" ], "correctAnswerIndex": 0, "explanation": "Preprocessing helps t-SNE focus on meaningful structure rather than noise." 
}, { "id": 99, "questionText": "Scenario: t-SNE visualization shows variable cluster shapes between runs. Solution?", "options": [ "Fix random seed and use PCA initialization", "Reduce dataset size", "Change output dimension to 1D", "Normalize labels" ], "correctAnswerIndex": 0, "explanation": "Fixing seed and PCA initialization stabilizes t-SNE output across runs." }, { "id": 100, "questionText": "Scenario: t-SNE applied to 500D embeddings, some clusters overlapping. Recommended approach?", "options": [ "Try PCA pre-reduction, adjust perplexity and learning rate, increase iterations", "Reduce output dimension to 1D", "Use raw features", "Remove labels" ], "correctAnswerIndex": 0, "explanation": "Proper preprocessing and hyperparameter tuning help t-SNE separate overlapping clusters." } ] }