Spaces:

deedrop1140
/

MachineLearningAlgorithms

Runtime error

File size: 47,886 Bytes

0d00d62

{
  "title": "t-SNE Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions on t-Distributed Stochastic Neighbor Embedding (t-SNE), covering basic theory, medium-level conceptual understanding, and hard scenario-based applications.",
  "questions": [
    {
      "id": 1,
      "questionText": "What does t-SNE stand for?",
      "options": [
        "t-Scaled Neighbor Estimation",
        "t-Distributed Stochastic Neighbor Embedding",
        "Tensor Stochastic Network Embedding",
        "Total Stochastic Neural Embedding"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE stands for t-Distributed Stochastic Neighbor Embedding, a technique for dimensionality reduction and visualization of high-dimensional data."
    },
    {
      "id": 2,
      "questionText": "What is the primary purpose of t-SNE?",
      "options": [
        "Normalizing data features",
        "Generating synthetic data",
        "Reducing dimensionality for visualization",
        "Classifying high-dimensional data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE is primarily used to reduce high-dimensional data into 2D or 3D for visualization while preserving local structure."
    },
    {
      "id": 3,
      "questionText": "t-SNE is particularly good at preserving which type of data structure?",
      "options": [
        "Linear relationships",
        "Global distances",
        "Local neighborhood structure",
        "Class labels"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE focuses on preserving local similarities, meaning points that are close in high-dimensional space remain close in low-dimensional space."
    },
    {
      "id": 4,
      "questionText": "What is the usual output dimension for t-SNE visualization?",
      "options": [
        "1D",
        "10D",
        "2D or 3D",
        "5D"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE is typically used to reduce data to 2D or 3D for easy visualization."
    },
    {
      "id": 5,
      "questionText": "Which probability distribution is used in the low-dimensional space of t-SNE?",
      "options": [
        "Student’s t-distribution",
        "Gaussian distribution",
        "Binomial distribution",
        "Uniform distribution"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE uses a Student’s t-distribution with one degree of freedom in the low-dimensional space to model pairwise similarities and prevent crowding."
    },
    {
      "id": 6,
      "questionText": "Which cost function does t-SNE minimize?",
      "options": [
        "Cross-Entropy",
        "Euclidean distance",
        "Mean Squared Error",
        "Kullback-Leibler divergence"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE minimizes the Kullback-Leibler divergence between high-dimensional and low-dimensional probability distributions."
    },
    {
      "id": 7,
      "questionText": "In t-SNE, what is 'perplexity'?",
      "options": [
        "Effective number of neighbors considered",
        "Learning rate for gradient descent",
        "A measure of dataset size",
        "Number of output dimensions"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Perplexity controls how many neighbors influence the calculation of similarities; it acts like a smooth measure of the number of neighbors."
    },
    {
      "id": 8,
      "questionText": "t-SNE is best suited for which type of relationships?",
      "options": [
        "Only categorical",
        "Linear",
        "Only continuous labels",
        "Non-linear"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE captures non-linear relationships that linear methods like PCA may miss."
    },
    {
      "id": 9,
      "questionText": "Which step is part of the t-SNE algorithm?",
      "options": [
        "Clustering data into fixed bins",
        "Computing pairwise similarities in high-dimensional space",
        "Normalizing labels only",
        "Sorting features alphabetically"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE first computes pairwise similarities between all points in the high-dimensional space."
    },
    {
      "id": 10,
      "questionText": "t-SNE initialization in low-dimensional space is usually:",
      "options": [
        "Zero matrix",
        "Random",
        "Label-based ordering",
        "PCA projection"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE typically starts with random placement of points in the low-dimensional space."
    },
    {
      "id": 11,
      "questionText": "Which of these is a limitation of t-SNE?",
      "options": [
        "Requires categorical labels",
        "Does not work on numeric data",
        "Does not scale well to very large datasets",
        "Cannot handle linear relationships"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE can be computationally expensive and memory-intensive for large datasets."
    },
    {
      "id": 12,
      "questionText": "Which t-SNE hyperparameter affects convergence speed?",
      "options": [
        "Learning rate",
        "Perplexity",
        "KL divergence",
        "Number of features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "The learning rate determines the step size in gradient descent optimization of the t-SNE cost function."
    },
    {
      "id": 13,
      "questionText": "t-SNE is mainly used for:",
      "options": [
        "Prediction",
        "Clustering as a main algorithm",
        "Classification",
        "Dimensionality reduction for visualization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE reduces dimensionality to visualize complex high-dimensional data effectively."
    },
    {
      "id": 14,
      "questionText": "Which of these statements about t-SNE is correct?",
      "options": [
        "It only works on 2D input",
        "It preserves global distances exactly",
        "It produces deterministic results",
        "It preserves local neighborhood structure"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE focuses on maintaining local structure; global distances may be distorted."
    },
    {
      "id": 15,
      "questionText": "t-SNE reduces crowding in low-dimensional space using:",
      "options": [
        "Euclidean distance in high dimension only",
        "Gaussian kernel in high dimension, Student’s t-distribution in low dimension",
        "Uniform distance mapping",
        "PCA initialization only"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Using t-distribution in low dimension with heavy tails helps spread out points to avoid crowding."
    },
    {
      "id": 16,
      "questionText": "t-SNE’s output can vary between runs due to:",
      "options": [
        "Random initialization",
        "Gradient descent step size",
        "Data normalization",
        "Perplexity only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Random initialization in low-dimensional space can lead to different local minima in optimization."
    },
    {
      "id": 17,
      "questionText": "Which of these is true about t-SNE and PCA?",
      "options": [
        "t-SNE is deterministic like PCA",
        "Both capture only linear structures",
        "PCA is better for visualization",
        "t-SNE captures non-linear structure; PCA is linear"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE can capture complex non-linear relationships, whereas PCA preserves only linear variance."
    },
    {
      "id": 18,
      "questionText": "t-SNE is not suitable for:",
      "options": [
        "Non-linear data",
        "Small datasets",
        "Large-scale datasets without optimization",
        "2D visualization"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE has high computational and memory cost for very large datasets."
    },
    {
      "id": 19,
      "questionText": "Which t-SNE hyperparameter influences the balance between local and global structure?",
      "options": [
        "Learning rate",
        "Perplexity",
        "Output dimension",
        "KL divergence"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Perplexity acts as a smooth measure of the number of neighbors, balancing local vs. slightly broader structures."
    },
    {
      "id": 20,
      "questionText": "t-SNE is stochastic because:",
      "options": [
        "It uses KL divergence",
        "It uses random initialization and gradient descent",
        "It uses linear mapping",
        "It uses PCA first"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The combination of random initialization and stochastic optimization leads to variability in results."
    },
    {
      "id": 21,
      "questionText": "Which of these datasets is most appropriate for t-SNE?",
      "options": [
        "Low-dimensional 2D data only",
        "Empty datasets",
        "Categorical data without encoding",
        "High-dimensional numeric data for visualization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE is designed to visualize high-dimensional data by projecting it to 2D or 3D."
    },
    {
      "id": 22,
      "questionText": "t-SNE helps in which task indirectly?",
      "options": [
        "Label encoding",
        "Understanding clusters or patterns",
        "Making predictions",
        "Model regularization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "While t-SNE does not perform clustering, it can help visually identify clusters or patterns."
    },
    {
      "id": 23,
      "questionText": "Why does t-SNE use Student’s t-distribution in low dimensions?",
      "options": [
        "To handle the 'crowding problem' by allowing heavy tails",
        "To increase perplexity",
        "To linearize data",
        "To simplify computation"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Heavy-tailed t-distribution spreads out points in low-dimensional space, avoiding crowding."
    },
    {
      "id": 24,
      "questionText": "t-SNE is sensitive to which of the following?",
      "options": [
        "Number of labels only",
        "Dataset size irrelevant",
        "Hyperparameters (perplexity, learning rate) and initialization",
        "Output dimension only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small changes in parameters or random initialization can significantly affect t-SNE results."
    },
    {
      "id": 25,
      "questionText": "t-SNE preserves which type of distance?",
      "options": [
        "Global Euclidean distance",
        "Local pairwise similarity",
        "Cosine distance",
        "Manhattan distance"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE preserves pairwise similarities among neighbors rather than absolute global distances."
    },
    {
      "id": 26,
      "questionText": "Which of these is a recommended practice before t-SNE?",
      "options": [
        "Removing labels",
        "Standardizing or normalizing features",
        "Shuffling the dataset randomly",
        "Discretizing continuous features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Feature scaling ensures no single feature dominates pairwise distance calculations."
    },
    {
      "id": 27,
      "questionText": "t-SNE is mainly used in which field?",
      "options": [
        "Optimization of hyperparameters",
        "Data visualization, exploratory data analysis",
        "Regression",
        "Prediction"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE helps visualize high-dimensional data in 2D or 3D for analysis and pattern detection."
    },
    {
      "id": 28,
      "questionText": "t-SNE is different from PCA because:",
      "options": [
        "It is non-linear and focuses on local similarities",
        "It reduces to a single principal component",
        "It preserves global linear variance",
        "It always gives deterministic results"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Unlike PCA, t-SNE focuses on preserving local structure and can capture complex non-linear relationships."
    },
    {
      "id": 29,
      "questionText": "Which of these can be used to accelerate t-SNE on large datasets?",
      "options": [
        "Reduce iterations to 1",
        "Increase perplexity to maximum",
        "Use raw data without scaling",
        "Barnes-Hut approximation or FIt-SNE"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Barnes-Hut t-SNE and FIt-SNE optimize computation for larger datasets."
    },
    {
      "id": 30,
      "questionText": "t-SNE is primarily a ______ technique.",
      "options": [
        "Clustering algorithm",
        "Regression",
        "Classification",
        "Visualization and dimensionality reduction"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE is mainly used to reduce dimensionality of data for visualization purposes."
    },
    {
      "id": 31,
      "questionText": "t-SNE uses which similarity measure in high-dimensional space?",
      "options": [
        "Conditional probability based on Gaussian distribution",
        "Hamming distance",
        "Manhattan distance",
        "Cosine similarity"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE converts pairwise distances into conditional probabilities using a Gaussian distribution to represent similarity in high-dimensional space."
    },
    {
      "id": 32,
      "questionText": "Scenario: You increase t-SNE perplexity from 5 to 50. Likely effect?",
      "options": [
        "KL divergence becomes zero",
        "Clusters appear tighter and more separated",
        "Clusters merge, representing broader neighborhood",
        "Visualization fails"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Higher perplexity considers more neighbors, leading to a broader view of local structure and sometimes merging of clusters."
    },
    {
      "id": 33,
      "questionText": "t-SNE output varies between runs due to:",
      "options": [
        "Variance scaling",
        "Perplexity normalization",
        "Gradient descent randomness and initialization",
        "Feature selection"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Random initialization combined with stochastic gradient descent optimization can lead to different results in different runs."
    },
    {
      "id": 34,
      "questionText": "Scenario: You apply t-SNE to 1000-dimensional word embeddings. Best practice?",
      "options": [
        "Normalize only labels",
        "Discard half of the words randomly",
        "Optionally perform PCA first to reduce dimensions before t-SNE",
        "Apply t-SNE directly without scaling"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Using PCA first reduces noise and computation while retaining most variance, improving t-SNE performance on high-dimensional embeddings."
    },
    {
      "id": 35,
      "questionText": "t-SNE is sensitive to which hyperparameters?",
      "options": [
        "Perplexity, learning rate, number of iterations",
        "Data type",
        "Number of output labels only",
        "PCA components only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Perplexity, learning rate, and iterations significantly influence the optimization and visualization outcome."
    },
    {
      "id": 36,
      "questionText": "Scenario: t-SNE shows distorted global distances. Reason?",
      "options": [
        "t-SNE focuses on preserving local structure, not global distances",
        "Data not normalized",
        "Algorithm failed",
        "Number of components is wrong"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE prioritizes local similarity preservation; global distances may be distorted in low-dimensional visualization."
    },
    {
      "id": 37,
      "questionText": "t-SNE uses which distribution in low-dimensional space to compute similarities?",
      "options": [
        "Student’s t-distribution",
        "Poisson",
        "Uniform",
        "Gaussian"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A heavy-tailed Student’s t-distribution is used to avoid crowding in low-dimensional embeddings."
    },
    {
      "id": 38,
      "questionText": "Scenario: t-SNE applied to small dataset, clusters overlap in 2D. Possible reason?",
      "options": [
        "Learning rate too small",
        "All of the above",
        "Data scaled incorrectly",
        "Perplexity too high"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Perplexity, learning rate, and feature scaling all affect t-SNE output; poor tuning can cause cluster overlap."
    },
    {
      "id": 39,
      "questionText": "t-SNE reduces dimensionality mainly for:",
      "options": [
        "Prediction accuracy",
        "Label generation",
        "Visualization of high-dimensional patterns",
        "Feature elimination"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE helps visualize complex high-dimensional data by reducing it to 2D or 3D while preserving local structure."
    },
    {
      "id": 40,
      "questionText": "Scenario: t-SNE shows similar points far apart. Likely cause?",
      "options": [
        "Random initialization",
        "All of the above",
        "Insufficient iterations",
        "Improper perplexity or learning rate"
      ],
      "correctAnswerIndex": 1,
      "explanation": "All these factors can distort local relationships in low-dimensional mapping."
    },
    {
      "id": 41,
      "questionText": "Scenario: High-dimensional clusters not visible after t-SNE. Solution?",
      "options": [
        "Use PCA for pre-reduction",
        "Tune perplexity and learning rate",
        "All of the above",
        "Increase iterations"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Proper hyperparameter tuning, PCA pre-reduction, and enough iterations improve cluster separation."
    },
    {
      "id": 42,
      "questionText": "t-SNE optimization uses which method?",
      "options": [
        "Gradient descent",
        "Random selection",
        "Eigen decomposition only",
        "Closed-form solution"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE minimizes KL divergence using iterative gradient descent."
    },
    {
      "id": 43,
      "questionText": "Scenario: You use t-SNE on image embeddings, clusters appear inconsistent. Recommendation?",
      "options": [
        "Reduce iterations",
        "Use raw pixels without embeddings",
        "Change output dimension to 1D",
        "Repeat multiple runs and average or use PCA initialization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Due to randomness, multiple runs or PCA initialization can stabilize t-SNE visualization."
    },
    {
      "id": 44,
      "questionText": "Scenario: t-SNE applied after PCA with 50 components. Benefit?",
      "options": [
        "Generates labels",
        "Prevents convergence",
        "Distorts local structure",
        "Reduces noise and computation"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA pre-reduction helps t-SNE handle high-dimensional data efficiently while preserving structure."
    },
    {
      "id": 45,
      "questionText": "t-SNE is mainly affected by:",
      "options": [
        "Number of labels",
        "Hyperparameters and data scaling",
        "Dataset name",
        "Feature type only"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE results are sensitive to perplexity, learning rate, iterations, and proper feature scaling."
    },
    {
      "id": 46,
      "questionText": "Scenario: t-SNE clusters different classes but distorts distances. Interpretation?",
      "options": [
        "Data incorrect",
        "Local structure preserved; global distances may differ",
        "Output dimension wrong",
        "Algorithm failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE emphasizes local neighbor relations, which can distort large-scale global distances."
    },
    {
      "id": 47,
      "questionText": "t-SNE is not ideal for:",
      "options": [
        "Exploring patterns",
        "Small datasets",
        "Extremely large datasets without optimization",
        "Visualizing embeddings"
      ],
      "correctAnswerIndex": 2,
      "explanation": "t-SNE has high computational cost for very large datasets, though optimized versions exist."
    },
    {
      "id": 48,
      "questionText": "Scenario: Learning rate too high in t-SNE. Effect?",
      "options": [
        "All of the above",
        "Optimization diverges, poor visualization",
        "Slower convergence",
        "Better cluster separation"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Excessively high learning rate can prevent gradient descent from converging, causing chaotic mapping."
    },
    {
      "id": 49,
      "questionText": "Scenario: Low perplexity used on dense dataset. Effect?",
      "options": [
        "Improves convergence",
        "All points overlap",
        "Merges clusters",
        "Overemphasizes very local structure, clusters may fragment"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Low perplexity focuses on few neighbors, possibly fragmenting clusters that are globally coherent."
    },
    {
      "id": 50,
      "questionText": "Scenario: t-SNE applied to gene expression data for visualization. Useful because?",
      "options": [
        "Generates labels",
        "Predicts outcomes",
        "Reduces features for training",
        "Highlights local patterns and clusters of similar samples"
      ],
      "correctAnswerIndex": 3,
      "explanation": "t-SNE reveals underlying patterns in high-dimensional gene expression data."
    },
    {
      "id": 51,
      "questionText": "Scenario: After applying t-SNE, some clusters appear elongated. Likely cause?",
      "options": [
        "Perplexity or learning rate not optimal",
        "Insufficient iterations",
        "Random initialization",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Cluster shape distortions can result from improper hyperparameters, initialization, or insufficient optimization steps."
    },
    {
      "id": 52,
      "questionText": "Scenario: t-SNE shows overlapping clusters for distinct classes. Recommended action?",
      "options": [
        "Reduce dataset size",
        "Increase output dimensions beyond 3",
        "Use raw data without scaling",
        "Adjust perplexity or learning rate, or try PCA initialization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Hyperparameter tuning and proper initialization help better separate clusters in low-dimensional mapping."
    },
    {
      "id": 53,
      "questionText": "t-SNE can be combined with PCA to:",
      "options": [
        "Replace t-SNE entirely",
        "Increase perplexity automatically",
        "Reduce noise and dimensionality before t-SNE",
        "Generate class labels"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Using PCA first reduces high-dimensional noise, improving t-SNE efficiency and visualization quality."
    },
    {
      "id": 54,
      "questionText": "Scenario: t-SNE on image embeddings produces different plots on repeated runs. Reason?",
      "options": [
        "Data scaling issues",
        "Random initialization and stochastic gradient descent",
        "Perplexity too low",
        "Output dimension is too small"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Variability is due to random initialization and stochastic optimization inherent to t-SNE."
    },
    {
      "id": 55,
      "questionText": "Scenario: t-SNE applied to word embeddings shows tight clusters merging. Likely reason?",
      "options": [
        "KL divergence minimized",
        "Learning rate too low",
        "Perplexity too high, considering more neighbors",
        "Insufficient iterations"
      ],
      "correctAnswerIndex": 2,
      "explanation": "High perplexity broadens the neighborhood, causing close clusters to merge visually."
    },
    {
      "id": 56,
      "questionText": "Scenario: Large dataset t-SNE visualization is slow. Solution?",
      "options": [
        "Reduce perplexity to 1",
        "Increase learning rate to max",
        "Use Barnes-Hut t-SNE or FIt-SNE approximation",
        "Use raw data without normalization"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Optimized t-SNE versions like Barnes-Hut or FIt-SNE reduce computation and memory cost for large datasets."
    },
    {
      "id": 57,
      "questionText": "Scenario: t-SNE on 100-dimensional embeddings, output 2D. Why might global distances be inaccurate?",
      "options": [
        "Incorrect PCA initialization",
        "t-SNE prioritizes local neighborhood preservation over global distances",
        "Random features selected",
        "Algorithm failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "t-SNE focuses on preserving local similarities; global distances may be distorted in low-dimensional space."
    },
    {
      "id": 58,
      "questionText": "Scenario: t-SNE applied on noisy dataset. Recommended preprocessing?",
      "options": [
        "Normalize or standardize features, optionally reduce noise with PCA",
        "Reduce output dimension to 1D",
        "Leave data raw",
        "Increase perplexity to max"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Scaling and dimensionality reduction improve t-SNE’s ability to capture meaningful structure."
    },
    {
      "id": 59,
      "questionText": "Scenario: Two similar clusters in high-dimensional space are far apart in t-SNE plot. Likely reason?",
      "options": [
        "Data labeling issues",
        "Algorithm failure",
        "Insufficient perplexity or learning rate tuning",
        "Incorrect output dimension"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Improper hyperparameters can distort low-dimensional mapping even if local structure is partially preserved."
    },
    {
      "id": 60,
      "questionText": "t-SNE can indirectly help in which of these tasks?",
      "options": [
        "Direct prediction",
        "Visual identification of clusters or patterns",
        "Label encoding",
        "Feature selection for regression"
      ],
      "correctAnswerIndex": 1,
      "explanation": "While not a clustering method itself, t-SNE helps visually identify clusters or patterns in high-dimensional data."
    },
    {
      "id": 61,
      "questionText": "Scenario: You want to visualize 10,000 points with t-SNE but it is slow. Best practice?",
      "options": [
        "Increase perplexity to 1000",
        "Use optimized versions like FIt-SNE or reduce dimensionality with PCA first",
        "Randomly remove half the points",
        "Reduce iterations to 10"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Optimized algorithms or PCA pre-reduction improve t-SNE performance on large datasets."
    },
    {
      "id": 62,
      "questionText": "Scenario: t-SNE applied on text embeddings shows random patterns. Likely cause?",
      "options": [
        "Low-dimensional output",
        "Dataset too large",
        "Random initialization and inappropriate hyperparameters",
        "Data normalization applied"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Random initialization combined with suboptimal perplexity or learning rate can produce unstable visualizations."
    },
    {
      "id": 63,
      "questionText": "t-SNE is particularly useful when:",
      "options": [
        "High-dimensional data visualization is needed",
        "Regression is required",
        "Prediction is the goal",
        "Clustering as a main task"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE is designed for visualization of complex, high-dimensional datasets."
    },
    {
      "id": 64,
      "questionText": "Scenario: t-SNE clusters appear overlapping even after PCA pre-reduction. Recommendation?",
      "options": [
        "Reduce dataset size further",
        "Decrease output dimension to 1D",
        "Tune perplexity and learning rate, or increase iterations",
        "Switch to raw data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Hyperparameter tuning is key to achieving better separation in t-SNE visualizations."
    },
    {
      "id": 65,
      "questionText": "Scenario: t-SNE visualization is chaotic. Possible reasons?",
      "options": [
        "High learning rate, low perplexity, random initialization",
        "PCA used for pre-reduction",
        "Data normalization applied",
        "Output dimension too large"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Improper hyperparameters and random initialization can produce poor or chaotic t-SNE plots."
    },
    {
      "id": 66,
      "questionText": "Scenario: You reduce embeddings to 2D with t-SNE, but clusters not apparent. Next step?",
      "options": [
        "Increase dataset size",
        "Change output to 1D",
        "Use raw data only",
        "Adjust perplexity, learning rate, or perform PCA first"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Hyperparameter tuning and preprocessing like PCA can help reveal clusters in t-SNE plots."
    },
    {
      "id": 67,
      "questionText": "Scenario: t-SNE applied to 300-dimensional image embeddings, some clusters scattered. Likely reason?",
      "options": [
        "All of the above",
        "Learning rate too low",
        "High-dimensional noise, consider PCA pre-reduction",
        "Perplexity too high"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Noise and improper hyperparameters can scatter clusters; preprocessing and tuning are essential."
    },
    {
      "id": 68,
      "questionText": "t-SNE preserves local distances by converting pairwise distances to:",
      "options": [
        "Probabilities using Gaussian in high-d and t-distribution in low-d",
        "Manhattan distance only",
        "Euclidean distances only",
        "Cosine similarity only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Pairwise distances are converted to conditional probabilities in high-d, and Student’s t-distribution in low-d preserves local similarity."
    },
    {
      "id": 69,
      "questionText": "Scenario: You run t-SNE multiple times and get slightly different plots. How to improve consistency?",
      "options": [
        "Increase output dimension to 5D",
        "Decrease dataset size",
        "Normalize labels only",
        "Use PCA initialization and fix random seed"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA initialization and fixing random seed reduce variability in t-SNE visualization."
    },
    {
      "id": 70,
      "questionText": "Scenario: t-SNE produces compressed clusters in center. Likely cause?",
      "options": [
        "Crowding problem in low-dimensional space",
        "Algorithm failure",
        "Learning rate too low",
        "Perplexity too high"
      ],
      "correctAnswerIndex": 0,
      "explanation": "The crowding problem arises because high-dimensional neighborhoods cannot be perfectly represented in low-dimensional space, causing compression."
    },
    {
      "id": 71,
      "questionText": "Scenario: You apply t-SNE on 10,000 image embeddings and clusters appear noisy. Which is the best approach?",
      "options": [
        "Use PCA to reduce dimensions before t-SNE and tune perplexity",
        "Increase learning rate to maximum",
        "Use raw pixel values directly",
        "Reduce output dimensions to 1D"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA pre-reduction reduces noise and dimensionality, improving t-SNE visualization on large datasets."
    },
    {
      "id": 72,
      "questionText": "Scenario: t-SNE on text embeddings shows overlapping topics. Likely cause?",
      "options": [
        "Perplexity too low or high, or insufficient iterations",
        "Data normalization applied",
        "Output dimension too high",
        "Embedding size too small"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Hyperparameter tuning is essential; low/high perplexity or insufficient iterations can cause overlapping clusters."
    },
    {
      "id": 73,
      "questionText": "Scenario: Two clusters in high-dimensional space appear merged in t-SNE plot. What can you do?",
      "options": [
        "Adjust perplexity, learning rate, or use PCA initialization",
        "Increase output dimension to 5D",
        "Normalize labels",
        "Reduce dataset size randomly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Proper hyperparameter tuning and PCA initialization can help separate clusters that appear merged in low-dimensional mapping."
    },
    {
      "id": 74,
      "questionText": "Scenario: Running t-SNE on genomic data, you notice small, isolated clusters. Reason?",
      "options": [
        "Perplexity may be low, emphasizing very local neighborhoods",
        "High learning rate",
        "Output dimension too high",
        "Data normalization missing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Low perplexity focuses on very local neighborhoods, potentially isolating small clusters in visualization."
    },
    {
      "id": 75,
      "questionText": "Scenario: t-SNE produces different visualizations on repeated runs. How to stabilize?",
      "options": [
        "Use PCA initialization and fix random seed",
        "Reduce dataset size",
        "Increase output dimension beyond 3D",
        "Use raw data without scaling"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA initialization and setting a fixed random seed reduce stochastic variation in t-SNE results."
    },
    {
      "id": 76,
      "questionText": "Scenario: t-SNE on high-dimensional sensor data shows tight clusters but global distances are distorted. Interpretation?",
      "options": [
        "Local structure preserved; global distances are not maintained",
        "Algorithm failed",
        "Data incorrectly scaled",
        "Output dimension wrong"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE preserves local pairwise relationships; global distances can appear distorted in 2D/3D visualization."
    },
    {
      "id": 77,
      "questionText": "Scenario: Clusters appear fragmented after t-SNE on customer embeddings. Likely reason?",
      "options": [
        "Perplexity too low",
        "Learning rate too high",
        "Data normalization missing",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Low perplexity, high learning rate, or improper feature scaling can fragment clusters in t-SNE plots."
    },
    {
      "id": 78,
      "questionText": "Scenario: Applying t-SNE to visualize embeddings after deep learning model training. Best preprocessing?",
      "options": [
        "Normalize features and optionally use PCA to reduce dimensions",
        "Use raw embeddings directly",
        "Randomly shuffle dimensions",
        "Use first two features only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Normalization and PCA pre-reduction enhance t-SNE visualization quality for deep embeddings."
    },
    {
      "id": 79,
      "questionText": "Scenario: Clusters appear compressed in center of t-SNE plot. Likely cause?",
      "options": [
        "Crowding problem inherent to low-dimensional mapping",
        "Algorithm failed",
        "Perplexity too high",
        "Learning rate too low"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Crowding problem occurs because high-dimensional neighborhoods cannot be perfectly represented in low dimensions, causing compression."
    },
    {
      "id": 80,
      "questionText": "Scenario: After t-SNE, similar data points are far apart in 2D. Likely reason?",
      "options": [
        "Hyperparameters not tuned correctly",
        "Data normalization failed",
        "Output dimension too high",
        "Labels missing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Incorrect perplexity, learning rate, or insufficient iterations can cause similar points to appear far apart."
    },
    {
      "id": 81,
      "questionText": "Scenario: t-SNE on embeddings shows elongated clusters. Best action?",
      "options": [
        "Adjust perplexity and learning rate, or increase iterations",
        "Reduce output dimension to 1D",
        "Use raw embeddings without scaling",
        "Remove random points"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Cluster elongation often occurs due to suboptimal hyperparameters; tuning and more iterations can improve results."
    },
    {
      "id": 82,
      "questionText": "Scenario: Visualizing 50,000 text embeddings with t-SNE is very slow. Solution?",
      "options": [
        "Use FIt-SNE or Barnes-Hut t-SNE for faster computation",
        "Reduce perplexity to 1",
        "Use raw text instead of embeddings",
        "Decrease output dimensions to 1D"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Optimized t-SNE implementations significantly speed up visualization of large datasets."
    },
    {
      "id": 83,
      "questionText": "Scenario: t-SNE clusters overlap despite tuning. Next step?",
      "options": [
        "Consider alternative dimensionality reduction methods like UMAP",
        "Reduce output dimensions further",
        "Use raw data without embeddings",
        "Remove labels"
      ],
      "correctAnswerIndex": 0,
      "explanation": "If t-SNE cannot separate clusters even after tuning, UMAP or other DR methods might better preserve structure."
    },
    {
      "id": 84,
      "questionText": "Scenario: t-SNE on protein expression data shows some scattered clusters. Likely cause?",
      "options": [
        "Noise in high-dimensional data",
        "Insufficient iterations",
        "Suboptimal hyperparameters",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Noise, too few iterations, and suboptimal hyperparameters can each cause scattered clusters; preprocessing, longer optimization, and tuning help visualization."
    },
    {
      "id": 85,
      "questionText": "Scenario: Before running t-SNE, you use PCA to reduce 500D embeddings to 50D. Why?",
      "options": [
        "Reduce computation and noise while retaining important variance",
        "Increase global distance preservation",
        "Generate labels automatically",
        "Visualize in 3D directly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA pre-reduction helps t-SNE efficiently process high-dimensional data while keeping meaningful structure."
    },
    {
      "id": 86,
      "questionText": "Scenario: Clusters appear stretched along a single axis. Likely cause?",
      "options": [
        "Perplexity too high or learning rate too low",
        "Algorithm failure",
        "Data normalization missing",
        "Incorrect output dimension"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Improper hyperparameters can cause cluster elongation in low-dimensional embeddings."
    },
    {
      "id": 87,
      "questionText": "Scenario: t-SNE applied to 300-dimensional embeddings of customer behavior. Output 2D. What can distort clusters?",
      "options": [
        "Random initialization, hyperparameters, noisy features",
        "Data scaling applied",
        "Output dimension too high",
        "Labels missing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Cluster distortion occurs due to noise, initialization randomness, and hyperparameter settings."
    },
    {
      "id": 88,
      "questionText": "Scenario: t-SNE output differs between runs. Best practice to make consistent?",
      "options": [
        "Fix random seed and use PCA initialization",
        "Use raw data directly",
        "Reduce output dimension to 1D",
        "Change KL divergence formula"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Fixed random seed and PCA initialization reduce stochastic variation across runs."
    },
    {
      "id": 89,
      "questionText": "Scenario: t-SNE applied to embeddings shows overlapping clusters, perplexity set to 5. Recommendation?",
      "options": [
        "Increase perplexity to consider more neighbors",
        "Reduce learning rate",
        "Decrease output dimension to 1D",
        "Use raw high-dimensional features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Low perplexity can underrepresent neighborhood structure; increasing perplexity may separate clusters better."
    },
    {
      "id": 90,
      "questionText": "Scenario: After t-SNE, similar embeddings appear scattered. Likely hyperparameter issue?",
      "options": [
        "Learning rate too high or perplexity not optimal",
        "Output dimension too high",
        "Labels missing",
        "Data normalized incorrectly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Improper learning rate or perplexity can scatter similar points, reducing visualization quality."
    },
    {
      "id": 91,
      "questionText": "Scenario: t-SNE shows different cluster sizes for similar data. Why?",
      "options": [
        "Local density differences and crowding problem in low-dimensional space",
        "Algorithm failure",
        "Incorrect output dimension",
        "Data normalization missing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "t-SNE adapts its similarity scale to local density, so dense regions tend to be expanded and sparse regions contracted; together with the crowding problem, this means cluster sizes in the plot are not reliable."
    },
    {
      "id": 92,
      "questionText": "Scenario: You want faster t-SNE on 100,000 points. Recommendation?",
      "options": [
        "Use Barnes-Hut or FIt-SNE approximation",
        "Reduce output dimension to 1D",
        "Use raw data without scaling",
        "Decrease perplexity to 1"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Optimized t-SNE versions reduce computation and memory for large datasets."
    },
    {
      "id": 93,
      "questionText": "Scenario: t-SNE shows elongated clusters. Likely hyperparameter adjustment?",
      "options": [
        "Adjust perplexity and learning rate, or increase iterations",
        "Reduce dataset size",
        "Use raw data",
        "Remove features randomly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Cluster elongation often occurs due to improper hyperparameters; tuning can improve visualization."
    },
    {
      "id": 94,
      "questionText": "Scenario: t-SNE on embeddings shows isolated points far from clusters. Likely reason?",
      "options": [
        "Outliers or low perplexity emphasizing local neighborhoods",
        "Algorithm failure",
        "Output dimension too high",
        "Data normalization missing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Outliers or very low perplexity can cause points to appear isolated in visualization."
    },
    {
      "id": 95,
      "questionText": "Scenario: You want t-SNE results reproducible across runs. Steps?",
      "options": [
        "Fix random seed, use PCA initialization, standardize features",
        "Increase output dimensions",
        "Reduce dataset size",
        "Use raw data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Reproducibility requires controlling randomness and preprocessing consistently."
    },
    {
      "id": 96,
      "questionText": "Scenario: t-SNE applied to multi-class embeddings, some classes overlap. Best solution?",
      "options": [
        "Tune perplexity, learning rate, or try PCA initialization",
        "Reduce number of classes",
        "Change output to 1D",
        "Use raw features without preprocessing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Hyperparameter tuning and PCA initialization often improve cluster separation for multi-class data."
    },
    {
      "id": 97,
      "questionText": "Scenario: t-SNE visualization shows tight clusters compressed together. Likely cause?",
      "options": [
        "Crowding problem and insufficient perplexity",
        "Algorithm failure",
        "Output dimension too high",
        "Data not normalized"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Crowding problem causes clusters to compress in low-dimensional space, especially with suboptimal perplexity."
    },
    {
      "id": 98,
      "questionText": "Scenario: You apply t-SNE on embeddings with high noise. Recommended step?",
      "options": [
        "Denoise or reduce dimensionality with PCA before t-SNE",
        "Use raw embeddings",
        "Reduce output dimension to 1D",
        "Increase iterations without preprocessing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Preprocessing helps t-SNE focus on meaningful structure rather than noise."
    },
    {
      "id": 99,
      "questionText": "Scenario: t-SNE visualization shows variable cluster shapes between runs. Solution?",
      "options": [
        "Fix random seed and use PCA initialization",
        "Reduce dataset size",
        "Change output dimension to 1D",
        "Normalize labels"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Fixing seed and PCA initialization stabilizes t-SNE output across runs."
    },
    {
      "id": 100,
      "questionText": "Scenario: t-SNE applied to 500D embeddings, some clusters overlapping. Recommended approach?",
      "options": [
        "Try PCA pre-reduction, adjust perplexity and learning rate, increase iterations",
        "Reduce output dimension to 1D",
        "Use raw features",
        "Remove labels"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Proper preprocessing and hyperparameter tuning help t-SNE separate overlapping clusters."
    }
  ]
}