<!--
  LLM_Quiz_Beg / index.html
  Saurabh502's picture
  Update index.html
  36bd29a verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Large Language Models (LLMs) Quiz</title>
<script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
<script src="https://unpkg.com/react@18/umd/react.development.js" crossorigin></script>
<script src="https://unpkg.com/react-dom@18/umd/react-dom.development.js" crossorigin></script>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
/* ---- Page canvas: full-viewport flexbox that centers the quiz card ---- */
body {
font-family: 'Inter', sans-serif;
background-color: #f0f0f0;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
/* NOTE(review): assumes quiz-background.jpg ships alongside this file — TODO confirm;
   the solid background-color above acts as the fallback if it is missing. */
background-image: url('quiz-background.jpg');
background-size: cover;
background-position: center;
}
/* React mount point doubles as the quiz card (semi-opaque so the
   background image shows through around it). */
#root {
background-color: rgba(255, 255, 255, 0.95);
padding: 30px;
border-radius: 12px;
box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1);
width: 80%;
max-width: 800px;
text-align: center;
box-sizing: border-box;
}
/* ---- Base typography ---- */
h1 {
font-size: 2rem;
margin-bottom: 20px;
color: #3498db;
font-weight: 600;
}
p {
font-size: 1.1rem;
margin-bottom: 25px;
color: #555;
line-height: 1.7;
}
/* ---- Question prompt panel ---- */
#question-area {
font-size: 1.2rem;
margin-bottom: 20px;
padding: 15px;
background-color: #e8f0fa;
border-radius: 8px;
border: 1px solid #b8c6da;
color: #2c3e50;
text-align: left;
}
/* Vertical stack of answer choices. */
#answer-options {
display: flex;
flex-direction: column;
align-items: stretch;
margin-bottom: 25px;
}
/* One selectable answer row; hover lifts it slightly. */
.answer-option {
padding: 15px;
margin-bottom: 12px;
background-color: #fff;
border-radius: 8px;
border: 1px solid #ddd;
cursor: pointer;
transition: background-color 0.3s ease, transform 0.2s ease;
font-size: 1.1rem;
text-align: left;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
}
.answer-option:hover {
background-color: #f0f8ff;
transform: translateY(-2px);
border-color: #a6d4fa;
}
/* State classes (presumably toggled by the React script below —
   only the truncated portion of it is visible here). */
.answer-option.selected {
background-color: #a6d4fa;
border-color: #3498db;
color: #fff;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.answer-option.correct {
background-color: #86ef7d;
border-color: #22c55e;
color: #fff;
font-weight: 600;
}
.answer-option.incorrect {
background-color: #fca5a5;
border-color: #dc2626;
color: #fff;
font-weight: 600;
}
/* Per-question feedback line (green/red variants below). */
#result-message {
font-size: 1.2rem;
margin-bottom: 25px;
font-weight: 500;
}
.correct-message {
color: #22c55e;
}
.incorrect-message {
color: #dc2626;
}
/* ---- Action buttons: shared base, then per-button color themes ---- */
#next-button, #skip-button, #reset-button, #review-button {
padding: 12px 25px;
font-size: 1.1rem;
margin: 5px 10px;
border-radius: 8px;
border: none;
cursor: pointer;
transition: background-color 0.3s ease, transform 0.2s ease, box-shadow 0.3s ease;
font-weight: 500;
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
}
/* Next: blue */
#next-button {
background-color: #3498db;
color: #fff;
}
#next-button:hover {
background-color: #2980b9;
transform: translateY(-2px);
box-shadow: 0 4px 7px rgba(0, 0, 0, 0.15);
}
/* Disabled Next: muted, no hover affordances. */
#next-button:disabled {
background-color: #b8c6da;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
/* Skip: orange */
#skip-button {
background-color: #f39c12;
color: #fff;
}
#skip-button:hover {
background-color: #e67e22;
transform: translateY(-2px);
box-shadow: 0 4px 7px rgba(0, 0, 0, 0.15);
}
/* Reset: green */
#reset-button {
background-color: #2ecc71;
color: #fff;
}
#reset-button:hover {
background-color: #27ae60;
transform: translateY(-2px);
box-shadow: 0 4px 7px rgba(0, 0, 0, 0.15);
}
/* Review: purple */
#review-button {
background-color: #8e44ad;
color: #fff;
}
#review-button:hover {
background-color: #732d91;
transform: translateY(-2px);
box-shadow: 0 4px 7px rgba(0, 0, 0, 0.15);
}
/* ---- End-of-quiz summary ---- */
#final-score-area {
font-size: 1.5rem;
font-weight: 600;
margin-bottom: 30px;
color: #2c3e50;
}
.pass-message {
color: #22c55e;
}
.fail-message {
color: #dc2626;
}
/* ---- Answer-review listing (question / given answer / explanation) ---- */
#review-section {
text-align: left;
margin-top: 20px;
}
.review-item {
margin-bottom: 20px;
padding: 15px;
background-color: #f9f9f9;
border-radius: 8px;
border: 1px solid #ddd;
}
.review-question {
font-size: 1.2rem;
color: #2c3e50;
margin-bottom: 10px;
}
.review-answer {
font-size: 1.1rem;
margin-bottom: 5px;
}
.review-explanation {
font-size: 1rem;
color: #555;
margin-top: 10px;
}
/* ---- Tablet breakpoint: tighten spacing and scale type down ---- */
@media (max-width: 768px) {
#root {
width: 95%;
padding: 20px;
}
.answer-option {
padding: 12px;
margin-bottom: 10px;
font-size: 1rem;
}
#question-area {
font-size: 1.1rem;
padding: 12px;
}
#next-button, #skip-button, #reset-button, #review-button {
padding: 10px 20px;
font-size: 1rem;
}
h1 {
font-size: 1.75rem;
}
p {
font-size: 1rem;
}
}
/* ---- Phone breakpoint: card goes edge-to-edge, card chrome removed ---- */
@media (max-width: 480px) {
#root {
width: 100%;
padding: 15px;
border-radius: 0;
box-shadow: none;
}
.answer-option {
padding: 10px;
margin-bottom: 8px;
font-size: 0.95rem;
}
#question-area {
font-size: 1rem;
padding: 10px;
}
#next-button, #skip-button, #reset-button, #review-button {
padding: 10px 18px;
font-size: 0.9rem;
margin: 5px 5px;
}
h1 {
font-size: 1.5rem;
}
p {
font-size: 0.95rem;
}
#answer-options {
margin-bottom: 20px;
}
#final-score-area {
font-size: 1.25rem;
}
}
</style>
</head>
<body>
<div id="root"></div>
<script type="text/babel">
const quizData = [
{
question: "What is the primary goal of language modeling (LM)?",
options: [
"To understand and generate human language.",
"To translate between different programming languages.",
"To create computer graphics.",
"To predict stock market trends.",
],
correctAnswer: "To understand and generate human language.",
explanation: "Language modeling aims to understand and generate human language by predicting the likelihood of word sequences, forming the basis for many NLP applications."
},
{
question: "Which of the following is NOT a stage in the development of language models?",
options: [
"Statistical language models (SLM)",
"Neural language models (NLM)",
"Pre-trained language models (PLM)",
"Quantum language models (QLM)",
],
correctAnswer: "Quantum language models (QLM)",
explanation: "Quantum language models (QLM) are not a recognized stage in language model development; the progression includes SLMs, NLMs, and PLMs."
},
{
question: "What is the basic idea behind statistical language models (SLMs)?",
options: [
"To use neural networks for word prediction.",
"To build word prediction models based on the Markov assumption.",
"To learn distributed representations of words.",
"To pre-train Transformer models.",
],
correctAnswer: "To build word prediction models based on the Markov assumption.",
explanation: "SLMs rely on the Markov assumption, which posits that the probability of a word depends only on a fixed number of previous words (n-grams)."
},
{
question: "What is a limitation of SLMs?",
options: [
"They cannot be applied to information retrieval.",
"They suffer from the curse of dimensionality.",
"They cannot predict future words.",
"They are not used in NLP.",
],
correctAnswer: "They suffer from the curse of dimensionality.",
explanation: "SLMs struggle with high-dimensional data due to the curse of dimensionality, where the number of possible n-grams grows exponentially, making it hard to estimate probabilities accurately."
},
{
question: "What is the main characteristic of neural language models (NLMs)?",
options: [
"They use statistical learning methods.",
"They characterize the probability of word sequences by neural networks.",
"They are based on the Markov assumption.",
"They use n-gram models.",
],
correctAnswer: "They characterize the probability of word sequences by neural networks.",
explanation: "NLMs use neural networks to model the probability of word sequences, overcoming some limitations of SLMs by learning continuous representations."
},
{
question: "What concept did the work in [1] introduce?",
options: [
"N-gram language models.",
"The Transformer architecture.",
"Distributed representation of words.",
"Pre-training and fine-tuning.",
],
correctAnswer: "Distributed representation of words.",
explanation: "The work in [1] refers to early neural network models (e.g., word2vec) that introduced distributed representations, allowing words to be represented as dense vectors in a continuous space."
},
{
question: "What is word2vec?",
options: [
"A type of statistical language model.",
"A neural network for sequence modeling.",
"A simplified shallow neural network for learning distributed word representations.",
"A pre-trained language model.",
],
correctAnswer: "A simplified shallow neural network for learning distributed word representations.",
explanation: "Word2vec is a shallow neural network designed to learn distributed word representations (word embeddings) efficiently from large text corpora."
},
{
question: "What was an early attempt at capturing context-aware word representations?",
options: ["BERT", "ELMo", "GPT-2", "word2vec"],
correctAnswer: "ELMo",
explanation: "ELMo (Embeddings from Language Models) was an early model that captured context-aware word representations by using bidirectional LSTMs, unlike the static embeddings of word2vec."
},
{
question: "Which architecture is highly parallelizable and used in BERT?",
options: ["RNN", "LSTM", "Transformer", "MLP"],
correctAnswer: "Transformer",
explanation: "BERT uses the Transformer architecture, which is highly parallelizable due to its self-attention mechanism, unlike sequential models like RNNs or LSTMs."
},
{
question: "What is a key feature of BERT?",
options: [
"It uses fixed word representations.",
"It is based on statistical learning.",
"It pre-trains bidirectional language models.",
"It predicts the next word based on the most recent context.",
],
correctAnswer: "It pre-trains bidirectional language models.",
explanation: "BERT’s key feature is its bidirectional pre-training, allowing it to consider both left and right context for each word, unlike unidirectional models."
},
{
question: "What paradigm did BERT inspire?",
options: [
"N-gram modeling.",
"Pre-training and fine-tuning.",
"Markov assumption.",
"Distributed representation learning.",
],
correctAnswer: "Pre-training and fine-tuning.",
explanation: "BERT popularized the pre-training and fine-tuning paradigm, where a model is pre-trained on a large corpus and then fine-tuned for specific tasks."
},
{
question: "What do researchers find about scaling PLMs?",
options: [
"It decreases model capacity.",
"It has no effect on model performance.",
"It often leads to an improved model capacity.",
"It only works for small models.",
],
correctAnswer: "It often leads to an improved model capacity.",
explanation: "Scaling pre-trained language models (PLMs) by increasing parameters and data often improves their capacity to handle complex tasks, as seen in models like GPT-3."
},
{
question: "What is a characteristic of large-sized PLMs (LLMs)?",
options: [
"They have smaller parameter sizes than BERT.",
"They display different behaviors from smaller PLMs.",
"They cannot solve few-shot tasks.",
"They are less complex than SLMs.",
],
correctAnswer: "They display different behaviors from smaller PLMs.",
explanation: "Large-sized PLMs (LLMs) exhibit emergent behaviors, such as few-shot learning, that smaller PLMs like BERT typically do not show."
},
{
question: "What is an example of an emergent ability in LLMs?",
options: [
"Solving only specific tasks.",
"Solving few-shot tasks through in-context learning.",
"Learning fixed word representations.",
"Using only n-gram models.",
],
correctAnswer: "Solving few-shot tasks through in-context learning.",
explanation: "An emergent ability in LLMs is solving few-shot tasks via in-context learning, where the model adapts to new tasks with just a few examples provided in the prompt."
},
{
question: "What does the term 'LLM' stand for?",
options: [
"Limited Language Model",
"Large Language Model",
"Linear Language Model",
"Logical Language Model",
],
correctAnswer: "Large Language Model",
explanation: "LLM stands for Large Language Model, referring to models with billions of parameters trained on massive datasets."
},
{
question: "Which of the following is a remarkable application of LLMs?",
options: ["ELMo", "ChatGPT", "word2vec", "SLM"],
correctAnswer: "ChatGPT",
explanation: "ChatGPT, built on the GPT architecture, is a remarkable LLM application known for its conversational abilities and widespread use."
},
{
question: "What is a key focus of the latest language models (e.g., GPT-4)?",
options: [
"Modeling and generating text data.",
"Complex task solving.",
"Learning fixed word representations.",
"Using only statistical methods.",
],
correctAnswer: "Complex task solving.",
explanation: "Latest models like GPT-4 focus on solving complex tasks, leveraging their scale and training to handle diverse, intricate problems."
},
{
question: "How do LLMs differ from small PLMs in accessing them?",
options: [
"They are accessed through fine-tuning.",
"They are accessed through the prompting interface.",
"They do not require any specific interface.",
"They are accessed using n-gram models.",
],
correctAnswer: "They are accessed through the prompting interface.",
explanation: "LLMs are typically accessed via prompting, where users provide instructions or examples in natural language, unlike smaller PLMs that often require fine-tuning."
},
{
question: "What is a challenge in developing LLMs?",
options: [
"Small demand for computation resources.",
"Easy to carry out repetitive, ablating studies.",
"LLMs are mainly trained by academia with full transparency.",
"It is very costly to train them due to huge demand for computation resources.",
],
correctAnswer: "It is very costly to train them due to huge demand for computation resources.",
explanation: "Training LLMs requires vast computational resources, making it expensive and often limiting development to well-funded organizations."
},
{
question: "What is a potential issue with LLMs despite their capacities?",
options: [
"They only produce accurate and helpful content.",
"They are likely to produce toxic, fictitious, or harmful content.",
"They are easy to align with human values.",
"They do not require effective control approaches.",
],
correctAnswer: "They are likely to produce toxic, fictitious, or harmful content.",
explanation: "Despite their capabilities, LLMs can generate toxic or false content due to biases in training data or lack of perfect alignment with human values."
},
{
question: "What are the four major aspects of LLMs covered in the survey?",
options: [
"Pre-training, adaptation, utilization, and capacity evaluation.",
"SLM, NLM, PLM, and LLM.",
"Data collection, model training, fine-tuning, and deployment.",
"Input, processing, output, and storage.",
],
correctAnswer: "Pre-training, adaptation, utilization, and capacity evaluation.",
explanation: "The survey covers pre-training (initial training), adaptation (tuning), utilization (application), and capacity evaluation (performance assessment) as key aspects of LLMs."
},
{
question: "What is the typical parameter size of LLMs?",
options: [
"Millions of parameters.",
"Billions or hundreds of billions of parameters.",
"Thousands of parameters.",
"Less than 100 parameters.",
],
correctAnswer: "Billions or hundreds of billions of parameters.",
explanation: "LLMs typically have billions or hundreds of billions of parameters, enabling their vast capacity, unlike smaller models with millions."
},
{
question: "On what type of data are LLMs typically trained?",
options: [
"Small, specific datasets.",
"Massive text data.",
"Image data only.",
"Audio data only.",
],
correctAnswer: "Massive text data.",
explanation: "LLMs are trained on massive text corpora, such as web texts, books, and articles, to capture broad language patterns."
},
{
question: "What strong capacity do LLMs exhibit?",
options: [
"Understanding only programming languages.",
"Solving complex tasks via text generation.",
"Generating only image data.",
"Performing only simple calculations.",
],
correctAnswer: "Solving complex tasks via text generation.",
explanation: "LLMs excel at solving complex tasks by generating text, leveraging their understanding of language and context."
},
{
question: "What does the survey introduce as basic background for LLMs?",
options: [
"Only key techniques.",
"Scaling laws, emergent abilities, and key techniques.",
"Only emergent abilities.",
"Only scaling laws.",
],
correctAnswer: "Scaling laws, emergent abilities, and key techniques.",
explanation: "The survey provides background on scaling laws (performance vs. size), emergent abilities (e.g., few-shot learning), and key techniques (e.g., Transformers)."
},
{
question: "What is the key to understanding the development of language models in research history?",
options: [
"Focusing on text data generation.",
"The leap from language modeling to task solving.",
"Using only statistical methods.",
"Ignoring the evolution of model capacities.",
],
correctAnswer: "The leap from language modeling to task solving.",
explanation: "The shift from merely modeling language (predicting words) to solving tasks (e.g., reasoning, Q&A) marks a pivotal development in language models."
},
{
question: "Which of the following is NOT a characteristic of LLMs?",
options: [
"Strong capacities to understand natural language.",
"Solving complex tasks.",
"Training on small datasets.",
"Using Transformer language models.",
],
correctAnswer: "Training on small datasets.",
explanation: "LLMs are characterized by training on massive datasets, not small ones, which enables their strong performance."
},
{
question: "What is a major difference between LLMs and previous smaller PLMs?",
options: [
"LLMs have smaller parameter sizes.",
"LLMs display surprising emergent abilities.",
"LLMs are simpler to train.",
"LLMs do not require large-scale data.",
],
correctAnswer: "LLMs display surprising emergent abilities.",
explanation: "LLMs show emergent abilities like in-context learning, which smaller PLMs typically lack due to their scale and training."
},
{
question: "How has the research paradigm shifted towards the use of LLMs in NLP?",
options: [
"Away from using LLMs.",
"LLMs serve as a general-purpose language task solver.",
"Focusing only on statistical language models.",
"Ignoring pre-training and fine-tuning.",
],
correctAnswer: "LLMs serve as a general-purpose language task solver.",
explanation: "The paradigm has shifted to using LLMs as general-purpose solvers for various NLP tasks via prompting, rather than task-specific fine-tuning."
},
{
question: "What is a challenge that traditional search engines face with the advent of LLMs?",
options: [
"Increased user engagement.",
"New information seeking way through AI chatbots.",
"Lower computational costs.",
"Easier data processing.",
],
correctAnswer: "New information seeking way through AI chatbots.",
explanation: "LLM-powered chatbots offer a conversational way to seek information, challenging the keyword-based approach of traditional search engines."
},
{
question: "What is a current trend in computer vision (CV) research related to LLMs?",
options: [
"Developing only traditional CV models.",
"Developing ChatGPT-like vision-language models.",
"Ignoring multimodal dialogues.",
"Focusing solely on image classification.",
],
correctAnswer: "Developing ChatGPT-like vision-language models.",
explanation: "CV research is trending towards multimodal models that combine vision and language, inspired by ChatGPT’s success."
},
{
question: "What is a potential impact of the new wave of LLM technology?",
options: [
"A decline in real-world applications.",
"A prosperous ecosystem of real-world applications based on LLMs.",
"No change in the AI community.",
"Slower development of AI algorithms.",
],
correctAnswer: "A prosperous ecosystem of real-world applications based on LLMs.",
explanation: "LLMs are fostering a wide range of real-world applications, from chatbots to automation, due to their versatility."
},
{
question: "What is one of the mysterious aspects of LLMs?",
options: [
"Why they are easy to train.",
"Why emergent abilities occur in LLMs.",
"Why they have small parameter sizes.",
"Why they only work on small datasets.",
],
correctAnswer: "Why emergent abilities occur in LLMs.",
explanation: "The emergence of abilities like few-shot learning in LLMs is not fully understood, making it a mysterious aspect of their behavior."
},
{
question: "Why is it difficult for the research community to train capable LLMs?",
options: [
"Due to the small demand for computation resources.",
"Because it is very costly to carry out repetitive studies.",
"Because all training details are publicly available.",
"Because LLMs are mainly trained by academia.",
],
correctAnswer: "Because it is very costly to carry out repetitive studies.",
explanation: "Training LLMs requires extensive computational resources, making repetitive studies costly and limiting academic research."
},
{
question: "What is a challenge in aligning LLMs?",
options: [
"Ensuring they produce only factual content.",
"Aligning them with human values or preferences.",
"Making them generate longer texts.",
"Reducing their computational costs.",
],
correctAnswer: "Aligning them with human values or preferences.",
explanation: "Aligning LLMs with human values is challenging due to biases in data and the complexity of defining universal preferences."
},
{
question: "What does the paper 'Planning for AGI and beyond' discuss?",
options: [
"The limitations of AGI.",
"Short-term and long-term plans to approach AGI.",
"The irrelevance of AGI.",
"Only short-term plans for AI development.",
],
correctAnswer: "Short-term and long-term plans to approach AGI.",
explanation: "The paper outlines strategies for developing Artificial General Intelligence (AGI), covering both immediate and future steps."
},
{
question: "What is a recent argument about GPT-4?",
options: [
"It is far from being an AGI system.",
"It might be considered an early version of an AGI system.",
"It is less capable than previous models.",
"It can only perform simple tasks.",
],
correctAnswer: "It might be considered an early version of an AGI system.",
explanation: "Some argue GPT-4’s broad capabilities suggest it could be an early AGI, though it lacks full general intelligence."
},
{
question: "How is Microsoft 365 being empowered?",
options: [
"By removing AI algorithms.",
"By LLMs to automate office work.",
"By increasing manual labor.",
"By using only statistical models.",
],
correctAnswer: "By LLMs to automate office work.",
explanation: "Microsoft 365 integrates LLMs (e.g., via Copilot) to automate tasks like writing, summarizing, and data analysis."
},
{
question: "What is a suggestion for choosing layer normalization in LLMs?",
options: ["Post RMSNorm", "Pre RMSNorm", "Post LN", "No Normalization"],
correctAnswer: "Pre RMSNorm",
explanation: "Pre RMSNorm (Root Mean Square Normalization before layers) is suggested for LLMs due to its stability and performance benefits."
},
{
question: "Which activation function is recommended for stronger generalization and training stability?",
options: ["ReLU", "Sigmoid", "SwiGLU", "Tanh"],
correctAnswer: "SwiGLU",
explanation: "SwiGLU (Swish-Gated Linear Unit) is recommended for LLMs as it improves generalization and training stability over traditional functions like ReLU."
},
{
question: "Which position embedding is considered a better choice for LLMs?",
options: ["Absolute Positional Embedding", "Relative Positional Embedding", "RoPE", "Sinusoidal Positional Encoding"],
correctAnswer: "RoPE",
explanation: "RoPE (Rotary Position Embedding) is favored in LLMs for its ability to encode relative positions efficiently and scale with sequence length."
},
{
question: "What is the primary role of pre-training in LLMs?",
options: [
"To fine-tune models for specific tasks.",
"To encode general knowledge from large-scale corpus.",
"To reduce the size of the model.",
"To improve inference speed.",
],
correctAnswer: "To encode general knowledge from large-scale corpus.",
explanation: "Pre-training encodes general knowledge from vast text corpora into LLMs, providing a foundation for later task-specific adaptation."
},
{
question: "What type of data is included in the arXiv Dataset?",
options: [
"Only book data.",
"Scientific publication data.",
"Social media posts.",
"Image and video data.",
],
correctAnswer: "Scientific publication data.",
explanation: "The arXiv Dataset contains scientific publication data, primarily research papers, used for training models on academic content."
},
{
question: "What is the approximate size of the peS2o dataset?",
options: ["42MB", "42GB", "42TB", "42B tokens"],
correctAnswer: "42B tokens",
explanation: "The peS2o dataset is approximately 42 billion tokens, a massive text corpus used for training LLMs."
},
{
question: "What is a characteristic of the articles in Wikipedia?",
options: [
"They are mostly written in a conversational style.",
"They are composed in an expository style with references.",
"They are short and lack detail.",
"They cover only a narrow range of topics.",
],
correctAnswer: "They are composed in an expository style with references.",
explanation: "Wikipedia articles are written in an expository style, providing detailed explanations with references, making them a valuable training resource."
},
{
question: "What is a technique used to improve memory efficiency and throughput of deployed LLMs?",
options: ["Data Parallelism", "Tensor Parallelism", "Pipeline Parallelism", "PagedAttention"],
correctAnswer: "PagedAttention",
explanation: "PagedAttention improves memory efficiency and throughput in LLMs by managing key-value caches more effectively during inference."
},
{
question: "How does PagedAttention partition sequences?",
options: [
"Into random segments.",
"Into equal-length segments.",
"Into subsequences.",
"Into overlapping segments.",
],
correctAnswer: "Into subsequences.",
explanation: "PagedAttention partitions sequences into subsequences, allowing efficient memory management by processing them in blocks."
},
{
question: "What is the benefit of using PagedAttention?",
options: [
"Reduces computational cost.",
"Increases GPU utilization and enables efficient memory sharing.",
"Simplifies model architecture.",
"Improves model accuracy.",
],
correctAnswer: "Increases GPU utilization and enables efficient memory sharing.",
explanation: "PagedAttention boosts GPU utilization and memory sharing, optimizing resource use during LLM inference."
},
{
question: "What type of floating-point number was predominantly used for pre-training in previous PLMs like BERT?",
options: ["FP16", "BF16", "FP32", "INT8"],
correctAnswer: "FP32",
explanation: "FP32 (32-bit floating-point) was commonly used in earlier PLMs like BERT for its high precision during pre-training."
},
{
question: "Why have some studies started to use FP16 for pre-training LLMs?",
options: [
"To increase computational accuracy.",
"To reduce memory usage and communication overhead.",
"To simplify the model architecture.",
"To avoid the loss of computational accuracy.",
],
correctAnswer: "To reduce memory usage and communication overhead.",
explanation: "FP16 (16-bit floating-point) reduces memory usage and communication overhead, making pre-training LLMs more efficient despite lower precision."
},
{
question: "What is a potential issue with using FP16 for training?",
options: [
"Increased memory usage.",
"Higher communication overhead.",
"Loss of computational accuracy.",
"Improved model performance.",
],
correctAnswer: "Loss of computational accuracy.",
explanation: "FP16’s lower precision can lead to a loss of computational accuracy, potentially affecting model quality during training."
},
{
question: "What is BF16?",
options: [
"A 64-bit floating-point number.",
"A type of activation function.",
"Brain Floating Point, an alternative to FP16.",
"A method for parallel training.",
],
correctAnswer: "Brain Floating Point, an alternative to FP16.",
explanation: "BF16 (Brain Floating Point) is a 16-bit format developed by Google, offering a balance between FP16’s efficiency and FP32’s precision."
},
{
question: "How does BF16 compare to FP16 in terms of representation accuracy for pre-training?",
options: [
"BF16 performs worse than FP16.",
"BF16 performs the same as FP16.",
"BF16 generally performs better than FP16.",
"BF16 is not suitable for pre-training.",
],
correctAnswer: "BF16 generally performs better than FP16.",
explanation: "BF16 provides better representation accuracy than FP16 due to its wider dynamic range, making it more suitable for pre-training LLMs."
},
{
question: "Which training technique is often used jointly with 3D parallelism to improve training throughput?",
options: [
"Mixed precision training.",
"Data parallelism.",
"Sequence parallelism.",
"PagedAttention.",
],
correctAnswer: "Mixed precision training.",
explanation: "Mixed precision training, combining FP16/BF16 with FP32, is used with 3D parallelism (data, tensor, pipeline) to boost LLM training throughput."
},
{
question: "What type of parallelism was used to train BLOOM on 384 A100 GPUs?",
options: [
"Only data parallelism.",
"Only tensor parallelism.",
"8-way data parallelism, 4-way tensor parallelism, and 12-way pipeline parallelism.",
"Only pipeline parallelism.",
],
correctAnswer: "8-way data parallelism, 4-way tensor parallelism, and 12-way pipeline parallelism.",
explanation: "BLOOM used a combination of 8-way data, 4-way tensor, and 12-way pipeline parallelism to efficiently train on 384 A100 GPUs."
},
{
question: "What is the primary approach to using LLMs after pre-training or adaptation tuning?",
options: [
"Fine-tuning on small datasets.",
"Designing suitable prompting strategies.",
"Ignoring task-specific prompts.",
"Using only n-gram models.",
],
correctAnswer: "Designing suitable prompting strategies.",
explanation: "Post-pre-training, LLMs are primarily used via prompting strategies, where carefully crafted inputs elicit desired outputs."
},
{
question: "What is a representative prompting method discussed in the text?",
options: ["Fine-tuning", "Backpropagation", "In-context learning", "Random search"],
correctAnswer: "In-context learning",
explanation: "In-context learning is a key prompting method where LLMs learn tasks from examples provided in the input prompt."
},
{
question: "What does in-context learning involve?",
options: [
"Learning fixed word representations.",
"Formulating task description and demonstrations in natural language text.",
"Using only code data for training.",
"Employing only manual creation of prompts.",
],
correctAnswer: "Formulating task description and demonstrations in natural language text.",
explanation: "In-context learning involves providing a task description and examples in natural language within the prompt to guide the LLM."
},
{
question: "What is the process of manually creating a suitable prompt also called?",
options: ["Automatic prompt optimization", "Prompt engineering", "Prompt tuning", "Prompt generation"],
correctAnswer: "Prompt engineering",
explanation: "Prompt engineering refers to the manual design of prompts to effectively leverage LLMs’ capabilities for specific tasks."
},
{
question: "What is the impact of a well-designed prompt on LLMs?",
options: [
"It hinders the LLMs' ability to accomplish specific tasks.",
"It is very helpful to elicit the abilities of LLMs.",
"It has no influence on the performance of LLMs.",
"It makes LLMs generate random outputs.",
],
correctAnswer: "It is very helpful to elicit the abilities of LLMs.",
explanation: "A well-designed prompt significantly enhances an LLM’s ability to perform tasks by providing clear context and instructions."
},
{
question: "What type of data is considered well-organized with algorithmic logic and programming flow?",
options: ["Natural language text", "Image data", "Code data", "Audio data"],
correctAnswer: "Code data",
explanation: "Code data is structured with algorithmic logic and programming flow, making it distinct from unstructured natural language text."
},
{
question: "What ability do models trained on code show?",
options: ["Weak reasoning ability", "Strong reasoning ability", "No reasoning ability", "Only language generation ability"],
correctAnswer: "Strong reasoning ability",
explanation: "Models trained on code exhibit strong reasoning ability due to the logical and structured nature of programming data."
},
{
question: "What is a hypothesis regarding code data and LLMs' reasoning performance?",
options: [
"Code data hinders reasoning performance.",
"Code data has no effect on reasoning performance.",
"Code data may be useful to improve the reasoning performance of LLMs.",
"Code data is only useful for code generation.",
],
correctAnswer: "Code data may be useful to improve the reasoning performance of LLMs.",
explanation: "It’s hypothesized that code data’s logical structure could enhance LLMs’ reasoning skills beyond just code generation."
},
{
question: "What is a characteristic of LLMs' text generation quality?",
options: [
"It is poor compared to human-written texts.",
"It is comparable to human-written texts.",
"It is always underestimated by automatic metrics.",
"It cannot be evaluated.",
],
correctAnswer: "It is comparable to human-written texts.",
explanation: "LLMs generate text that is often comparable to human-written content in coherence and quality, a testament to their training scale."
},
{
question: "How can LLMs be used in the context of generation evaluation?",
options: [
"Only to generate texts, not evaluate them.",
"As language generation evaluators.",
"To replace human evaluators entirely.",
"To perform only statistical analysis.",
],
correctAnswer: "As language generation evaluators.",
explanation: "LLMs can evaluate generated text by assessing its quality, coherence, or relevance, supplementing human judgment."
},
{
question: "What is a limitation of LLMs in specialized generation?",
options: [
"They excel in all types of generation.",
"They have learned general language patterns but underperform in specialized generation.",
"They cannot generate coherent text.",
"They are only good at generating code.",
],
correctAnswer: "They have learned general language patterns but underperform in specialized generation.",
explanation: "LLMs excel in general language but may struggle with highly specialized domains due to limited domain-specific training data."
},
{
question: "What is a common approach to enhancing LLMs' factual knowledge?",
options: [
"Excluding external information.",
"Incorporating extracted relevant information into the context.",
"Relying solely on pre-training data.",
"Ignoring up-to-date information.",
],
correctAnswer: "Incorporating extracted relevant information into the context.",
explanation: "Enhancing LLMs’ factual knowledge often involves adding relevant external information (e.g., via retrieval-augmented generation) to the context."
},
{
question: "What is a finding about smaller models with instruction tuning compared to larger models without it?",
options: [
"Smaller models always perform worse.",
"Smaller models can perform better.",
"Size has no impact on performance.",
"Larger models are always better.",
],
correctAnswer: "Smaller models can perform better.",
explanation: "Smaller models with instruction tuning can outperform larger untuned models by being more aligned with specific tasks."
},
{
question: "What does instruction tuning enable LLMs to do?",
options: [
"Only perform on seen tasks.",
"Follow human instructions to perform specific tasks, even on unseen tasks.",
"Ignore natural language instructions.",
"Perform only without demonstrations.",
],
correctAnswer: "Follow human instructions to perform specific tasks, even on unseen tasks.",
explanation: "Instruction tuning allows LLMs to generalize to unseen tasks by following human instructions provided in natural language."
},
{
question: "What is a benefit of instruction tuning?",
options: [
"It is more costly than pre-training.",
"It requires a large amount of instruction data.",
"It is much less costly than pre-training.",
"It does not improve model performance.",
],
correctAnswer: "It is much less costly than pre-training.",
explanation: "Instruction tuning is less resource-intensive than pre-training, requiring only a smaller dataset of instructions to adapt the model."
},
{
question: "What is the primary goal of instruction tuning?",
options: [
"To decrease the model size",
"To make LLMs better at following instructions",
"To remove the need for pre-training",
"To generate random text",
],
correctAnswer: "To make LLMs better at following instructions",
explanation: "The main goal of instruction tuning is to improve LLMs’ ability to accurately follow human instructions for various tasks."
},
{
question: "What kind of tasks does instruction tuning help LLMs perform?",
options: [
"Only simple calculations",
"Specific tasks without demonstrations",
"Only tasks with examples",
"No specific tasks",
],
correctAnswer: "Specific tasks without demonstrations",
explanation: "Instruction tuning enables LLMs to perform specific tasks based solely on instructions, without needing example demonstrations."
},
{
question: "How does instruction tuning affect LLMs' ability to follow instructions?",
options: [
"It impairs their ability",
"It has no effect",
"It enhances their ability",
"It makes them ignore instructions",
],
correctAnswer: "It enhances their ability",
explanation: "Instruction tuning enhances LLMs’ capability to interpret and act on human instructions effectively."
},
{
question: "What have a large number of studies confirmed about instruction tuning?",
options: [
"It is ineffective",
"It achieves superior performance on seen and unseen tasks",
"It only works on seen tasks",
"It decreases performance",
],
correctAnswer: "It achieves superior performance on seen and unseen tasks",
explanation: "Studies show instruction tuning boosts LLM performance on both familiar (seen) and new (unseen) tasks."
},
{
question: "What is a key aspect of high-quality long CoT data curation?",
options: [
"Using small models",
"Using open models or APIs for data synthesis",
"Avoiding teacher models",
"Manual data collection only",
],
correctAnswer: "Using open models or APIs for data synthesis",
explanation: "High-quality Chain-of-Thought (CoT) data is often curated using open models or APIs to synthesize detailed reasoning steps."
},
{
question: "What is the basic idea behind creating long CoT response data?",
options: [
"Manually writing responses",
"Feeding prompts into teacher models",
"Using only small datasets",
"Ignoring the prompt structure",
],
correctAnswer: "Feeding prompts into teacher models",
explanation: "Long CoT response data is created by feeding prompts into teacher models to generate step-by-step reasoning responses."
},
{
question: "What is the finding about smaller models with instruction tuning compared to larger models without fine-tuning?",
options: [
"Smaller models always perform worse.",
"Smaller models can perform better.",
"Size has no impact on performance.",
"Larger models are always better.",
],
correctAnswer: "Smaller models can perform better.",
explanation: "Research indicates that smaller, instruction-tuned models can outperform larger models without tuning due to better task alignment."
},
];
const QuizApp = () => {
const [currentQuestionIndex, setCurrentQuestionIndex] = React.useState(0);
const [selectedAnswer, setSelectedAnswer] = React.useState(null);
const [score, setScore] = React.useState(0);
const [message, setMessage] = React.useState('');
const [quizEnd, setQuizEnd] = React.useState(false);
const [selectedAnswerColor, setSelectedAnswerColor] = React.useState('');
const [currentQuestionNumber, setCurrentQuestionNumber] = React.useState(1);
const [incorrectAnswers, setIncorrectAnswers] = React.useState([]);
const [showReview, setShowReview] = React.useState(false);
const currentQuestion = quizData[currentQuestionIndex];
const totalQuestions = quizData.length;
const handleAnswerSelection = (answer) => {
setSelectedAnswer(answer);
if (answer === currentQuestion.correctAnswer) {
setScore(score + 1);
setMessage('Correct!');
setSelectedAnswerColor('correct');
} else {
setMessage('Incorrect!');
setSelectedAnswerColor('incorrect');
setIncorrectAnswers([...incorrectAnswers, {
question: currentQuestion.question,
selectedAnswer: answer,
correctAnswer: currentQuestion.correctAnswer,
explanation: currentQuestion.explanation
}]);
}
};
const handleNextQuestion = () => {
if (currentQuestionIndex < quizData.length - 1) {
setCurrentQuestionIndex(currentQuestionIndex + 1);
setSelectedAnswer(null);
setMessage('');
setSelectedAnswerColor('');
setCurrentQuestionNumber(currentQuestionNumber + 1);
} else {
setQuizEnd(true);
}
};
const handleSkipQuestion = () => {
if (currentQuestionIndex < quizData.length - 1) {
setCurrentQuestionIndex(currentQuestionIndex + 1);
setSelectedAnswer(null);
setMessage('');
setSelectedAnswerColor('');
setCurrentQuestionNumber(currentQuestionNumber + 1);
} else {
setQuizEnd(true);
}
};
const handleResetQuiz = () => {
setCurrentQuestionIndex(0);
setSelectedAnswer(null);
setScore(0);
setMessage('');
setQuizEnd(false);
setSelectedAnswerColor('');
setCurrentQuestionNumber(1);
setIncorrectAnswers([]);
setShowReview(false);
};
const handleReviewIncorrect = () => {
setShowReview(true);
};
return (
<div>
<h1>Large Language Models (LLMs) Quiz</h1>
<p>Test your knowledge on Large Language Models (LLMs). Choose the best answer for each question.</p>
{!quizEnd ? (
<>
<div id="question-area">
Question {currentQuestionNumber}/{totalQuestions}: {currentQuestion.question}
</div>
<div id="answer-options">
{currentQuestion.options.map((option) => (
<div
key={option}
className={`answer-option ${selectedAnswer === option ? selectedAnswerColor : ''} ${selectedAnswer ? 'disabled' : ''} ${selectedAnswerColor && option === currentQuestion.correctAnswer ? 'correct' : ''}`}
onClick={() => !selectedAnswer && handleAnswerSelection(option)}
>
{option}
</div>
))}
</div>
<div id="result-message" className={selectedAnswerColor === 'correct' ? 'correct-message' : 'incorrect-message'}>{message}</div>
<div style={{display: 'flex', justifyContent: 'center'}}>
<button id="next-button" onClick={handleNextQuestion} disabled={!selectedAnswer}>
Next Question
</button>
<button id="skip-button" onClick={handleSkipQuestion}>
Skip Question
</button>
</div>
</>
) : (
<>
<div id="final-score-area">
Your Final Score: {score} / {quizData.length} <br/>
{score >= Math.ceil(quizData.length * 0.8) ? (
<span className="pass-message">Passed</span>
) : (
<span className="fail-message">Failed</span>
)}
</div>
<div style={{display: 'flex', justifyContent: 'center'}}>
<button id="reset-button" onClick={handleResetQuiz}>
Play Again
</button>
{incorrectAnswers.length > 0 && (
<button id="review-button" onClick={handleReviewIncorrect}>
Review Incorrect Answers
</button>
)}
</div>
{showReview && (
<div id="review-section">
<h2>Review of Incorrect Answers</h2>
{incorrectAnswers.map((item, index) => (
<div key={index} className="review-item">
<div className="review-question">{item.question}</div>
<div className="review-answer">
<strong>Your Answer:</strong> {item.selectedAnswer} <span className="incorrect-message">(Incorrect)</span>
</div>
<div className="review-answer">
<strong>Correct Answer:</strong> {item.correctAnswer} <span className="correct-message">(Correct)</span>
</div>
<div className="review-explanation">
<strong>Explanation:</strong> {item.explanation}
</div>
</div>
))}
</div>
)}
</>
)}
</div>
);
};
ReactDOM.render(<QuizApp />, document.getElementById("root"));
</script>
</body>
</html>