<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Mixture of Experts (MoE) Quiz</title>
  <script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
  <script src="https://unpkg.com/react@18/umd/react.development.js" crossorigin></script>
  <script src="https://unpkg.com/react-dom@18/umd/react-dom.development.js" crossorigin></script>
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
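  <!-- Page styling: a centered, translucent quiz card over an optional background image -->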
  <style>
    body {
      font-family: 'Inter', sans-serif;
      background-color: #f0f0f0;
      margin: 0;
      padding: 0;
      display: flex;
      justify-content: center;
      align-items: center;
      min-height: 100vh;
      background-image: url('quiz-background.jpg'); /* Replace with your image */
      background-size: cover;
      background-position: center;
    }
    #quiz-container {
      /* Translucent white via rgba so the card is see-through without fading
         its contents (an opacity rule would dim the text as well) */
      background-color: rgba(255, 255, 255, 0.95);
      border-radius: 12px;
      padding: 24px;
      box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
      width: 80%;
      max-width: 600px;
      text-align: center;
      backdrop-filter: blur(5px);
    }
    h1 {
      color: #3498db;
      margin-bottom: 20px;
      font-size: 2rem;
    }
    #question-area {
      font-size: 1.4rem;
      margin-bottom: 20px;
      color: #555;
      font-weight: 500;
      line-height: 1.6;
    }
    #answer-options {
      display: flex;
      flex-direction: column;
      align-items: stretch;
      margin-bottom: 20px;
    }
    .answer-option {
      padding: 12px;
      margin: 8px 0;
      border: 1px solid #ccc;
      border-radius: 6px;
      cursor: pointer;
      background-color: #e0e0e0;
      transition: background-color 0.3s, color 0.3s, box-shadow 0.3s;
      font-size: 1.1rem;
      text-align: left;
      box-shadow: 2px 2px 4px rgba(0, 0, 0, 0.05);
    }
    .answer-option:hover {
      background-color: #3498db;
      color: white;
      border-color: #2980b9;
      box-shadow: 3px 3px 6px rgba(0, 0, 0, 0.1);
    }
    .answer-option.correct {
      background-color: #2ecc71;
      color: white;
      border-color: #27ae60;
      box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    }
    .answer-option.incorrect {
      background-color: #e74c3c;
      color: white;
      border-color: #c0392b;
      box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    }
    #timer-area {
      font-size: 1.2rem;
      margin-bottom: 20px;
      color: #777;
      font-weight: 600;
    }
    #score-area {
      font-size: 1.5rem;
      font-weight: bold;
      color: #3498db;
      margin-bottom: 24px;
    }
    #final-score-area {
      font-size: 1.8rem;
      font-weight: bolder;
      color: #e67e22;
      margin-bottom: 24px;
    }
    #reset-button, #skip-button, #next-button {
      padding: 12px 24px;
      background-color: #3498db;
      color: white;
      border: none;
      border-radius: 6px;
      cursor: pointer;
      font-size: 1.1rem;
      transition: background-color 0.3s, transform 0.2s;
      margin: 8px;
      box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
    }
    #reset-button:hover, #skip-button:hover, #next-button:hover {
      background-color: #217dbb;
      transform: translateY(-2px);
      box-shadow: 3px 3px 7px rgba(0, 0, 0, 0.15);
    }
    #result-message {
      font-size: 1.2rem;
      font-weight: 600;
      margin-bottom: 20px;
      min-height: 30px;
      transition: color 0.3s;
    }
    .correct-message {
      color: #2ecc71;
    }
    .incorrect-message {
      color: #e74c3c;
    }
    .hide {
      display: none;
    }
    .disabled {
      pointer-events: none;
      opacity: 0.6;
    }
  </style>
</head>
<body>
  <div id="root"></div>
  <script type="text/babel">
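    // Question bank: each entry pairs a prompt with four options and the
    // exact string of the correct answer.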
    const quizData = [
      {
        question: "What is a primary challenge in developing large AI models as datasets grow more diverse and complex?",
        options: ["Decreased computational resource consumption", "Difficulty in fitting heterogeneous and complex data", "Reduced deployment difficulties", "Enhanced model usability"],
        correctAnswer: "Difficulty in fitting heterogeneous and complex data",
      },
      {
        question: "What is the core idea behind the Mixture of Experts (MoE) architecture?",
        options: ["Activating all parameters for every input", "Dynamically selecting and activating relevant sub-models", "Using a single, large expert model", "Simplifying model architecture"],
        correctAnswer: "Dynamically selecting and activating relevant sub-models",
      },
      {
        question: "What is a key advantage of MoE models in handling large-scale, multimodal data?",
        options: ["Increased computational costs", "Reduced model performance", "Improved model efficiency", "Limited capacity"],
        correctAnswer: "Improved model efficiency",
      },
      {
        question: "Which of the following is NOT a key component in the design of an MoE?",
        options: ["Gating function", "Expert networks", "Routing mechanism", "Activation function"],
        correctAnswer: "Activation function",
      },
      {
        question: "What is the role of the gating function in an MoE architecture?",
        options: ["To define the expert networks", "To allocate input data to designated experts", "To train the model", "To determine the output"],
        correctAnswer: "To allocate input data to designated experts",
      },
      {
        question: "What is a critical consideration when designing a gating function?",
        options: ["Ensuring uneven distribution of input data", "Assigning dissimilar data to the same expert", "Distributing input data as evenly as possible", "Maximizing computational cost"],
        correctAnswer: "Distributing input data as evenly as possible",
      },
      {
        question: "Which type of function is commonly used as a gating function due to its simplicity and effectiveness?",
        options: ["Non-linear function", "Linear function with softmax", "Complex polynomial function", "Random function"],
        correctAnswer: "Linear function with softmax",
      },
      {
        question: "In an MoE model, what replaces the Feed-Forward Network (FFN) layer within a transformer block?",
        options: ["Self-Attention layer", "MoE layer", "Convolutional layer", "Pooling layer"],
        correctAnswer: "MoE layer",
      },
      {
        question: "Why is the FFN layer typically replaced by an MoE layer in a Transformer?",
        options: ["To increase computational cost", "To disrupt the core mechanism of the Transformer", "To reduce computational cost while maintaining expressive power", "To simplify the model architecture"],
        correctAnswer: "To reduce computational cost while maintaining expressive power",
      },
      {
        question: "What is a disadvantage of performing the TopK operation before softmax in a gating function?",
        options: ["Reduced computational overhead", "Scores may not conform to a probability distribution", "Provides statistically meaningful activation weights", "Clearer guidance on determining the value of k"],
        correctAnswer: "Scores may not conform to a probability distribution",
      },
      {
        question: "What is a disadvantage of applying the TopK function after softmax?",
        options: ["Scores conform to a probability distribution", "More straightforward normalization", "Requires computing softmax for all experts", "Filters out irrelevant experts quickly"],
        correctAnswer: "Requires computing softmax for all experts",
      },
      {
        question: "What is the primary function of expert networks in an MoE architecture?",
        options: ["To determine the gating function", "To allocate input data", "To specialize in distinct knowledge domains", "To manage the routing mechanism"],
        correctAnswer: "To specialize in distinct knowledge domains",
      },
      {
        question: "How are expert networks typically integrated to ensure efficiency and scalability?",
        options: ["As completely separate, independent models", "Integrated into a single network model with specific layers replaced by MoE layers", "As a simplified version of the gating function", "Directly replacing the input layer"],
        correctAnswer: "Integrated into a single network model with specific layers replaced by MoE layers",
      },
      {
        question: "Which layer in a Transformer is commonly replaced with an MoE layer?",
        options: ["Input layer", "Self-Attention layer", "Feed-Forward Network (FFN) layer", "Output layer"],
        correctAnswer: "Feed-Forward Network (FFN) layer",
      },
      {
        question: "What is the primary goal of training strategies in MoEs?",
        options: ["To complicate the training process", "To guarantee proper training of MoEs", "To reduce the number of experts", "To simplify the gating function"],
        correctAnswer: "To guarantee proper training of MoEs",
      },
      {
        question: "What is the purpose of the auxiliary loss term in MoE training?",
        options: ["To decrease model performance", "To balance expert utilization", "To increase computational cost", "To simplify the gating function"],
        correctAnswer: "To balance expert utilization",
      },
      {
        question: "What does expert capacity refer to?",
        options: ["The minimum number of data points an expert can process", "The maximum number of data points an expert can process", "The average size of expert networks", "The complexity of the gating function"],
        correctAnswer: "The maximum number of data points an expert can process",
      },
      {
        question: "What can result from insufficient expert capacity?",
        options: ["Increased computational resources", "Under-trained experts", "Balanced expert utilization", "Improved model performance"],
        correctAnswer: "Under-trained experts",
      },
      {
        question: "What is a potential consequence of increasing expert capacity too much?",
        options: ["Skipped input data", "Improved model performance", "Wasted computational and memory resources", "Reduced training time"],
        correctAnswer: "Wasted computational and memory resources",
      },
      {
        question: "Which strategy does Switch-Transformer propose to address parameter storage issues?",
        options: ["Hierarchical storage management", "Parameter migration", "Model compression", "Distributed training"],
        correctAnswer: "Parameter migration",
      },
      {
        question: "What is the main goal of system designs in MoEs?",
        options: ["To complicate the model architecture", "To optimize the system efficiency of MoE", "To reduce the number of experts", "To increase computational costs"],
        correctAnswer: "To optimize the system efficiency of MoE",
      },
      {
        question: "Which technique does DeepSpeed-MoE use to reduce the size of MoE models?",
        options: ["Increasing expert capacity", "Hierarchical storage management", "Model compression", "Parameter migration"],
        correctAnswer: "Model compression",
      },
      {
        question: "What is a key benefit of MoE models, besides efficiency?",
        options: ["Decreased model interpretability", "Improved model interpretability", "Increased training complexity", "Reduced generalization ability"],
        correctAnswer: "Improved model interpretability",
      },
      {
        question: "How do MoE models enhance interpretability?",
        options: ["By using a single, large expert", "By learning intrinsic allocation mechanisms", "By simplifying the gating function", "By removing expert networks"],
        correctAnswer: "By learning intrinsic allocation mechanisms",
      },
      {
        question: "What is a major focus of ongoing research in MoE?",
        options: ["Reducing the number of experts", "Developing more complex gating functions", "Summarizing and disseminating recent advances of MoE", "Limiting application domains"],
        correctAnswer: "Summarizing and disseminating recent advances of MoE",
      },
      {
        question: "Which of the following is NOT a mainstream machine learning direction where MoE-based algorithm designs are being applied?",
        options: ["Continual learning", "Meta-learning", "Supervised learning", "Reinforcement learning"],
        correctAnswer: "Supervised learning",
      },
      {
        question: "What is the primary goal of continual learning?",
        options: ["To learn from independent and identically distributed data", "To enable models to learn from a continuous stream of non-stationary data", "To train models on a fixed dataset", "To reduce model complexity"],
        correctAnswer: "To enable models to learn from a continuous stream of non-stationary data",
      },
      {
        question: "In the context of continual learning, what is a key challenge that MoE can help address?",
        options: ["Increasing computational costs", "Forgetting previously learned knowledge", "Simplifying model architecture", "Reducing data diversity"],
        correctAnswer: "Forgetting previously learned knowledge",
      },
      {
        question: "What is the main idea behind meta-learning?",
        options: ["Learning from a single task", "Learning to learn", "Forgetting previously learned knowledge", "Increasing data complexity"],
        correctAnswer: "Learning to learn",
      },
      {
        question: "How do MoEs enhance meta-learning?",
        options: ["By reducing the number of experts", "By simplifying the gating function", "By improving the model's ability to adapt to new tasks", "Increasing computational costs"],
        correctAnswer: "By improving the model's ability to adapt to new tasks",
      },
      {
        question: "What is multi-task learning?",
        options: ["Learning a single task", "Learning multiple tasks simultaneously", "Forgetting previously learned knowledge", "Reducing model size"],
        correctAnswer: "Learning multiple tasks simultaneously",
      },
      {
        question: "How do MoEs improve multi-task learning?",
        options: ["By increasing interference between tasks", "By enabling knowledge sharing between tasks", "By simplifying model architecture", "Reducing data diversity"],
        correctAnswer: "By enabling knowledge sharing between tasks",
      },
      {
        question: "In reinforcement learning, what is a key challenge that MoE can help address?",
        options: ["Reducing the complexity of the environment", "Improving exploration and exploitation balance", "Increasing computational costs", "Simplifying the reward function"],
        correctAnswer: "Improving exploration and exploitation balance",
      },
      {
        question: "What is the focus of theoretical studies on MoE?",
        options: ["Application-specific implementations", "Understanding MoE in various scenarios", "Reducing the number of experts", "Simplifying the gating function"],
        correctAnswer: "Understanding MoE in various scenarios",
      },
      {
        question: "Which of the following is NOT a theoretical aspect of MoE being studied?",
        options: ["Different gating functions", "Different expert models", "Different learning scenarios", "Specific application domains"],
        correctAnswer: "Specific application domains",
      },
      {
        question: "In which domains have MoE shown significant application?",
        options: ["Only in natural language processing", "Only in computer vision", "Both computer vision and natural language processing", "Neither computer vision nor natural language processing"],
        correctAnswer: "Both computer vision and natural language processing",
      },
      {
        question: "Which of the following is a subproblem in computer vision where MoE is applied?",
        options: ["Machine translation", "Image classification", "Text generation", "Spoken language understanding"],
        correctAnswer: "Image classification",
      },
      {
        question: "Which of the following is a subproblem in natural language processing where MoE is applied?",
        options: ["Object detection", "Semantic segmentation", "Machine translation", "Image classification"],
        correctAnswer: "Machine translation",
      },
      {
        question: "What is a potential benefit of MoE in image classification?",
        options: ["Reduced accuracy", "Enhanced specialization of experts", "Increased computational costs", "Simplified model architecture"],
        correctAnswer: "Enhanced specialization of experts",
      },
      {
        question: "How does MoE enhance machine translation?",
        options: ["By reducing translation accuracy", "By improving the handling of diverse languages", "By increasing computational costs", "By simplifying the model architecture"],
        correctAnswer: "By improving the handling of diverse languages",
      },
      {
        question: "What is a future research direction for MoE?",
        options: ["Reducing the number of experts", "Developing simpler gating functions", "Exploring new application domains", "Limiting model capacity"],
        correctAnswer: "Exploring new application domains",
      },
      {
        question: "What is a key advantage of MoE models?",
        options: ["They activate all parameters for every input.", "They dynamically select and activate only the most relevant subset of parameters.", "They are only suitable for small datasets.", "They increase computational costs."],
        correctAnswer: "They dynamically select and activate only the most relevant subset of parameters.",
      },
      {
        question: "Which of the following is a characteristic of modern datasets that motivates the use of MoE?",
        options: ["They are becoming less diverse.", "They are becoming less complex.", "They often contain multimodal data.", "They have simpler structures."],
        correctAnswer: "They often contain multimodal data.",
      },
      {
        question: "What is a challenge associated with integrating conflicting knowledge in a single model?",
        options: ["It leads to more stable training dynamics.", "It often results in suboptimal performance.", "It simplifies model deployment.", "It reduces computational costs."],
        correctAnswer: "It often results in suboptimal performance.",
      },
      {
        question: "How does MoE address the challenge of conflicting knowledge?",
        options: ["By activating all parameters for every input.", "By using a single expert for all data types.", "By leveraging specialized 'experts' for different tasks or data types.", "By simplifying the model architecture."],
        correctAnswer: "By leveraging specialized 'experts' for different tasks or data types.",
      },
      {
        question: "What is the impact of the selective activation mechanism in MoE models?",
        options: ["It decreases model capacity.", "It proportionally increases computational costs.", "It allows models to handle diverse knowledge domains without proportionally increasing computational costs.", "It reduces model efficiency."],
        correctAnswer: "It allows models to handle diverse knowledge domains without proportionally increasing computational costs.",
      },
      {
        question: "Which large language model demonstrated a 7 times faster pre-training speed compared to T5-Base by using MoE?",
        options: ["GPT-4", "BERT", "Switch Transformer", "CLIP"],
        correctAnswer: "Switch Transformer",
      },
      {
        question: "What is a key contribution of OpenMoE to the open-source community?",
        options: ["Reducing model size by 3.7 times.", "Achieving state-of-the-art results on SuperGLUE.", "Experimenting with decoder-only MoE.", "Proposing Router z-loss."],
        // Corrected answer key: the router z-loss was proposed by ST-MoE
        // (Zoph et al., 2022); OpenMoE is known for its open decoder-only MoE.
        correctAnswer: "Experimenting with decoder-only MoE.",
      },
      {
        question: "What does the Mixtral 8x7B model demonstrate regarding parameter access?",
        options: ["It processes each token with all of its parameters.", "It can access a total number of parameters greater than the number used per token.", "It has lower parameter efficiency compared to other models.", "It increases computational costs significantly."],
        correctAnswer: "It can access a total number of parameters greater than the number used per token.",
      },
      {
        question: "Beyond efficiency, what is another opportunity that MoE models offer?",
        options: ["Reduced model interpretability.", "Simplified training dynamics.", "Improved model interpretability.", "Decreased generalization ability."],
        correctAnswer: "Improved model interpretability.",
      },
    ];
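    // Top-level component: tracks the current question, score, a 60-second
    // per-question countdown, and whether the quiz has finished.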
    const QuizApp = () => {
      const [currentQuestionIndex, setCurrentQuestionIndex] = React.useState(0);
      const [selectedAnswer, setSelectedAnswer] = React.useState(null);
      const [score, setScore] = React.useState(0);
      const [timeLeft, setTimeLeft] = React.useState(60);
      const [isTimeRunning, setIsTimeRunning] = React.useState(true);
      const [quizFinished, setQuizFinished] = React.useState(false);
      const [selectedAnswerColor, setSelectedAnswerColor] = React.useState('');
      const [message, setMessage] = React.useState('');
      const totalQuestions = quizData.length;
      const currentQuestion = quizData[currentQuestionIndex];
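      // Countdown effect: re-armed after every tick; ends the quiz at zero.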
      React.useEffect(() => {
        if (isTimeRunning && timeLeft > 0) {
          // One-shot timeout (the effect re-runs each second), so no intervals pile up.
          const timerId = setTimeout(() => setTimeLeft((t) => t - 1), 1000);
          return () => clearTimeout(timerId);
        } else if (timeLeft === 0) {
          setQuizFinished(true);
          setIsTimeRunning(false);
        }
      }, [isTimeRunning, timeLeft]);
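      // Lock in the first answer clicked, score it, and show feedback.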
      const handleAnswerSelection = (answer) => {
        if (!selectedAnswer) {
          setSelectedAnswer(answer);
          setIsTimeRunning(false); // pause the countdown while feedback is shown
          if (answer === currentQuestion.correctAnswer) {
            setScore(score + 10);
            setSelectedAnswerColor('correct');
            setMessage('Correct Answer!');
          } else {
            setSelectedAnswerColor('incorrect');
            setMessage('Incorrect Answer!');
          }
        }
      };
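      // Shared navigation: move to the next question (resetting per-question
      // state and the clock) or finish the quiz after the last one.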
      const advanceToNextQuestion = () => {
        if (currentQuestionIndex < quizData.length - 1) {
          setCurrentQuestionIndex(currentQuestionIndex + 1);
          setSelectedAnswer(null);
          setSelectedAnswerColor('');
          setMessage('');
          setTimeLeft(60);
          setIsTimeRunning(true);
        } else {
          setQuizFinished(true);
          setIsTimeRunning(false);
        }
      };
      const handleNextQuestion = () => {
        // Only advance once an answer has been chosen; Skip bypasses this check.
        if (selectedAnswer) {
          advanceToNextQuestion();
        }
      };
      const handleSkipQuestion = () => {
        advanceToNextQuestion();
      };
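      // Restore all state to its initial values for a fresh run.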
      const handleResetQuiz = () => {
        setCurrentQuestionIndex(0);
        setSelectedAnswer(null);
        setScore(0);
        setTimeLeft(60);
        setIsTimeRunning(true);
        setQuizFinished(false);
        setSelectedAnswerColor('');
        setMessage('');
      };
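      // Scoreboard and timer stay visible; below them, either the active
      // question or the final-score screen, depending on quizFinished.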
      return (
        <div id="quiz-container">
          <h1>Mixture of Experts (MoE) Quiz</h1>
          <div id="score-area">Score: {score} <br/> Question: {currentQuestionIndex + 1} / {totalQuestions}</div>
          <div id="timer-area">Time Left: {timeLeft} seconds</div>
          {!quizFinished ? (
            <>
              <div id="question-area">{currentQuestion.question}</div>
              <div id="answer-options">
                {currentQuestion.options.map((option) => (
                  <div
                    key={option}
                    className={`answer-option ${selectedAnswer === option ? selectedAnswerColor : ''} ${selectedAnswer ? 'disabled' : ''} ${selectedAnswerColor && option === currentQuestion.correctAnswer ? 'correct' : ''}`}
                    onClick={() => handleAnswerSelection(option)}
                  >
                    {option}
                  </div>
                ))}
              </div>
| <div id="result-message" className={selectedAnswerColor === 'correct' ? 'correct-message' : 'incorrect-message'}>{message}</div> | |
              <div style={{display: 'flex', justifyContent: 'center'}}>
                <button id="next-button" onClick={handleNextQuestion} disabled={!selectedAnswer}>
                  Next Question
                </button>
                <button id="skip-button" onClick={handleSkipQuestion}>
                  Skip Question
                </button>
              </div>
            </>
          ) : (
            <>
              <div id="final-score-area">Your Final Score: {score}</div>
| <button id="reset-button"onClick={handleResetQuiz}> | |
                Play Again
              </button>
            </>
          )}
        </div>
      );
    };
    ReactDOM.createRoot(document.getElementById("root")).render(<QuizApp />);
  </script>
</body>
</html>