"""
Glossary component providing educational definitions for key terms,
grouped by related 3Blue1Brown video explanations.
"""
from dash import html
| # --- Glossary data: terms grouped by explanatory video --- | |
| VIDEO_GROUPS = [ | |
| { | |
| "title": "The Transformer Pipeline", | |
| "video_id": "wjZofJX0v4M", | |
| "terms": [ | |
| ( | |
| "Text Splitting (Tokenization)", | |
| "Breaking text into pieces", | |
| "Models don't read words like we do. They break text into small " | |
| "chunks called 'tokens'. A token can be a whole word (like " | |
| "'apple'), part of a word (like 'ing' in 'playing'), or even a space." | |
| ), | |
| ( | |
| "Meaning Encoding (Embedding)", | |
| "Converting words to numbers", | |
| "Once text is split into pieces, each piece is converted into a " | |
| "list of numbers. This list represents the meaning of the piece. " | |
| "Words with similar meanings (like 'dog' and 'puppy') get similar " | |
| "numbers." | |
| ), | |
| ( | |
| "Confidence Scores (Logits)", | |
| "Prediction Scores", | |
| "The raw scores the model assigns to every possible next word. " | |
| "Higher scores mean the model thinks that word is more likely to " | |
| "come next." | |
| ), | |
| ( | |
| "Softmax", | |
| "Turning Scores into Probabilities", | |
| "The function that converts raw prediction scores (logits) into " | |
| "probabilities — positive numbers that add up to 1.0. This lets " | |
| "us interpret the model's output as 'how likely is each next word?'" | |
| ), | |
| ( | |
| "Residual Stream", | |
| "The Information Highway", | |
| "Think of this as a conveyor belt carrying the model's current " | |
| "understanding of the sentence. As it passes through each layer, " | |
| "the layer adds new information to it (via addition), refining " | |
| "the prediction step-by-step." | |
| ), | |
| ], | |
| }, | |
| { | |
| "title": "The Attention Mechanism", | |
| "video_id": "eMlx5fFNoYc", | |
| "terms": [ | |
| ( | |
| "Attention", | |
| "Context Lookup", | |
| "This is how the model understands context. When processing a " | |
| "word (like 'it'), the model 'pays attention' to other words in " | |
| "the sentence (like 'the cat') to figure out what 'it' refers " | |
| "to. It's like a spotlight shining on relevant past information." | |
| ), | |
| ( | |
| "Attention Detectors (Heads)", | |
| "Parallel Context Searchers", | |
| "Instead of having just one attention mechanism, models use " | |
| "multiple 'detectors' (called 'heads') in parallel. Each " | |
| "detector can learn to look for different types of relationships " | |
| "(e.g., one might look for adjectives, while another tracks " | |
| "pronouns)." | |
| ), | |
| ], | |
| }, | |
| { | |
| "title": "The Big Picture", | |
| "video_id": "LPZh9BOjkQs", | |
| "terms": [ | |
| ( | |
| "Probability Distribution", | |
| "The Full Prediction Spread", | |
| "The complete set of probabilities over all possible next words. " | |
| "Instead of one answer, the model gives a probability for every " | |
| "word in its vocabulary. The dashboard shows the top 5 most " | |
| "likely predictions." | |
| ), | |
| ( | |
| "Knowledge Retrieval (MLP / Feed-Forward Network)", | |
| "The Model's Memory Banks", | |
| "The component in each layer that processes tokens individually. " | |
| "It acts like a memory lookup — retrieving stored factual " | |
| "knowledge about the world. It expands the information, applies " | |
| "learned patterns, then compresses it back down." | |
| ), | |
| ( | |
| "Parameters / Weights", | |
| "The Model's Learned Numbers", | |
| "The learnable numbers inside the model. These are adjusted " | |
| "during training to improve predictions. GPT-2 has about 124 " | |
| "million of them. Every decision the model makes flows through " | |
| "these numbers." | |
| ), | |
| ( | |
| "Training", | |
| "Learning from Examples", | |
| "The process of showing the model billions of text examples and " | |
| "adjusting its parameters to get better at predicting what comes " | |
| "next. The model doesn't memorize text — it learns patterns and " | |
| "relationships between words." | |
| ), | |
| ], | |
| }, | |
| { | |
| "title": "How Models Store Knowledge", | |
| "video_id": "9-Jl0dxWQs8", | |
| "terms": [ | |
| ( | |
| "Hidden Dimension", | |
| "The Width of the Conveyor Belt", | |
| "The size of the number list representing each token internally. " | |
| "For GPT-2, each token is represented by 768 numbers at each " | |
| "layer. A wider conveyor belt means the model can carry more " | |
| "nuanced information about each word." | |
| ), | |
| ( | |
| "Neurons, Weights, and Biases", | |
| "The Building Blocks", | |
| "Neurons are the individual units that activate (or don't) based " | |
| "on their input. Weights control how strongly inputs influence " | |
| "each neuron. Biases set the threshold for when a neuron 'fires'. " | |
| "Together, they form the basic machinery of every layer." | |
| ), | |
| ], | |
| }, | |
| { | |
| "title": "How Models Learn", | |
| "video_id": "IHZwWFHWa-w", | |
| "terms": [ | |
| ( | |
| "Gradient", | |
| "Measuring Influence", | |
| "A measure of how much each part of the model contributed to its " | |
| "prediction. In the dashboard's 'Word Influence' tool, gradients " | |
| "reveal which input words had the biggest effect on the model's " | |
| "output — like tracing cause and effect." | |
| ), | |
| ( | |
| "Loss", | |
| "The Model's Error Score", | |
| "A number measuring how wrong the model's predictions are. " | |
| "During training, the goal is to make this number as small as " | |
| "possible. Lower loss means the model is making better " | |
| "predictions." | |
| ), | |
| ], | |
| }, | |
| { | |
| "title": "Dashboard Tools", | |
| "video_id": None, | |
| "terms": [ | |
| ( | |
| "Layer", | |
| "One Processing Step", | |
| "One complete processing step in the transformer, containing " | |
| "both an attention mechanism and a knowledge retrieval (MLP) " | |
| "component. GPT-2 has 12 layers stacked on top of each other, " | |
| "each refining the model's understanding." | |
| ), | |
| ( | |
| "Beam Search", | |
| "Exploring Multiple Paths", | |
| "Instead of just picking the single best next word, Beam Search " | |
| "explores several likely future paths simultaneously (like " | |
| "parallel universes) and picks the one that makes the most sense " | |
| "overall. The 'Options to Generate' setting controls how many " | |
| "paths are explored at once." | |
| ), | |
| ( | |
| "Test by Removing (Ablation)", | |
| "Digital Brain Surgery", | |
| "A technique used to understand which parts of a model are " | |
| "responsible for certain behaviors. By artificially 'turning off' " | |
| "specific attention detectors, we can measure how much the " | |
| "model's output changes, revealing the importance of those " | |
| "components." | |
| ), | |
| ( | |
| "Word Influence (Token Attribution)", | |
| "Tracing What Mattered", | |
| "A way to measure how much each input word affected the model's " | |
| "final prediction. Uses gradients to trace the flow of influence " | |
| "backwards through the model, highlighting which words pushed " | |
| "the prediction in a particular direction." | |
| ), | |
| ( | |
| "Vocabulary", | |
| "The Model's Dictionary", | |
| "The complete set of tokens the model knows. GPT-2 has a " | |
| "vocabulary of about 50,257 tokens. The model can only predict " | |
| "words that are in its vocabulary — it picks from this fixed set " | |
| "every time." | |
| ), | |
| ( | |
| "Inference / Forward Pass", | |
| "Running the Model", | |
| "Using the trained model to make predictions on new text. This " | |
| "is what happens when you click 'Analyze' in the dashboard — no " | |
| "learning occurs, the model just processes your input through " | |
| "all its layers to produce a prediction." | |
| ), | |
| ( | |
| "Temperature", | |
| "Controlling Randomness", | |
| "A setting that controls how spread out the model's predictions " | |
| "are. Low temperature makes the model more confident and " | |
| "predictable (it strongly favors its top pick). High temperature " | |
| "makes predictions more spread out and creative." | |
| ), | |
| ], | |
| }, | |
| ] | |
| def create_glossary_modal(): | |
| """ | |
| Create the hidden glossary modal that appears when the Help button is clicked. | |
| """ | |
| return html.Div([ | |
| html.Div(id='glossary-overlay-bg', className='glossary-overlay'), | |
| html.Div([ | |
| html.Div([ | |
| html.H2("Transformer Concept Glossary"), | |
| html.Button('×', id='close-glossary-btn', className='close-button', | |
| style={'background': 'none', 'border': 'none', 'fontSize': '28px', 'cursor': 'pointer', 'color': '#a0aec0'}) | |
| ], className='glossary-header'), | |
| html.Div([ | |
| _create_video_group(group) for group in VIDEO_GROUPS | |
| ], className="glossary-content-area"), | |
| ], id='glossary-drawer-content', className="glossary-drawer") | |
| ], id='glossary-container') | |
| def _create_video_group(group): | |
| """Render a section with an optional video followed by its related terms.""" | |
| children = [ | |
| html.H3(group["title"], style={ | |
| 'color': '#2d3748', | |
| 'fontSize': '16px', | |
| 'fontWeight': '600', | |
| 'marginBottom': '15px', | |
| 'paddingBottom': '8px', | |
| 'borderBottom': '2px solid #e2e8f0', | |
| }) | |
| ] | |
| if group["video_id"]: | |
| children.append(html.Iframe( | |
| src=f"https://www.youtube.com/embed/{group['video_id']}?rel=0", | |
| style={ | |
| 'width': '100%', | |
| 'height': '350px', | |
| 'border': 'none', | |
| 'borderRadius': '8px', | |
| 'marginBottom': '20px', | |
| }, | |
| allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; fullscreen" | |
| )) | |
| for term, analogy, definition in group["terms"]: | |
| children.append(_create_term_entry(term, analogy, definition)) | |
| return html.Div(children, style={'marginBottom': '35px'}) | |
| def _create_term_entry(term, analogy, definition): | |
| """Render a single glossary term with its friendly analogy and definition.""" | |
| return html.Div([ | |
| html.Div([ | |
| html.H4(term, style={'margin': '0', 'color': '#4a5568'}), | |
| html.Span(analogy, style={'fontSize': '12px', 'backgroundColor': '#ebf8ff', 'color': '#2b6cb0', 'padding': '2px 8px', 'borderRadius': '12px', 'marginLeft': '10px'}) | |
| ], style={'display': 'flex', 'alignItems': 'center', 'marginBottom': '10px'}), | |
| html.P(definition, style={'color': '#718096', 'fontSize': '14px', 'lineHeight': '1.5', 'marginTop': '0', 'marginBottom': '0'}) | |
| ], style={'marginBottom': '20px', 'paddingBottom': '15px', 'borderBottom': '1px solid #f7fafc'}) | |