Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import torch | |
| from transformers import AutoConfig, AutoTokenizer | |
# Page configuration: title, icon, wide layout, and a sidebar that starts expanded.
# NOTE(review): the icon string looks mis-encoded (probably an emoji in the
# original file) — confirm the file's encoding before editing it.
_PAGE_SETTINGS = {
    "page_title": "Transformer Visualizer",
    "page_icon": "π§ ",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS styling, injected into the page as raw HTML.
# The rules cover the page background, the sidebar, heading colour, metric
# cards, the `.architecture` diagram box and the `.token-table` frame.
# NOTE(review): `.reportview-container` and `.sidebar .sidebar-content` look
# like class names from older Streamlit releases — confirm they still match
# the DOM of the Streamlit version this app is deployed on.
_CUSTOM_CSS = """
<style>
.reportview-container {
background: linear-gradient(45deg, #1a1a1a, #4a4a4a);
}
.sidebar .sidebar-content {
background: #2c2c2c !important;
}
h1, h2, h3, h4, h5, h6 {
color: #00ff00 !important;
}
.stMetric {
background-color: #333333;
border-radius: 10px;
padding: 15px;
}
.architecture {
font-family: monospace;
color: #00ff00;
white-space: pre-wrap;
background-color: #1a1a1a;
padding: 20px;
border-radius: 10px;
border: 1px solid #00ff00;
}
.token-table {
margin-top: 20px;
border: 1px solid #00ff00;
border-radius: 5px;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Model database.
# Maps the display name shown in the sidebar to static facts about a model:
#   model_name - Hugging Face checkpoint id passed to `from_pretrained`
#   type       - architecture family; selects the diagram layout
#   layers     - number of transformer layers (per stack for Seq2Seq models,
#                where the same count is drawn for encoder and decoder)
#   heads      - attention heads per layer
#   params     - parameter count in millions
MODELS = {
    "BERT": {
        "model_name": "bert-base-uncased",
        "type": "Encoder",
        "layers": 12,
        "heads": 12,
        "params": 109.48,
    },
    "GPT-2": {
        "model_name": "gpt2",
        "type": "Decoder",
        "layers": 12,
        "heads": 12,
        "params": 117,
    },
    "T5-Small": {
        "model_name": "t5-small",
        "type": "Seq2Seq",
        "layers": 6,
        "heads": 8,
        "params": 60,
    },
    "RoBERTa": {
        "model_name": "roberta-base",
        "type": "Encoder",
        "layers": 12,
        "heads": 12,
        "params": 125,
    },
    "DistilBERT": {
        "model_name": "distilbert-base-uncased",
        "type": "Encoder",
        "layers": 6,
        "heads": 12,
        "params": 66,
    },
    "ALBERT": {
        "model_name": "albert-base-v2",
        "type": "Encoder",
        "layers": 12,
        "heads": 12,
        "params": 11.8,
    },
    "ELECTRA": {
        "model_name": "google/electra-small-discriminator",
        "type": "Encoder",
        "layers": 12,
        # The small ELECTRA checkpoint uses 4 attention heads (hidden size 256).
        "heads": 4,
        "params": 13.5,
    },
    "XLNet": {
        "model_name": "xlnet-base-cased",
        "type": "AutoRegressive",
        "layers": 12,
        "heads": 12,
        "params": 110,
    },
    "BART": {
        "model_name": "facebook/bart-base",
        "type": "Seq2Seq",
        "layers": 6,
        # bart-base uses 12 heads per layer; 16 is the bart-large value.
        "heads": 12,
        "params": 139,
    },
    "DeBERTa": {
        "model_name": "microsoft/deberta-base",
        "type": "Encoder",
        "layers": 12,
        "heads": 12,
        "params": 139,
    },
}
@st.cache_resource
def get_model_config(model_name):
    """Load the Hugging Face configuration for one of the catalogued models.

    The result is cached with ``st.cache_resource`` so that the configuration
    is not re-resolved on every Streamlit rerun of the script.

    Args:
        model_name: Display name of the model, i.e. a key of ``MODELS``
            (for example ``"BERT"``), not the checkpoint id.

    Returns:
        The configuration object produced by ``AutoConfig.from_pretrained``
        for the checkpoint registered under ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    checkpoint = MODELS[model_name]["model_name"]
    return AutoConfig.from_pretrained(checkpoint)
def plot_model_comparison(selected_model):
    """Render a bar chart comparing the parameter counts of all models.

    One bar is drawn per entry of ``MODELS`` (height = ``params``), and the
    bar of ``selected_model`` is recoloured green so it stands out. The chart
    is sent to the page with ``st.pyplot``.

    Args:
        selected_model: Display name of the model to highlight; must be a key
            of ``MODELS``.

    Raises:
        ValueError: If ``selected_model`` is not a key of ``MODELS``.
    """
    model_names = list(MODELS)
    param_counts = [info["params"] for info in MODELS.values()]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(model_names, param_counts)
        bars[model_names.index(selected_model)].set_color('#00ff00')

        # White text on a dark canvas, matching the page's colour scheme.
        ax.set_ylabel('Parameters (Millions)', color='white')
        ax.set_title('Model Size Comparison', color='white')
        ax.tick_params(axis='x', rotation=45, colors='white')
        ax.tick_params(axis='y', colors='white')
        ax.set_facecolor('#2c2c2c')
        fig.patch.set_facecolor('#2c2c2c')

        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, figures pile up each time the script reruns.
        plt.close(fig)
def visualize_architecture(model_info):
    """Build a plain-text diagram of a model's layer stack.

    The diagram starts with an ``Input`` arrow, lists one block per layer and
    ends with ``[Output]``. The layout depends on ``model_info["type"]``:

    * ``"Encoder"`` / ``"Decoder"`` / ``"AutoRegressive"``: an embedding
      layer followed by a single stack whose blocks show the attention kind,
      the head count, layer normalization and the feed-forward network.
    * ``"Seq2Seq"``: an encoder stack, a context separator and a decoder
      stack of the same depth (head counts are not shown for this layout).
    * anything else: a one-line placeholder naming the unsupported type.

    NOTE(review): the connector glyphs in the string literals below appear
    mis-encoded in this copy of the file (they look like box-drawing and
    arrow characters read with the wrong codec) — confirm against the
    original before changing them.

    Args:
        model_info: Mapping with the keys ``"type"``, ``"layers"`` and
            ``"heads"``, such as one value of ``MODELS``.

    Returns:
        The diagram as a single newline-joined string.

    Raises:
        KeyError: If ``model_info`` lacks one of the required keys.
    """
    model_type = model_info["type"]
    layers = model_info["layers"]
    heads = model_info["heads"]

    def build_stack(layer_label, sublayers):
        """Return the lines for ``layers`` numbered blocks, each ending in an arrow."""
        lines = []
        for number in range(1, layers + 1):
            lines.append(f"{layer_label} {number}")
            lines.extend(sublayers)
            lines.extend(["β", "βΌ"])
        return lines

    def attention_sublayers(attention_label):
        """Return the sub-layer lines of a single-stack block."""
        return [
            f"ββ {attention_label}",
            f"β ββ {heads} Heads",
            "ββ Layer Normalization",
            "ββ Feed Forward Network",
        ]

    # Single-stack families: model type -> (block label, attention label).
    # "AutoRegressive" is drawn like a decoder-style stack; this is a
    # simplified picture, not a faithful rendering of any specific model.
    single_stack_layouts = {
        "Encoder": ("Encoder Layer", "Multi-Head Attention"),
        "Decoder": ("Decoder Layer", "Masked Multi-Head Attention"),
        "AutoRegressive": ("Autoregressive Layer", "Masked Multi-Head Attention"),
    }

    architecture = ["Input", "β", "βΌ"]

    if model_type in single_stack_layouts:
        layer_label, attention_label = single_stack_layouts[model_type]
        architecture.append("[Embedding Layer]")
        architecture.extend(
            build_stack(layer_label, attention_sublayers(attention_label))
        )
        architecture.append("[Output]")
    elif model_type == "Seq2Seq":
        architecture.append("Encoder Stack")
        architecture.extend(
            build_stack(
                "Encoder Layer",
                ["ββ Self-Attention", "ββ Feed Forward Network"],
            )
        )
        architecture.append("βββ [Context] βββ")
        architecture.append("Decoder Stack")
        architecture.extend(
            build_stack(
                "Decoder Layer",
                [
                    "ββ Masked Self-Attention",
                    "ββ Encoder-Decoder Attention",
                    "ββ Feed Forward Network",
                ],
            )
        )
        architecture.append("[Output]")
    else:
        # Keep rendering something meaningful instead of a diagram that
        # silently stops after the input arrow.
        architecture.append(f"[Unsupported model type: {model_type}]")

    return "\n".join(architecture)
def visualize_attention_patterns():
    """Render a placeholder attention heatmap.

    The heatmap shows a 5x5 matrix of random values drawn with
    ``torch.randn``; it illustrates what an attention map looks like and is
    not computed from any model. The chart is sent to the page with
    ``st.pyplot``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        data = torch.randn(5, 5)
        ax.imshow(data, cmap='viridis')
        ax.set_title('Attention Patterns Example', color='white')
        ax.set_facecolor('#2c2c2c')
        fig.patch.set_facecolor('#2c2c2c')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, figures pile up each time the script reruns.
        plt.close(fig)
@st.cache_resource
def _load_tokenizer(checkpoint):
    """Load the tokenizer for a Hugging Face checkpoint, cached across reruns."""
    return AutoTokenizer.from_pretrained(checkpoint)


def _render_tokenization_tab(tokenizer):
    """Render the interactive tokenization view for ``tokenizer``.

    Shows the tokens and the encoded IDs of a user-supplied text side by
    side, a token-to-ID table, and a short summary of the tokenizer.
    """
    st.subheader("π Tokenization Visualization")
    input_text = st.text_input("Enter Text:", "Hello, how are you?")

    tokens = tokenizer.tokenize(input_text)
    encoded_ids = tokenizer.encode(input_text)

    tokens_col, ids_col = st.columns(2)
    with tokens_col:
        st.markdown("**Tokenized Output**")
        st.write(tokens)
    with ids_col:
        st.markdown("**Token IDs**")
        st.write(encoded_ids)

    st.markdown("**Token-ID Mapping**")
    # Look the IDs up from the tokens themselves. `encode` may add special
    # tokens, and how many it adds and where differs between tokenizers, so
    # slicing its output cannot be relied on to line up with `tokens`.
    token_data = pd.DataFrame({
        "Token": tokens,
        "ID": tokenizer.convert_tokens_to_ids(tokens),
    })
    st.dataframe(
        token_data,
        height=150,
        use_container_width=True,
        column_config={
            "Token": "Token",
            "ID": st.column_config.Column(
                "ID",
                help="Numerical representation of the token",
            ),
        },
    )

    st.markdown(f"""
    **Tokenizer Info:**
    - Vocabulary size: `{tokenizer.vocab_size}`
    - Special tokens: `{tokenizer.all_special_tokens}`
    - Padding token: `{tokenizer.pad_token}`
    - Max length: `{tokenizer.model_max_length}`
    """)


def main():
    """Render the Transformer Model Visualizer page.

    The sidebar selects one entry of ``MODELS``. The page then shows four
    headline metrics for that model and four tabs: the text architecture
    diagram, the parameter-count comparison chart, an example attention
    heatmap, and the tokenization view.
    """
    st.title("π§ Transformer Model Visualizer")

    selected_model = st.sidebar.selectbox("Select Model", list(MODELS.keys()))
    model_info = MODELS[selected_model]
    tokenizer = _load_tokenizer(model_info["model_name"])

    headline_metrics = (
        ("Model Type", model_info["type"]),
        ("Layers", model_info["layers"]),
        ("Attention Heads", model_info["heads"]),
        ("Parameters", f"{model_info['params']}M"),
    )
    for column, (label, value) in zip(st.columns(4), headline_metrics):
        with column:
            st.metric(label, value)

    tab1, tab2, tab3, tab4 = st.tabs(
        ["Model Structure", "Comparison", "Model Attention", "Tokenization"]
    )

    with tab1:
        st.subheader("Architecture Diagram")
        architecture = visualize_architecture(model_info)
        # The diagram is generated from the static MODELS table, so it is
        # embedded as raw HTML to pick up the `.architecture` styling.
        st.markdown(
            f"<div class='architecture'>{architecture}</div>",
            unsafe_allow_html=True,
        )
        st.markdown("""
        **Legend:**
        - **Multi-Head Attention**: Self-attention mechanism with multiple parallel heads
        - **Layer Normalization**: Normalization operation between layers
        - **Feed Forward Network**: Position-wise fully connected network
        - **Masked Attention**: Attention with future token masking
        """)

    with tab2:
        st.subheader("Model Size Comparison")
        plot_model_comparison(selected_model)

    with tab3:
        st.subheader("Model-specific Visualizations")
        visualize_attention_patterns()
        if selected_model == "BERT":
            st.write("BERT-specific visualization example")
        elif selected_model == "GPT-2":
            st.write("GPT-2 attention mask visualization")

    with tab4:
        _render_tokenization_tab(tokenizer)
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()