| import streamlit as st |
| import pandas as pd |
| from datasets import load_dataset |
| from datetime import datetime |
| import json |
| import plotly.express as px |
| import plotly.graph_objects as go |
| from collections import defaultdict, Counter |
|
|
| |
# Browser-tab title/icon and overall page layout for the dashboard.
# The title and icon use the map emoji (U+1F5FA U+FE0F); the previous text was
# the same bytes mis-decoded into Greek letters.
st.set_page_config(
    page_title="🗺️ ModelAtlas Community Dashboard",
    page_icon="🗺️",
    layout="wide",
    initial_sidebar_state="collapsed",
)
|
|
| |
@st.cache_data(ttl=300)
def load_community_data():
    """Load the community analyses dataset as a pandas DataFrame.

    Reads the ``train`` split of ``RadicalNotionAI/community-analyses`` and
    converts it with ``to_pandas()``. The result is memoised by
    ``st.cache_data`` with a 300-second TTL.

    Returns:
        The dataset as a ``pd.DataFrame``. An empty ``pd.DataFrame`` is
        returned when loading raises (the error is shown with ``st.error``)
        or when the conversion does not produce a DataFrame.
    """
    # NOTE(review): the empty fallback is returned from inside the cached
    # function, so it is presumably cached for the TTL too -- confirm intended.
    try:
        frame = load_dataset(
            "RadicalNotionAI/community-analyses", split="train"
        ).to_pandas()
    except Exception as e:
        st.error(f"Error loading dataset: {e}")
        return pd.DataFrame()
    # Guard against to_pandas() handing back something other than a DataFrame.
    return frame if isinstance(frame, pd.DataFrame) else pd.DataFrame()
|
|
| |
# Page header.
st.title("🗺️ ModelAtlas Community Dashboard")
st.subheader("Collaborative Intelligence for AI Model Architecture Analysis")

# Load the dataset once; every tab below reads from `df`.
df = load_community_data()

# Status banner: live count when there is data, placeholder otherwise.
if df.empty:
    st.info("⏳ Waiting for First Contributions")
else:
    st.success(f"✅ Live with {len(df)} Community Models!")

# The four top-level tabs, filled in by the `with tabN:` sections that follow.
tab1, tab2, tab3, tab4 = st.tabs(
    ["📊 Overview", "🏗️ Models", "🔬 Technical Details", "🔐 Access & Contributing"]
)
|
|
# Overview tab: headline metrics, top organizations / model types and the most
# recent contributions, or an onboarding panel while the dataset is empty.
with tab1:
    st.header("📊 Community Overview")

    if not df.empty:
        # Headline numbers. Every column is treated as optional so a dataset
        # with a reduced schema degrades to 0 / "Unknown" instead of raising.
        total_models = len(df)
        organizations = df["organization"].nunique() if "organization" in df.columns else 0
        # nunique() skips NaN, so missing values are not counted as a type
        # (consistent with how organizations are counted).
        model_types = df["model_type"].nunique() if "model_type" in df.columns else 0
        latest_analysis = df["analyzed_at"].max() if "analyzed_at" in df.columns else None

        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("🗺️ Models Analyzed", total_models)
        with col2:
            st.metric("🏢 Organizations", organizations)
        with col3:
            st.metric("🧠 Model Types", model_types)
        with col4:
            # NaN/NaT are truthy, so test for them explicitly; an empty string
            # still falls through to "Unknown".
            has_latest = (
                latest_analysis is not None
                and pd.notna(latest_analysis)
                and str(latest_analysis) != ""
            )
            if has_latest:
                # First 10 characters of the value, i.e. YYYY-MM-DD for ISO dates.
                st.metric("📅 Latest Analysis", str(latest_analysis)[:10])
            else:
                st.metric("📅 Latest Analysis", "Unknown")

        st.subheader("🗂️ Community Dataset")
        st.write("**Location:** [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
        st.write(f"**Status:** Live with {total_models} community contributions!")

        # Top-5 breakdowns, side by side.
        col1, col2 = st.columns(2)
        with col1:
            if "organization" in df.columns:
                st.subheader("🏢 Top Organizations")
                for org, count in df["organization"].value_counts().head(5).items():
                    st.write(f"• **{org}**: {count} models")
        with col2:
            if "model_type" in df.columns:
                st.subheader("🧠 Popular Model Types")
                for model_type, count in df["model_type"].value_counts().head(5).items():
                    st.write(f"• **{model_type}**: {count} models")

        # Five most recent rows (dataset order when there is no timestamp).
        st.subheader("🕒 Recent Contributions")
        if "analyzed_at" in df.columns:
            recent_df = df.sort_values("analyzed_at", ascending=False).head(5)
        else:
            recent_df = df.head(5)

        for _, row in recent_df.iterrows():
            # Use .get() throughout: model_id / organization / analyzed_at may
            # be absent, and analyzed_at may be NaN or a Timestamp, not a str.
            analyzed_at = row.get("analyzed_at")
            if analyzed_at is not None and pd.notna(analyzed_at):
                analysis_date = f" ({str(analyzed_at)[:10]})"
            else:
                analysis_date = ""
            model_id = row.get("model_id", "Unknown")
            organization = row.get("organization", "Unknown")
            st.write(f"• `{model_id}` - {organization}{analysis_date}")

    else:
        # Empty dataset: explain what the platform offers and how to contribute.
        st.write("**Status:** The ModelAtlas community platform is live and ready for contributions!")

        col1, col2, col3 = st.columns(3)

        with col1:
            st.subheader("🗂️ Community Dataset")
            st.write("Central repository for model architecture analyses")
            st.write("**Location:** [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
            st.write("**Status:** Live and accepting contributions")

        with col2:
            st.subheader("📈 Features Available")
            st.write("Ready for your contributions:")
            st.write("• Community model browser")
            st.write("• Innovation timeline analytics")
            st.write("• Cross-organizational insights")
            st.write("• Technique adoption tracking")

        with col3:
            st.subheader("🚀 Getting Started")
            st.write("Ready to contribute? Follow these steps:")
            st.write("1. Install ModelAtlas CLI")
            st.write("2. Setup: `python atlas.py contribute --setup`")
            st.write("3. Analyze: `python model_test.py model/name`")
            st.write("4. Submit: `python atlas.py contribute --submit`")
|
|
# Models tab: filterable table of contributed models, or a "what to expect"
# panel while the dataset is empty.
with tab2:
    st.header("🏗️ Community Model Browser")

    if not df.empty:
        st.success(f"**Community Models:** {len(df)} models available from the community!")

        col1, col2 = st.columns(2)

        # Defaults apply when the corresponding column (and so its selectbox)
        # is missing.
        selected_org = "All"
        selected_type = "All"

        with col1:
            if "organization" in df.columns:
                # dropna(): sorted() raises TypeError on a mix of str and NaN/None.
                # key=str keeps the sort total if other types sneak in.
                org_values = df["organization"].dropna().unique().tolist()
                orgs = ["All"] + sorted(org_values, key=str)
                selected_org = st.selectbox("Filter by Organization", orgs)

        with col2:
            if "model_type" in df.columns:
                type_values = df["model_type"].dropna().unique().tolist()
                types = ["All"] + sorted(type_values, key=str)
                selected_type = st.selectbox("Filter by Model Type", types)

        # Boolean indexing returns new frames, so no defensive copy is needed.
        # A selection other than "All" implies the column exists.
        filtered_df = df
        if selected_org != "All":
            filtered_df = filtered_df[filtered_df["organization"] == selected_org]
        if selected_type != "All":
            filtered_df = filtered_df[filtered_df["model_type"] == selected_type]

        st.write(f"**Showing {len(filtered_df)} of {len(df)} models**")

        # Show only the expected columns that are actually present.
        display_columns = ["model_id", "organization", "model_type", "analyzed_at"]
        available_columns = [col for col in display_columns if col in filtered_df.columns]

        if available_columns:
            # Prefer newest-first; otherwise fall back to a column we know is
            # present (sorting by an absent 'model_id' would raise KeyError).
            sort_column = next(
                (col for col in ("analyzed_at", "model_id") if col in available_columns),
                available_columns[0],
            )
            display_df = (
                filtered_df[available_columns]
                .sort_values(sort_column, ascending=False)
                .head(50)
            )
            st.dataframe(display_df, use_container_width=True)
        else:
            st.warning("Model data structure is not as expected. Please check dataset format.")

    else:
        st.info("**Current Status:** Waiting for first contributions to populate the dataset.")

        st.subheader("What You'll See Here")
        st.write("• ✅ Community-contributed model analyses")
        st.write("• ✅ Architectural comparisons and insights")
        st.write("• ✅ Technique evolution tracking")
        st.write("• ✅ Cross-organizational innovation patterns")

        st.subheader("Example Models to Analyze")

        examples = [
            ("Qwen/Qwen3-8B", "Advanced architecture with RoPE scaling"),
            ("deepseek-ai/DeepSeek-V3", "Large-scale MoE architecture"),
            ("THUDM/glm-4-9b", "GLM architecture innovations"),
            ("meta-llama/Llama-3.1-8B", "Llama 3.1 improvements"),
        ]

        for model, description in examples:
            st.code(model)
            st.write(f"*{description}*")

        st.write("**Start contributing to see your analyses here!**")
|
|
| |
def parse_json_field(field_value):
    """Return a dataset field as a dict, parsing it from JSON when needed.

    Args:
        field_value: A JSON document as ``str``/``bytes``/``bytearray``, an
            already-decoded ``dict``, or anything else (``None``, NaN, ...).

    Returns:
        The decoded ``dict``. An empty dict is returned when the value is
        missing, is not valid JSON, or decodes to something other than a JSON
        object, so callers can always use ``.get()`` on the result.
    """
    if isinstance(field_value, (str, bytes, bytearray)):
        try:
            field_value = json.loads(field_value)
        except (ValueError, TypeError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            return {}
    # isinstance instead of truthiness: NaN is truthy and arrays have no
    # unambiguous truth value, and neither supports .get().
    return field_value if isinstance(field_value, dict) else {}
|
|
def _coerce_int(value):
    """Return *value* as an int, or 0 when it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, inf, non-numeric strings, containers, ...
        return 0


def extract_architecture_metrics(df):
    """Flatten each row's ``config`` / ``techniques`` JSON into metric columns.

    Args:
        df: DataFrame of analyses. ``model_id``, ``organization``,
            ``model_type``, ``config`` and ``techniques`` are all optional;
            the last two are decoded with ``parse_json_field``.

    Returns:
        A DataFrame with one row per input row and the columns ``model_id``,
        ``organization``, ``model_type``, ``hidden_size``, ``num_layers``,
        ``max_position``, ``vocab_size``, ``intermediate_size``, ``rope_type``,
        ``attention_type`` and ``sliding_window``. Numeric columns hold ints
        (0 when unknown); text columns hold ``'Unknown'`` when the key is
        absent. The columns are present even when *df* has no rows.
    """
    columns = [
        "model_id", "organization", "model_type",
        "hidden_size", "num_layers", "max_position", "vocab_size",
        "intermediate_size", "rope_type", "attention_type", "sliding_window",
    ]

    def _as_dict(raw):
        # The parsed value may be a list/number/NaN; only dicts support .get().
        parsed = parse_json_field(raw)
        return parsed if isinstance(parsed, dict) else {}

    metrics = []
    for _, row in df.iterrows():
        config = _as_dict(row.get("config", "{}"))
        techniques = _as_dict(row.get("techniques", "{}"))

        metrics.append({
            "model_id": row.get("model_id", "Unknown"),
            "organization": row.get("organization", "Unknown"),
            "model_type": row.get("model_type", "Unknown"),
            "hidden_size": _coerce_int(config.get("hidden_size")),
            # `or` (not a .get default) so an explicit null num_hidden_layers
            # still falls back to num_layers.
            "num_layers": _coerce_int(
                config.get("num_hidden_layers") or config.get("num_layers")
            ),
            "max_position": _coerce_int(config.get("max_position_embeddings")),
            "vocab_size": _coerce_int(config.get("vocab_size")),
            "intermediate_size": _coerce_int(config.get("intermediate_size")),
            "rope_type": (
                techniques.get("rope_type")
                or techniques.get("positional_encoding")
                or "Unknown"
            ),
            "attention_type": techniques.get("attention_implementation") or "Unknown",
            "sliding_window": _coerce_int(techniques.get("sliding_window_size")),
        })

    # Passing `columns` keeps the schema stable for an empty input.
    return pd.DataFrame(metrics, columns=columns)
|
|
# Technical tab: architecture charts, technique adoption, per-organization
# heatmap and a side-by-side model comparison.
with tab3:
    st.header("🔬 Technical Architecture Analysis")

    if not df.empty:
        arch_df = extract_architecture_metrics(df)

        # Only models whose config yielded real dimensions can be charted.
        valid_arch_df = arch_df[(arch_df["hidden_size"] > 0) & (arch_df["num_layers"] > 0)]

        if not valid_arch_df.empty:
            st.subheader("🏗️ Architecture Parameter Distribution")

            col1, col2 = st.columns(2)

            with col1:
                # Width vs depth, marker size scaled by context length.
                fig_size = px.scatter(
                    valid_arch_df,
                    x="hidden_size",
                    y="num_layers",
                    color="organization",
                    size="max_position",
                    hover_data=["model_id", "vocab_size"],
                    title="Model Architecture: Hidden Size vs Layers",
                    labels={"hidden_size": "Hidden Size", "num_layers": "Number of Layers"},
                )
                fig_size.update_layout(height=400)
                st.plotly_chart(fig_size, use_container_width=True)

            with col2:
                # Context-length histogram, skipping models with no value.
                context_data = valid_arch_df[valid_arch_df["max_position"] > 0]
                if not context_data.empty:
                    fig_context = px.histogram(
                        context_data,
                        x="max_position",
                        color="organization",
                        title="Context Length Distribution",
                        labels={"max_position": "Max Position Embeddings", "count": "Number of Models"},
                    )
                    fig_context.update_layout(height=400)
                    st.plotly_chart(fig_context, use_container_width=True)

            st.subheader("⚡ Technique Adoption Analysis")

            col1, col2 = st.columns(2)

            with col1:
                # A pie chart with a single slice carries no information.
                rope_counts = valid_arch_df["rope_type"].value_counts()
                if len(rope_counts) > 1:
                    fig_rope = px.pie(
                        values=rope_counts.values,
                        names=rope_counts.index,
                        title="Positional Encoding Types",
                    )
                    fig_rope.update_layout(height=300)
                    st.plotly_chart(fig_rope, use_container_width=True)

            with col2:
                known_attention = valid_arch_df[valid_arch_df["attention_type"] != "Unknown"]
                attention_counts = known_attention["attention_type"].value_counts()
                if len(attention_counts) > 0:
                    fig_attention = px.bar(
                        x=attention_counts.index,
                        y=attention_counts.values,
                        title="Attention Implementation Types",
                        labels={"x": "Attention Type", "y": "Model Count"},
                    )
                    fig_attention.update_layout(height=300)
                    st.plotly_chart(fig_attention, use_container_width=True)

            st.subheader("🌐 Organization Innovation Patterns")

            # One boolean flag per technique per model; summing the flags per
            # organization gives the number of models using each technique.
            org_techniques = []
            for _, row in df.iterrows():
                techniques = parse_json_field(row.get("techniques", "{}"))
                if not isinstance(techniques, dict):
                    # Parsed JSON may be a list/number; only dicts have .get().
                    techniques = {}

                rope_type_str = str(
                    techniques.get("rope_type")
                    or techniques.get("positional_encoding")
                    or "standard"
                ).lower()
                attention_impl_str = str(
                    techniques.get("attention_implementation") or "standard"
                ).lower()
                try:
                    has_sliding_window = float(techniques.get("sliding_window_size") or 0) > 0
                except (TypeError, ValueError):
                    # Non-numeric window size: treat as "not used".
                    has_sliding_window = False

                org_techniques.append({
                    "Organization": row.get("organization", "Unknown"),
                    "RoPE_Advanced": "yarn" in rope_type_str or "scaled" in rope_type_str,
                    "Sliding_Window": has_sliding_window,
                    "Flash_Attention": "flash" in attention_impl_str,
                })

            org_tech_df = pd.DataFrame(org_techniques)

            if not org_tech_df.empty:
                technique_cols = ["RoPE_Advanced", "Sliding_Window", "Flash_Attention"]
                heatmap_data = (
                    org_tech_df.groupby("Organization")[technique_cols].sum().astype(int)
                )

                # A heatmap needs at least two organizations to compare.
                if len(heatmap_data) > 1:
                    fig_heatmap = px.imshow(
                        heatmap_data.T,
                        title="Advanced Technique Adoption by Organization",
                        labels={"x": "Organization", "y": "Technique", "color": "Models Using Technique"},
                        aspect="auto",
                    )
                    fig_heatmap.update_layout(height=300)
                    st.plotly_chart(fig_heatmap, use_container_width=True)

            st.subheader("🔍 Model Architecture Comparison")

            # De-duplicate while keeping order so each model is offered once.
            model_options = list(dict.fromkeys(valid_arch_df["model_id"].tolist()))
            if len(model_options) >= 2:
                selected_models = st.multiselect(
                    "Select models to compare (max 4):",
                    model_options,
                    default=model_options[:2],
                    max_selections=4,
                )

                if selected_models:
                    # .copy(): a column is added below, which must not write
                    # into a filtered slice of valid_arch_df.
                    comparison_df = valid_arch_df[
                        valid_arch_df["model_id"].isin(selected_models)
                    ].copy()

                    comparison_cols = [
                        "model_id", "organization", "hidden_size", "num_layers",
                        "max_position", "vocab_size", "rope_type", "attention_type",
                    ]
                    st.dataframe(comparison_df[comparison_cols], use_container_width=True)

                    if len(comparison_df) > 1:
                        # Rough dense-transformer estimate: ~12*h^2 weights per
                        # layer (attention + 4h MLP) plus the V*h embedding
                        # table. It ignores GQA, gated MLPs and MoE experts, so
                        # it is only meaningful for relative comparison.
                        hidden = comparison_df["hidden_size"]
                        comparison_df["est_params_b"] = (
                            (
                                12 * comparison_df["num_layers"] * hidden ** 2
                                + comparison_df["vocab_size"] * hidden
                            )
                            / 1e9
                        ).round(2)

                        fig_efficiency = px.bar(
                            comparison_df,
                            x="model_id",
                            y="est_params_b",
                            title="Estimated Model Size Comparison (Billions of Parameters)",
                            labels={"est_params_b": "Estimated Parameters (B)"},
                        )
                        fig_efficiency.update_layout(height=300)
                        st.plotly_chart(fig_efficiency, use_container_width=True)

        else:
            st.warning("Insufficient architecture data for analysis. Models need valid config information.")

    else:
        st.info("**Technical analysis will appear when community data is available!**")

        st.markdown("""
        ### 🔬 What You'll See Here:

        **🏗️ Architecture Analysis**
        - Parameter distribution patterns across organizations
        - Model scaling relationships (size vs capabilities)
        - Context length and vocabulary trends

        **⚡ Innovation Tracking**
        - Technique adoption timelines (RoPE, Flash Attention, etc.)
        - Cross-organizational innovation patterns
        - Emerging architecture components

        **🧬 Model Lineage**
        - Base model relationships and fine-tuning chains
        - Architecture family evolution
        - Research paper connections

        **⚖️ Comparative Analysis**
        - Side-by-side technical specifications
        - Parameter efficiency patterns
        - Architecture similarity clustering
        """)
|
|
# Access tab: static description of the three access tiers, the CLI commands
# used to contribute, and the rationale for access control.
with tab4:
    st.header("🔐 Access Control & Contributing")
    st.write("ModelAtlas implements **responsible tiered access** for ablation research:")

    # Tier 1: open to everyone (expanded by default).
    with st.expander("🌍 PUBLIC Access", expanded=True):
        st.write("• ✅ View model architectures and configurations")
        st.write("• ✅ Compare techniques across models")
        st.write("• ✅ Analyze innovation timelines")
        st.write("• ❌ No ablation/intervention access")

    # Tier 2: earned through contributions.
    with st.expander("🔓 CONTRIBUTOR Access"):
        st.info("**Requirements:** 3+ contributions, 0.8+ quality score, 7+ days active")
        st.write("• ✅ All public features")
        st.write("• ✅ Basic intervention mapping")
        st.write("• ✅ Ablation compatibility analysis")
        st.write("• ✅ Cross-model intervention insights")

    # Tier 3: manual approval.
    with st.expander("🔥 HERETIC Access"):
        st.error("**Requirements:** 10+ contributions, 0.9+ quality score, manual approval + community vouching")
        st.write("• ✅ All contributor features")
        st.write("• ✅ Advanced ablation strategies")
        st.write("• ✅ Cross-model transfer analysis")
        st.write("• ✅ Strategic research methodologies")
        st.write("• ✅ Heretic community research notes")

    st.subheader("📋 CLI Commands")

    # Joined from a list so the snippet never depends on source indentation.
    commands = "\n".join([
        "# Setup community access",
        "python atlas.py contribute --setup",
        "",
        "# Check your access level",
        "python atlas.py contribute --status",
        "",
        "# Submit analyses",
        "python atlas.py contribute --submit",
        "",
        "# Request access upgrades",
        "python atlas.py contribute --request-access contributor",
        "python atlas.py contribute --request-access heretic",
        "",
        "# Test access control (requires contributor+)",
        "python atlas.py interventions Qwen/Qwen3-8B",
    ])

    st.code(commands, language="bash")

    st.subheader("🛡️ Why Access Control?")
    st.write("• **Protects Innovation:** Sensitive ablation research within trusted community")
    st.write("• **Rewards Quality:** Contributors earn access through meaningful work")
    st.write("• **Builds Trust:** Community vouching creates research networks")
    st.write("• **Enables Progress:** Heretic community advances boundaries responsibly")
|
|
| |
# Footer shown under every tab.
# The Dataset link targets the same repository the dashboard loads
# (RadicalNotionAI/community-analyses).
# TODO(review): the CLI link still uses the `your-org` placeholder; replace it
# with the real repository URL.
st.markdown("---")
st.markdown("""
**Community Links:**
[📊 Dataset](https://huggingface.co/datasets/RadicalNotionAI/community-analyses) |
[🚀 Dashboard](https://huggingface.co/spaces/RadicalNotionAI/modelatlas-dashboard) |
[💻 CLI Tool](https://github.com/your-org/ModelAtlas)

*Built with ModelAtlas - Architectural Intelligence for AI Research*
""")