"""ModelAtlas Community Dashboard.

Streamlit app that surfaces community-contributed model-architecture
analyses stored in the ``RadicalNotionAI/community-analyses`` dataset:
overview stats, a filterable model browser, technical charts, and
contribution / access-control docs.
"""

import streamlit as st
import pandas as pd
from datasets import load_dataset
from datetime import datetime
import json
import plotly.express as px
import plotly.graph_objects as go
from collections import defaultdict, Counter

# Set page config
st.set_page_config(
    page_title="πŸ—ΊοΈ ModelAtlas Community Dashboard",
    page_icon="πŸ—ΊοΈ",
    layout="wide",
    initial_sidebar_state="collapsed"
)


# Load community data
@st.cache_data(ttl=300)  # Cache for 5 minutes
def load_community_data():
    """Fetch the community analyses dataset as a DataFrame.

    Returns:
        pd.DataFrame: the ``train`` split of the community dataset, or an
        empty DataFrame on any load/conversion failure so the UI degrades
        gracefully instead of crashing.
    """
    try:
        dataset = load_dataset("RadicalNotionAI/community-analyses", split="train")
        df = dataset.to_pandas()
        # Ensure we have a proper DataFrame (to_pandas can return other types
        # for some dataset layouts).
        if not isinstance(df, pd.DataFrame):
            return pd.DataFrame()
        return df
    except Exception as e:
        # Broad on purpose: network/auth/schema errors should all surface as
        # a visible message, not a stack trace.
        st.error(f"Error loading dataset: {e}")
        return pd.DataFrame()


# Main header
st.title("πŸ—ΊοΈ ModelAtlas Community Dashboard")
st.subheader("Collaborative Intelligence for AI Model Architecture Analysis")

# Load data
df = load_community_data()

# Status badge based on actual data
if not df.empty:
    st.success(f"βœ… Live with {len(df)} Community Models!")
else:
    st.info("⏳ Waiting for First Contributions")

# Create tabs
tab1, tab2, tab3, tab4 = st.tabs(
    ["πŸ“Š Overview", "πŸ—‚οΈ Models", "πŸ”¬ Technical Details", "πŸ” Access & Contributing"]
)

with tab1:
    st.header("πŸ“Š Community Overview")

    if not df.empty:
        # Real statistics from the community data; every column access is
        # guarded because the dataset schema is community-contributed.
        total_models = len(df)
        organizations = df['organization'].nunique() if 'organization' in df.columns else 0
        model_types = len(df['model_type'].unique()) if 'model_type' in df.columns else 0
        latest_analysis = df['analyzed_at'].max() if 'analyzed_at' in df.columns else None

        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("πŸ—ΊοΈ Models Analyzed", total_models)
        with col2:
            st.metric("🏒 Organizations", organizations)
        with col3:
            st.metric("πŸ”§ Model Types", model_types)
        with col4:
            if latest_analysis:
                # analyzed_at is assumed ISO-8601, so the first 10 chars are
                # the date portion (YYYY-MM-DD).
                latest_date = (
                    latest_analysis[:10]
                    if isinstance(latest_analysis, str)
                    else str(latest_analysis)[:10]
                )
                st.metric("πŸ“… Latest Analysis", latest_date)
            else:
                st.metric("πŸ“… Latest Analysis", "Unknown")

        st.subheader("πŸ—„οΈ Community Dataset")
        st.write("**Location:** [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
        st.write(f"**Status:** Live with {total_models} community contributions!")

        # Top organizations and model types
        col1, col2 = st.columns(2)
        with col1:
            if 'organization' in df.columns:
                st.subheader("🏒 Top Organizations")
                org_counts = df['organization'].value_counts().head(5)
                for org, count in org_counts.items():
                    st.write(f"β€’ **{org}**: {count} models")
        with col2:
            if 'model_type' in df.columns:
                st.subheader("πŸ”§ Popular Model Types")
                type_counts = df['model_type'].value_counts().head(5)
                for model_type, count in type_counts.items():
                    st.write(f"β€’ **{model_type}**: {count} models")

        # Recent models
        st.subheader("πŸ†• Recent Contributions")
        if 'analyzed_at' in df.columns:
            # Sort by analyzed_at as string (works for ISO format dates)
            recent_df = df.sort_values('analyzed_at', ascending=False).head(5)[
                ['model_id', 'organization', 'analyzed_at']
            ]
        else:
            recent_df = df.head(5)[['model_id', 'organization']]

        for _, row in recent_df.iterrows():
            # `in` on a Series checks index labels, so this is a safe
            # presence test for the optional analyzed_at field.
            analysis_date = f" ({row['analyzed_at'][:10]})" if 'analyzed_at' in row else ""
            st.write(f"β€’ `{row['model_id']}` - {row['organization']}{analysis_date}")
    else:
        st.write("**Status:** The ModelAtlas community platform is live and ready for contributions!")

        col1, col2, col3 = st.columns(3)
        with col1:
            st.subheader("πŸ—„οΈ Community Dataset")
            st.write("Central repository for model architecture analyses")
            st.write("**Location:** [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
            st.write("**Status:** Live and accepting contributions")
        with col2:
            st.subheader("πŸ“ˆ Features Available")
            st.write("Ready for your contributions:")
            st.write("β€’ Community model browser")
            st.write("β€’ Innovation timeline analytics")
            st.write("β€’ Cross-organizational insights")
            st.write("β€’ Technique adoption tracking")
        with col3:
            st.subheader("πŸš€ Getting Started")
            st.write("Ready to contribute? Follow these steps:")
            st.write("1. Install ModelAtlas CLI")
            st.write("2. Setup: `python atlas.py contribute --setup`")
            st.write("3. Analyze: `python model_test.py model/name`")
            st.write("4. Submit: `python atlas.py contribute --submit`")

with tab2:
    st.header("πŸ—‚οΈ Community Model Browser")

    if not df.empty:
        st.success(f"**Community Models:** {len(df)} models available from the community!")

        # Filter options
        col1, col2 = st.columns(2)
        selected_org = 'All'
        selected_type = 'All'
        with col1:
            if 'organization' in df.columns:
                orgs = ['All'] + sorted(df['organization'].unique().tolist())
                selected_org = st.selectbox("Filter by Organization", orgs)
        with col2:
            if 'model_type' in df.columns:
                types = ['All'] + sorted(df['model_type'].unique().tolist())
                selected_type = st.selectbox("Filter by Model Type", types)

        # Apply filters
        filtered_df = df.copy()
        if 'organization' in df.columns and selected_org != 'All':
            filtered_df = filtered_df[filtered_df['organization'] == selected_org]
        if 'model_type' in df.columns and selected_type != 'All':
            filtered_df = filtered_df[filtered_df['model_type'] == selected_type]

        st.write(f"**Showing {len(filtered_df)} of {len(df)} models**")

        # Display models
        display_columns = ['model_id', 'organization', 'model_type', 'analyzed_at']
        available_columns = [col for col in display_columns if col in filtered_df.columns]

        if available_columns:
            # Sort by date (string format works for ISO dates) or model_id
            sort_column = 'analyzed_at' if 'analyzed_at' in filtered_df.columns else 'model_id'
            display_df = filtered_df[available_columns].sort_values(
                sort_column, ascending=False
            ).head(50)  # Limit to 50 most recent
            st.dataframe(display_df, use_container_width=True)
        else:
            st.warning("Model data structure is not as expected. Please check dataset format.")
    else:
        st.info("**Current Status:** Waiting for first contributions to populate the dataset.")

        st.subheader("What You'll See Here")
        st.write("β€’ βœ… Community-contributed model analyses")
        st.write("β€’ βœ… Architectural comparisons and insights")
        st.write("β€’ βœ… Technique evolution tracking")
        st.write("β€’ βœ… Cross-organizational innovation patterns")

        st.subheader("Example Models to Analyze")
        examples = [
            ("Qwen/Qwen3-8B", "Advanced architecture with RoPE scaling"),
            ("deepseek-ai/DeepSeek-V3", "Large-scale MoE architecture"),
            ("THUDM/glm-4-9b", "GLM architecture innovations"),
            ("meta-llama/Llama-3.1-8B", "Llama 3.1 improvements")
        ]
        for model, description in examples:
            st.code(model)
            st.write(f"*{description}*")

        st.write("**Start contributing to see your analyses here!**")


# Helper functions for technical analysis
def parse_json_field(field_value):
    """Safely parse a JSON-encoded field from the dataset.

    Args:
        field_value: a JSON string, an already-parsed object, or a
            falsy placeholder.

    Returns:
        The parsed/passed-through object, or ``{}`` when the value is
        falsy or cannot be parsed.
    """
    if isinstance(field_value, str):
        try:
            return json.loads(field_value)
        # Narrowed from a bare `except:` so real errors (e.g. interrupts)
        # are not swallowed.
        except (json.JSONDecodeError, TypeError):
            return {}
    return field_value if field_value else {}


def extract_architecture_metrics(df):
    """Extract per-model architecture metrics from the raw dataset.

    Args:
        df: community dataset with at least a ``model_id`` column and
            optional JSON-encoded ``config`` / ``techniques`` columns.

    Returns:
        pd.DataFrame: one row per model with numeric architecture fields
        defaulted to 0 and categorical fields defaulted to 'Unknown'.
    """
    metrics = []
    for _, row in df.iterrows():
        config = parse_json_field(row.get('config', '{}'))
        techniques = parse_json_field(row.get('techniques', '{}'))

        # `or 0` / `or 'Unknown'` collapse explicit JSON nulls to defaults.
        metric = {
            'model_id': row['model_id'],
            'organization': row.get('organization', 'Unknown'),
            'model_type': row.get('model_type', 'Unknown'),
            'hidden_size': config.get('hidden_size', 0) or 0,
            'num_layers': config.get('num_hidden_layers', config.get('num_layers', 0)) or 0,
            'max_position': config.get('max_position_embeddings', 0) or 0,
            'vocab_size': config.get('vocab_size', 0) or 0,
            'intermediate_size': config.get('intermediate_size', 0) or 0,
            'rope_type': techniques.get('rope_type') or techniques.get('positional_encoding') or 'Unknown',
            'attention_type': techniques.get('attention_implementation', 'Unknown') or 'Unknown',
            'sliding_window': techniques.get('sliding_window_size', 0) or 0
        }
        metrics.append(metric)
    return pd.DataFrame(metrics)


with tab3:
    st.header("πŸ”¬ Technical Architecture Analysis")

    if not df.empty:
        # Extract architecture data
        arch_df = extract_architecture_metrics(df)

        # Filter out rows with missing critical data
        valid_arch_df = arch_df[(arch_df['hidden_size'] > 0) & (arch_df['num_layers'] > 0)]

        if not valid_arch_df.empty:
            st.subheader("πŸ—οΈ Architecture Parameter Distribution")

            col1, col2 = st.columns(2)
            with col1:
                # Model size scatter plot
                fig_size = px.scatter(
                    valid_arch_df,
                    x='hidden_size',
                    y='num_layers',
                    color='organization',
                    size='max_position',
                    hover_data=['model_id', 'vocab_size'],
                    title="Model Architecture: Hidden Size vs Layers",
                    labels={'hidden_size': 'Hidden Size', 'num_layers': 'Number of Layers'}
                )
                fig_size.update_layout(height=400)
                st.plotly_chart(fig_size, use_container_width=True)
            with col2:
                # Context length distribution
                context_data = valid_arch_df[valid_arch_df['max_position'] > 0]
                if not context_data.empty:
                    fig_context = px.histogram(
                        context_data,
                        x='max_position',
                        color='organization',
                        title="Context Length Distribution",
                        labels={'max_position': 'Max Position Embeddings', 'count': 'Number of Models'}
                    )
                    fig_context.update_layout(height=400)
                    st.plotly_chart(fig_context, use_container_width=True)

            st.subheader("⚑ Technique Adoption Analysis")

            col1, col2 = st.columns(2)
            with col1:
                # RoPE type distribution (pie is pointless with one slice)
                rope_counts = valid_arch_df['rope_type'].value_counts()
                if len(rope_counts) > 1:
                    fig_rope = px.pie(
                        values=rope_counts.values,
                        names=rope_counts.index,
                        title="Positional Encoding Types"
                    )
                    fig_rope.update_layout(height=300)
                    st.plotly_chart(fig_rope, use_container_width=True)
            with col2:
                # Attention implementation
                attention_counts = valid_arch_df[
                    valid_arch_df['attention_type'] != 'Unknown'
                ]['attention_type'].value_counts()
                if len(attention_counts) > 0:
                    fig_attention = px.bar(
                        x=attention_counts.index,
                        y=attention_counts.values,
                        title="Attention Implementation Types",
                        labels={'x': 'Attention Type', 'y': 'Model Count'}
                    )
                    fig_attention.update_layout(height=300)
                    st.plotly_chart(fig_attention, use_container_width=True)

            st.subheader("πŸ“Š Organization Innovation Patterns")

            # Organization vs technique matrix
            org_techniques = []
            for _, row in df.iterrows():
                techniques = parse_json_field(row.get('techniques', '{}'))
                org = row.get('organization', 'Unknown')

                # Extract key techniques (with None safety)
                rope_type = techniques.get('rope_type') or techniques.get('positional_encoding') or 'standard'
                sliding_window_size = techniques.get('sliding_window_size', 0)
                has_sliding_window = sliding_window_size is not None and sliding_window_size > 0
                attention_impl = techniques.get('attention_implementation') or 'standard'

                # Safe string operations
                rope_type_str = str(rope_type).lower() if rope_type else 'standard'
                attention_impl_str = str(attention_impl).lower() if attention_impl else 'standard'

                org_techniques.append({
                    'Organization': org,
                    'RoPE_Advanced': 'yes' if 'yarn' in rope_type_str or 'scaled' in rope_type_str else 'no',
                    'Sliding_Window': 'yes' if has_sliding_window else 'no',
                    'Flash_Attention': 'yes' if 'flash' in attention_impl_str else 'no'
                })

            org_tech_df = pd.DataFrame(org_techniques)

            # Create technique adoption heatmap data
            if not org_tech_df.empty:
                heatmap_data = org_tech_df.groupby('Organization').agg({
                    'RoPE_Advanced': lambda x: (x == 'yes').sum(),
                    'Sliding_Window': lambda x: (x == 'yes').sum(),
                    'Flash_Attention': lambda x: (x == 'yes').sum()
                }).reset_index()

                if len(heatmap_data) > 1:
                    fig_heatmap = px.imshow(
                        heatmap_data.set_index('Organization').T,
                        title="Advanced Technique Adoption by Organization",
                        labels={'x': 'Organization', 'y': 'Technique', 'color': 'Models Using Technique'},
                        aspect='auto'
                    )
                    fig_heatmap.update_layout(height=300)
                    st.plotly_chart(fig_heatmap, use_container_width=True)

            st.subheader("πŸ” Model Architecture Comparison")

            # Model selection for comparison
            model_options = valid_arch_df['model_id'].tolist()
            if len(model_options) >= 2:
                selected_models = st.multiselect(
                    "Select models to compare (max 4):",
                    model_options,
                    default=model_options[:2],
                    max_selections=4
                )

                if selected_models:
                    # .copy() so the est_params_b assignment below writes to
                    # an owned frame, not a view (SettingWithCopyWarning).
                    comparison_df = valid_arch_df[
                        valid_arch_df['model_id'].isin(selected_models)
                    ].copy()

                    # Create comparison table
                    comparison_cols = ['model_id', 'organization', 'hidden_size', 'num_layers',
                                       'max_position', 'vocab_size', 'rope_type', 'attention_type']
                    display_comparison = comparison_df[comparison_cols]
                    st.dataframe(display_comparison, use_container_width=True)

                    # Parameter efficiency chart
                    if len(comparison_df) > 1:
                        # Calculate rough parameter estimate
                        comparison_df['est_params_b'] = (
                            comparison_df['hidden_size'] * comparison_df['num_layers'] *
                            comparison_df['vocab_size'] / 1e9
                        ).round(2)

                        fig_efficiency = px.bar(
                            comparison_df,
                            x='model_id',
                            y='est_params_b',
                            title="Estimated Model Size Comparison (Billions of Parameters)",
                            labels={'est_params_b': 'Estimated Parameters (B)'}
                        )
                        fig_efficiency.update_layout(height=300)
                        st.plotly_chart(fig_efficiency, use_container_width=True)
        else:
            st.warning("Insufficient architecture data for analysis. Models need valid config information.")
    else:
        st.info("**Technical analysis will appear when community data is available!**")
        st.markdown("""
### πŸ”¬ What You'll See Here:

**πŸ—οΈ Architecture Analysis**
- Parameter distribution patterns across organizations
- Model scaling relationships (size vs capabilities)
- Context length and vocabulary trends

**⚑ Innovation Tracking**
- Technique adoption timelines (RoPE, Flash Attention, etc.)
- Cross-organizational innovation patterns
- Emerging architecture components

**🧬 Model Lineage**
- Base model relationships and fine-tuning chains
- Architecture family evolution
- Research paper connections

**βš–οΈ Comparative Analysis**
- Side-by-side technical specifications
- Parameter efficiency patterns
- Architecture similarity clustering
""")

with tab4:
    st.header("πŸ” Access Control & Contributing")

    st.write("ModelAtlas implements **responsible tiered access** for ablation research:")

    # Public Access
    with st.expander("🌍 PUBLIC Access", expanded=True):
        st.write("β€’ βœ… View model architectures and configurations")
        st.write("β€’ βœ… Compare techniques across models")
        st.write("β€’ βœ… Analyze innovation timelines")
        st.write("β€’ ❌ No ablation/intervention access")

    # Contributor Access
    with st.expander("πŸ“Š CONTRIBUTOR Access"):
        st.info("**Requirements:** 3+ contributions, 0.8+ quality score, 7+ days active")
        st.write("β€’ βœ… All public features")
        st.write("β€’ βœ… Basic intervention mapping")
        st.write("β€’ βœ… Ablation compatibility analysis")
        st.write("β€’ βœ… Cross-model intervention insights")

    # Heretic Access
    with st.expander("πŸ”₯ HERETIC Access"):
        st.error("**Requirements:** 10+ contributions, 0.9+ quality score, manual approval + community vouching")
        st.write("β€’ βœ… All contributor features")
        st.write("β€’ βœ… Advanced ablation strategies")
        st.write("β€’ βœ… Cross-model transfer analysis")
        st.write("β€’ βœ… Strategic research methodologies")
        st.write("β€’ βœ… Heretic community research notes")

    st.subheader("πŸš€ CLI Commands")
    commands = """
# Setup community access
python atlas.py contribute --setup

# Check your access level
python atlas.py contribute --status

# Submit analyses
python atlas.py contribute --submit

# Request access upgrades
python atlas.py contribute --request-access contributor
python atlas.py contribute --request-access heretic

# Test access control (requires contributor+)
python atlas.py interventions Qwen/Qwen3-8B
"""
    st.code(commands, language="bash")

    st.subheader("πŸ›‘οΈ Why Access Control?")
    st.write("β€’ **Protects Innovation:** Sensitive ablation research within trusted community")
    st.write("β€’ **Rewards Quality:** Contributors earn access through meaningful work")
    st.write("β€’ **Builds Trust:** Community vouching creates research networks")
    st.write("β€’ **Enables Progress:** Heretic community advances boundaries responsibly")

# Footer
st.markdown("---")
# NOTE: dataset link corrected to community-analyses to match the dataset
# used everywhere else in this app.
st.markdown("""
**Community Links:**
[πŸ“Š Dataset](https://huggingface.co/datasets/RadicalNotionAI/community-analyses) |
[πŸš€ Dashboard](https://huggingface.co/spaces/RadicalNotionAI/modelatlas-dashboard) |
[πŸ’» CLI Tool](https://github.com/your-org/ModelAtlas)

*Built with ModelAtlas - Architectural Intelligence for AI Research*
""")