# trohrbaugh's picture
# Upload app.py with huggingface_hub
# 5125fa4 verified
import streamlit as st
import pandas as pd
from datasets import load_dataset
from datetime import datetime
import json
import plotly.express as px
import plotly.graph_objects as go
from collections import defaultdict, Counter
# Browser-tab title/icon and overall page layout, gathered in one mapping
# and handed to Streamlit in a single call.
_page_settings = {
    "page_title": "πŸ—ΊοΈ ModelAtlas Community Dashboard",
    "page_icon": "πŸ—ΊοΈ",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)
# Community dataset loader
@st.cache_data(ttl=300)  # cached result is reused for 300 seconds
def load_community_data():
    """Return the community analyses as a pandas DataFrame.

    Reads the ``train`` split of ``RadicalNotionAI/community-analyses`` and
    converts it to pandas.

    Returns:
        The converted DataFrame. An empty DataFrame is returned when loading
        or conversion raises (the error is also shown via ``st.error``) or
        when the conversion result is not a ``pd.DataFrame``.
    """
    try:
        frame = load_dataset(
            "RadicalNotionAI/community-analyses", split="train"
        ).to_pandas()
    except Exception as e:
        st.error(f"Error loading dataset: {e}")
        return pd.DataFrame()
    # Anything other than a real DataFrame is treated as "no data"
    return frame if isinstance(frame, pd.DataFrame) else pd.DataFrame()
# Page heading
st.title("πŸ—ΊοΈ ModelAtlas Community Dashboard")
st.subheader("Collaborative Intelligence for AI Model Architecture Analysis")

# Fetch the community analyses (empty DataFrame when nothing could be loaded)
df = load_community_data()

# Status banner: informational while empty, success once rows exist
if df.empty:
    st.info("⏳ Waiting for First Contributions")
else:
    st.success(f"βœ… Live with {len(df)} Community Models!")

# One tab per dashboard section
_tab_labels = [
    "πŸ“Š Overview",
    "πŸ—‚οΈ Models",
    "πŸ”¬ Technical Details",
    "πŸ” Access & Contributing",
]
tab1, tab2, tab3, tab4 = st.tabs(_tab_labels)
# Overview tab: headline metrics, top organizations / model types and the
# most recent contributions, or onboarding text while the dataset is empty.
with tab1:
    st.header("πŸ“Š Community Overview")
    if not df.empty:
        # Real statistics from the community data
        total_models = len(df)
        organizations = df['organization'].nunique() if 'organization' in df.columns else 0
        # nunique() skips missing values, so both counters treat gaps the same way
        model_types = df['model_type'].nunique() if 'model_type' in df.columns else 0
        # Drop missing timestamps first so max() only ever compares real values
        analyzed_values = df['analyzed_at'].dropna() if 'analyzed_at' in df.columns else None
        if analyzed_values is not None and not analyzed_values.empty:
            latest_analysis = analyzed_values.max()
        else:
            latest_analysis = None

        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("πŸ—ΊοΈ Models Analyzed", total_models)
        with col2:
            st.metric("🏒 Organizations", organizations)
        with col3:
            st.metric("πŸ”§ Model Types", model_types)
        with col4:
            # str() makes the 10-character slice safe for non-string timestamps too
            latest_date = str(latest_analysis)[:10] if latest_analysis is not None else ""
            st.metric("πŸ“… Latest Analysis", latest_date or "Unknown")

        st.subheader("πŸ—„οΈ Community Dataset")
        st.write("**Location:** [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
        st.write(f"**Status:** Live with {total_models} community contributions!")

        # Top organizations and model types
        col1, col2 = st.columns(2)
        with col1:
            if 'organization' in df.columns:
                st.subheader("🏒 Top Organizations")
                org_counts = df['organization'].value_counts().head(5)
                for org, count in org_counts.items():
                    st.write(f"β€’ **{org}**: {count} models")
        with col2:
            if 'model_type' in df.columns:
                st.subheader("πŸ”§ Popular Model Types")
                type_counts = df['model_type'].value_counts().head(5)
                for model_type, count in type_counts.items():
                    st.write(f"β€’ **{model_type}**: {count} models")

        # Recent models
        st.subheader("πŸ†• Recent Contributions")
        # Select only the columns that exist so a missing column cannot raise KeyError
        recent_columns = [
            name for name in ('model_id', 'organization', 'analyzed_at')
            if name in df.columns
        ]
        if 'analyzed_at' in df.columns:
            # Sort by analyzed_at as string (works for ISO format dates)
            recent_df = df.sort_values('analyzed_at', ascending=False).head(5)[recent_columns]
        else:
            recent_df = df.head(5)[recent_columns]
        for _, row in recent_df.iterrows():
            analyzed_at = row.get('analyzed_at')
            # Missing timestamps are skipped; str() keeps the slice safe for non-strings
            analysis_date = f" ({str(analyzed_at)[:10]})" if pd.notna(analyzed_at) else ""
            model_id = row.get('model_id', 'Unknown')
            organization = row.get('organization', 'Unknown')
            st.write(f"β€’ `{model_id}` - {organization}{analysis_date}")
    else:
        st.write("**Status:** The ModelAtlas community platform is live and ready for contributions!")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.subheader("πŸ—„οΈ Community Dataset")
            st.write("Central repository for model architecture analyses")
            st.write("**Location:** [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
            st.write("**Status:** Live and accepting contributions")
        with col2:
            st.subheader("πŸ“ˆ Features Available")
            st.write("Ready for your contributions:")
            st.write("β€’ Community model browser")
            st.write("β€’ Innovation timeline analytics")
            st.write("β€’ Cross-organizational insights")
            st.write("β€’ Technique adoption tracking")
        with col3:
            st.subheader("πŸš€ Getting Started")
            st.write("Ready to contribute? Follow these steps:")
            st.write("1. Install ModelAtlas CLI")
            st.write("2. Setup: `python atlas.py contribute --setup`")
            st.write("3. Analyze: `python model_test.py model/name`")
            st.write("4. Submit: `python atlas.py contribute --submit`")
# Model browser tab: optional organization / model-type filters followed by a
# table of up to 50 models, or guidance text while the dataset is empty.
with tab2:
    st.header("πŸ—‚οΈ Community Model Browser")
    if not df.empty:
        st.success(f"**Community Models:** {len(df)} models available from the community!")

        # Filter options ('All' means "do not filter on this column")
        col1, col2 = st.columns(2)
        selected_org = 'All'
        selected_type = 'All'
        with col1:
            if 'organization' in df.columns:
                # dropna(): a missing value (None/NaN) cannot be ordered against strings
                orgs = ['All'] + sorted(df['organization'].dropna().unique().tolist())
                selected_org = st.selectbox("Filter by Organization", orgs)
        with col2:
            if 'model_type' in df.columns:
                types = ['All'] + sorted(df['model_type'].dropna().unique().tolist())
                selected_type = st.selectbox("Filter by Model Type", types)

        # Apply filters; boolean indexing builds new frames, so df is never modified
        filtered_df = df
        if 'organization' in df.columns and selected_org != 'All':
            filtered_df = filtered_df[filtered_df['organization'] == selected_org]
        if 'model_type' in df.columns and selected_type != 'All':
            filtered_df = filtered_df[filtered_df['model_type'] == selected_type]
        st.write(f"**Showing {len(filtered_df)} of {len(df)} models**")

        # Display models
        display_columns = ['model_id', 'organization', 'model_type', 'analyzed_at']
        available_columns = [col for col in display_columns if col in filtered_df.columns]
        if available_columns:
            # Sort by date (string format works for ISO dates) or model_id
            sort_column = 'analyzed_at' if 'analyzed_at' in filtered_df.columns else 'model_id'
            # Top 50 rows in descending order: most recent when sorted by date,
            # reverse-alphabetical when falling back to model_id
            display_df = filtered_df[available_columns].sort_values(
                sort_column, ascending=False
            ).head(50)
            st.dataframe(display_df, use_container_width=True)
        else:
            st.warning("Model data structure is not as expected. Please check dataset format.")
    else:
        st.info("**Current Status:** Waiting for first contributions to populate the dataset.")
        st.subheader("What You'll See Here")
        st.write("β€’ βœ… Community-contributed model analyses")
        st.write("β€’ βœ… Architectural comparisons and insights")
        st.write("β€’ βœ… Technique evolution tracking")
        st.write("β€’ βœ… Cross-organizational innovation patterns")
        st.subheader("Example Models to Analyze")
        examples = [
            ("Qwen/Qwen3-8B", "Advanced architecture with RoPE scaling"),
            ("deepseek-ai/DeepSeek-V3", "Large-scale MoE architecture"),
            ("THUDM/glm-4-9b", "GLM architecture innovations"),
            ("meta-llama/Llama-3.1-8B", "Llama 3.1 improvements"),
        ]
        for model, description in examples:
            st.code(model)
            st.write(f"*{description}*")
        st.write("**Start contributing to see your analyses here!**")
# Helper functions for technical analysis
def parse_json_field(field_value):
    """Return a dataset field as a dict, parsing it from JSON when needed.

    Args:
        field_value: Either a dict (returned unchanged), a JSON string, or any
            other value such as ``None`` or NaN.

    Returns:
        The dict itself, or the dict decoded from the JSON string. An empty
        dict is returned when the string is not valid JSON, when the decoded
        JSON is not an object (e.g. ``null`` or a list), or when the value is
        neither a dict nor a string. Callers can therefore always use
        ``.get()`` on the result.
    """
    if isinstance(field_value, dict):
        return field_value
    if isinstance(field_value, str):
        try:
            parsed = json.loads(field_value)
        except (ValueError, TypeError):
            # json.JSONDecodeError is a ValueError subclass
            return {}
        return parsed if isinstance(parsed, dict) else {}
    # None, NaN and any other non-dict value carry no usable fields
    return {}
def extract_architecture_metrics(df):
    """Flatten each row's config/techniques data into one row of metrics.

    Args:
        df: DataFrame of analyses. The ``config`` and ``techniques`` values
            are read through ``parse_json_field``; ``model_id``,
            ``organization`` and ``model_type`` are copied from the row.

    Returns:
        A DataFrame with the columns ``model_id``, ``organization``,
        ``model_type``, ``hidden_size``, ``num_layers``, ``max_position``,
        ``vocab_size``, ``intermediate_size``, ``rope_type``,
        ``attention_type`` and ``sliding_window``. Missing or falsy numeric
        settings become 0 and missing technique labels become 'Unknown'.
        The columns are present even when ``df`` has no rows.
    """
    columns = [
        'model_id', 'organization', 'model_type', 'hidden_size', 'num_layers',
        'max_position', 'vocab_size', 'intermediate_size', 'rope_type',
        'attention_type', 'sliding_window',
    ]
    metrics = []
    for _, row in df.iterrows():
        config = parse_json_field(row.get('config', '{}'))
        techniques = parse_json_field(row.get('techniques', '{}'))
        # Only dicts support the .get() lookups below; anything else is "no data"
        if not isinstance(config, dict):
            config = {}
        if not isinstance(techniques, dict):
            techniques = {}
        metrics.append({
            'model_id': row.get('model_id', 'Unknown'),
            'organization': row.get('organization', 'Unknown'),
            'model_type': row.get('model_type', 'Unknown'),
            'hidden_size': config.get('hidden_size') or 0,
            # Fall back to num_layers when num_hidden_layers is absent or null
            'num_layers': config.get('num_hidden_layers') or config.get('num_layers') or 0,
            'max_position': config.get('max_position_embeddings') or 0,
            'vocab_size': config.get('vocab_size') or 0,
            'intermediate_size': config.get('intermediate_size') or 0,
            'rope_type': techniques.get('rope_type') or techniques.get('positional_encoding') or 'Unknown',
            'attention_type': techniques.get('attention_implementation') or 'Unknown',
            'sliding_window': techniques.get('sliding_window_size') or 0,
        })
    # Passing columns keeps the schema stable when there are no rows
    return pd.DataFrame(metrics, columns=columns)
# Technical details tab: architecture scatter/histogram, technique adoption
# charts, an organization-by-technique heatmap and a side-by-side model
# comparison, or a description of what will appear while the dataset is empty.
with tab3:
    st.header("πŸ”¬ Technical Architecture Analysis")
    if not df.empty:
        # Extract architecture data
        arch_df = extract_architecture_metrics(df)
        # Filter out rows with missing critical data
        valid_arch_df = arch_df[(arch_df['hidden_size'] > 0) & (arch_df['num_layers'] > 0)]
        if not valid_arch_df.empty:
            st.subheader("πŸ—οΈ Architecture Parameter Distribution")
            col1, col2 = st.columns(2)
            with col1:
                # Model size scatter plot
                fig_size = px.scatter(
                    valid_arch_df,
                    x='hidden_size',
                    y='num_layers',
                    color='organization',
                    size='max_position',
                    hover_data=['model_id', 'vocab_size'],
                    title="Model Architecture: Hidden Size vs Layers",
                    labels={'hidden_size': 'Hidden Size', 'num_layers': 'Number of Layers'}
                )
                fig_size.update_layout(height=400)
                st.plotly_chart(fig_size, use_container_width=True)
            with col2:
                # Context length distribution (only models reporting a positive value)
                context_data = valid_arch_df[valid_arch_df['max_position'] > 0]
                if not context_data.empty:
                    fig_context = px.histogram(
                        context_data,
                        x='max_position',
                        color='organization',
                        title="Context Length Distribution",
                        labels={'max_position': 'Max Position Embeddings', 'count': 'Number of Models'}
                    )
                    fig_context.update_layout(height=400)
                    st.plotly_chart(fig_context, use_container_width=True)

            st.subheader("⚑ Technique Adoption Analysis")
            col1, col2 = st.columns(2)
            with col1:
                # RoPE type distribution (drawn only when more than one type exists)
                rope_counts = valid_arch_df['rope_type'].value_counts()
                if len(rope_counts) > 1:
                    fig_rope = px.pie(
                        values=rope_counts.values,
                        names=rope_counts.index,
                        title="Positional Encoding Types"
                    )
                    fig_rope.update_layout(height=300)
                    st.plotly_chart(fig_rope, use_container_width=True)
            with col2:
                # Attention implementation ('Unknown' entries are left out)
                attention_counts = valid_arch_df[valid_arch_df['attention_type'] != 'Unknown']['attention_type'].value_counts()
                if len(attention_counts) > 0:
                    fig_attention = px.bar(
                        x=attention_counts.index,
                        y=attention_counts.values,
                        title="Attention Implementation Types",
                        labels={'x': 'Attention Type', 'y': 'Model Count'}
                    )
                    fig_attention.update_layout(height=300)
                    st.plotly_chart(fig_attention, use_container_width=True)

            st.subheader("πŸ“Š Organization Innovation Patterns")
            # Organization vs technique matrix: one boolean flag per technique per model
            technique_columns = ['RoPE_Advanced', 'Sliding_Window', 'Flash_Attention']
            org_techniques = []
            for _, row in df.iterrows():
                techniques = parse_json_field(row.get('techniques', '{}'))
                if not isinstance(techniques, dict):
                    # Only a dict supports the .get() lookups below
                    techniques = {}
                org = row.get('organization', 'Unknown')
                # Extract key techniques (with None safety)
                rope_type = techniques.get('rope_type') or techniques.get('positional_encoding') or 'standard'
                sliding_window_size = techniques.get('sliding_window_size', 0)
                has_sliding_window = sliding_window_size is not None and sliding_window_size > 0
                attention_impl = techniques.get('attention_implementation') or 'standard'
                # Case-insensitive substring matching on the technique labels
                rope_type_str = str(rope_type).lower()
                attention_impl_str = str(attention_impl).lower()
                org_techniques.append({
                    'Organization': org,
                    'RoPE_Advanced': 'yarn' in rope_type_str or 'scaled' in rope_type_str,
                    'Sliding_Window': has_sliding_window,
                    'Flash_Attention': 'flash' in attention_impl_str,
                })
            org_tech_df = pd.DataFrame(org_techniques)
            # Create technique adoption heatmap data
            if not org_tech_df.empty:
                # Summing the boolean flags gives the number of models per organization
                heatmap_data = org_tech_df.groupby('Organization')[technique_columns].sum()
                if len(heatmap_data) > 1:
                    fig_heatmap = px.imshow(
                        heatmap_data.T,
                        title="Advanced Technique Adoption by Organization",
                        labels={'x': 'Organization', 'y': 'Technique', 'color': 'Models Using Technique'},
                        aspect='auto'
                    )
                    fig_heatmap.update_layout(height=300)
                    st.plotly_chart(fig_heatmap, use_container_width=True)

            st.subheader("πŸ” Model Architecture Comparison")
            # Model selection for comparison
            model_options = valid_arch_df['model_id'].tolist()
            if len(model_options) >= 2:
                selected_models = st.multiselect(
                    "Select models to compare (max 4):",
                    model_options,
                    default=model_options[:2],
                    max_selections=4
                )
                if selected_models:
                    # .copy() yields an independent frame, so adding a column below
                    # does not assign onto a slice of valid_arch_df
                    comparison_df = valid_arch_df[valid_arch_df['model_id'].isin(selected_models)].copy()
                    # Create comparison table
                    comparison_cols = ['model_id', 'organization', 'hidden_size', 'num_layers',
                                       'max_position', 'vocab_size', 'rope_type', 'attention_type']
                    display_comparison = comparison_df[comparison_cols]
                    st.dataframe(display_comparison, use_container_width=True)
                    # Parameter efficiency chart
                    if len(comparison_df) > 1:
                        # Calculate rough parameter estimate
                        # NOTE(review): hidden_size * num_layers * vocab_size is only a
                        # crude size proxy, not a real parameter count - confirm the
                        # intended formula before relying on the chart's axis label.
                        comparison_df['est_params_b'] = (
                            comparison_df['hidden_size'] * comparison_df['num_layers'] *
                            comparison_df['vocab_size'] / 1e9
                        ).round(2)
                        fig_efficiency = px.bar(
                            comparison_df,
                            x='model_id',
                            y='est_params_b',
                            title="Estimated Model Size Comparison (Billions of Parameters)",
                            labels={'est_params_b': 'Estimated Parameters (B)'}
                        )
                        fig_efficiency.update_layout(height=300)
                        st.plotly_chart(fig_efficiency, use_container_width=True)
        else:
            st.warning("Insufficient architecture data for analysis. Models need valid config information.")
    else:
        st.info("**Technical analysis will appear when community data is available!**")
        st.markdown("""
### πŸ”¬ What You'll See Here:
**πŸ—οΈ Architecture Analysis**
- Parameter distribution patterns across organizations
- Model scaling relationships (size vs capabilities)
- Context length and vocabulary trends
**⚑ Innovation Tracking**
- Technique adoption timelines (RoPE, Flash Attention, etc.)
- Cross-organizational innovation patterns
- Emerging architecture components
**🧬 Model Lineage**
- Base model relationships and fine-tuning chains
- Architecture family evolution
- Research paper connections
**βš–οΈ Comparative Analysis**
- Side-by-side technical specifications
- Parameter efficiency patterns
- Architecture similarity clustering
""")
# Access & contributing tab: static description of the three access tiers,
# the CLI commands, and the rationale for access control.
with tab4:
    st.header("πŸ” Access Control & Contributing")
    st.write("ModelAtlas implements **responsible tiered access** for ablation research:")

    # Public tier (expanded by default)
    with st.expander("🌍 PUBLIC Access", expanded=True):
        for _bullet in (
            "β€’ βœ… View model architectures and configurations",
            "β€’ βœ… Compare techniques across models",
            "β€’ βœ… Analyze innovation timelines",
            "β€’ ❌ No ablation/intervention access",
        ):
            st.write(_bullet)

    # Contributor tier: requirements first, then the feature list
    with st.expander("πŸ“Š CONTRIBUTOR Access"):
        st.info("**Requirements:** 3+ contributions, 0.8+ quality score, 7+ days active")
        for _bullet in (
            "β€’ βœ… All public features",
            "β€’ βœ… Basic intervention mapping",
            "β€’ βœ… Ablation compatibility analysis",
            "β€’ βœ… Cross-model intervention insights",
        ):
            st.write(_bullet)

    # Heretic tier: requirements first, then the feature list
    with st.expander("πŸ”₯ HERETIC Access"):
        st.error("**Requirements:** 10+ contributions, 0.9+ quality score, manual approval + community vouching")
        for _bullet in (
            "β€’ βœ… All contributor features",
            "β€’ βœ… Advanced ablation strategies",
            "β€’ βœ… Cross-model transfer analysis",
            "β€’ βœ… Strategic research methodologies",
            "β€’ βœ… Heretic community research notes",
        ):
            st.write(_bullet)

    st.subheader("πŸš€ CLI Commands")
    # Shown verbatim as a bash snippet
    commands = """
# Setup community access
python atlas.py contribute --setup
# Check your access level
python atlas.py contribute --status
# Submit analyses
python atlas.py contribute --submit
# Request access upgrades
python atlas.py contribute --request-access contributor
python atlas.py contribute --request-access heretic
# Test access control (requires contributor+)
python atlas.py interventions Qwen/Qwen3-8B
"""
    st.code(commands, language="bash")

    st.subheader("πŸ›‘οΈ Why Access Control?")
    for _bullet in (
        "β€’ **Protects Innovation:** Sensitive ablation research within trusted community",
        "β€’ **Rewards Quality:** Contributors earn access through meaningful work",
        "β€’ **Builds Trust:** Community vouching creates research networks",
        "β€’ **Enables Progress:** Heretic community advances boundaries responsibly",
    ):
        st.write(_bullet)
# Footer: horizontal rule followed by the community links.
st.markdown("---")
# The dataset link targets RadicalNotionAI/community-analyses, the repository
# that load_community_data() reads and that the overview tab links to.
# NOTE(review): the CLI link still contains the placeholder "your-org" -
# confirm the real repository URL.
st.markdown("""
**Community Links:**
[πŸ“Š Dataset](https://huggingface.co/datasets/RadicalNotionAI/community-analyses) |
[πŸš€ Dashboard](https://huggingface.co/spaces/RadicalNotionAI/modelatlas-dashboard) |
[πŸ’» CLI Tool](https://github.com/your-org/ModelAtlas)
*Built with ModelAtlas - Architectural Intelligence for AI Research*
""")