Spaces:

RadicalNotionAI
/

modelatlas-dashboard

Running

App Files Files Community

modelatlas-dashboard / app.py

trohrbaugh

Upload app.py with huggingface_hub

5125fa4 verified 1 day ago

raw

history blame contribute delete

20.7 kB

	import streamlit as st
	import pandas as pd
	from datasets import load_dataset
	from datetime import datetime
	import json
	import plotly.express as px
	import plotly.graph_objects as go
	from collections import defaultdict, Counter

	# Page-level settings: browser-tab title and icon, wide layout, sidebar collapsed at start.
	_PAGE_SETTINGS = {
	    "page_title": "🗺️ ModelAtlas Community Dashboard",
	    "page_icon": "🗺️",
	    "layout": "wide",
	    "initial_sidebar_state": "collapsed",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	# Load community data
	@st.cache_data(ttl=300)  # ttl is in seconds: cache for 5 minutes
	def load_community_data():
	    """Load the ``train`` split of ``RadicalNotionAI/community-analyses`` as a DataFrame.

	    Returns:
	        The dataset converted with ``to_pandas()``. An empty ``pd.DataFrame`` is
	        returned instead when the conversion does not yield a DataFrame or when
	        any exception is raised; in the latter case the error is also shown
	        with ``st.error``.
	    """
	    try:
	        frame = load_dataset("RadicalNotionAI/community-analyses", split="train").to_pandas()
	    except Exception as exc:
	        st.error(f"Error loading dataset: {exc}")
	        return pd.DataFrame()
	    # Ensure callers always receive a proper DataFrame
	    return frame if isinstance(frame, pd.DataFrame) else pd.DataFrame()

	# Page heading
	st.title("🗺️ ModelAtlas Community Dashboard")
	st.subheader("Collaborative Intelligence for AI Model Architecture Analysis")

	# Community analyses; an empty frame means nothing could be loaded
	df = load_community_data()

	# Status badge: show the live count, or a waiting notice when there is no data
	if df.empty:
	    st.info("⏳ Waiting for First Contributions")
	else:
	    st.success(f"✅ Live with {len(df)} Community Models!")

	# One tab per dashboard section
	_tab_labels = ["📊 Overview", "🗂️ Models", "🔬 Technical Details", "🔐 Access & Contributing"]
	tab1, tab2, tab3, tab4 = st.tabs(_tab_labels)

	def _short_date(value):
	    """Return the first 10 characters of ``str(value)``, or ``""`` when the value is missing.

	    For ISO-formatted timestamps the first 10 characters are the ``YYYY-MM-DD`` part.
	    Strings are sliced directly; other values (e.g. Timestamps) are stringified
	    first, and None/NaN/NaT are treated as missing.
	    """
	    if value is None:
	        return ""
	    if not isinstance(value, str):
	        try:
	            if bool(pd.isna(value)):
	                return ""
	        except (TypeError, ValueError):
	            # Non-scalar values have no single "is missing" answer; just stringify them.
	            pass
	    return str(value)[:10]


	with tab1:
	    st.header("📊 Community Overview")

	    if not df.empty:
	        # Real statistics from the community data
	        total_models = len(df)
	        organizations = df['organization'].nunique() if 'organization' in df.columns else 0
	        # nunique() (like the organization count) does not count missing values as a type
	        model_types = df['model_type'].nunique() if 'model_type' in df.columns else 0
	        # Missing dates are dropped before max() so they can never be reported as "latest"
	        latest_date = _short_date(df['analyzed_at'].dropna().max()) if 'analyzed_at' in df.columns else ""

	        col1, col2, col3, col4 = st.columns(4)

	        with col1:
	            st.metric("🗺️ Models Analyzed", total_models)
	        with col2:
	            st.metric("🏢 Organizations", organizations)
	        with col3:
	            st.metric("🔧 Model Types", model_types)
	        with col4:
	            st.metric("📅 Latest Analysis", latest_date or "Unknown")

	        st.subheader("🗄️ Community Dataset")
	        st.write("Location: [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
	        st.write(f"Status: Live with {total_models} community contributions!")

	        # Top organizations and model types
	        col1, col2 = st.columns(2)

	        with col1:
	            if 'organization' in df.columns:
	                st.subheader("🏢 Top Organizations")
	                org_counts = df['organization'].value_counts().head(5)
	                for org, count in org_counts.items():
	                    st.write(f"• {org}: {count} models")

	        with col2:
	            if 'model_type' in df.columns:
	                st.subheader("🔧 Popular Model Types")
	                type_counts = df['model_type'].value_counts().head(5)
	                for model_type, count in type_counts.items():
	                    st.write(f"• {model_type}: {count} models")

	        # Recent models
	        st.subheader("🆕 Recent Contributions")
	        if 'analyzed_at' in df.columns:
	            # Sort by analyzed_at as string (works for ISO format dates)
	            recent_df = df.sort_values('analyzed_at', ascending=False).head(5)
	        else:
	            recent_df = df.head(5)

	        for _, row in recent_df.iterrows():
	            # row.get() keeps this listing working when a column is absent from the dataset
	            short_date = _short_date(row.get('analyzed_at'))
	            analysis_date = f" ({short_date})" if short_date else ""
	            st.write(f"• `{row.get('model_id', 'Unknown')}` - {row.get('organization', 'Unknown')}{analysis_date}")

	    else:
	        st.write("Status: The ModelAtlas community platform is live and ready for contributions!")

	        col1, col2, col3 = st.columns(3)

	        with col1:
	            st.subheader("🗄️ Community Dataset")
	            st.write("Central repository for model architecture analyses")
	            st.write("Location: [RadicalNotionAI/community-analyses](https://huggingface.co/datasets/RadicalNotionAI/community-analyses)")
	            st.write("Status: Live and accepting contributions")

	        with col2:
	            st.subheader("📈 Features Available")
	            st.write("Ready for your contributions:")
	            st.write("• Community model browser")
	            st.write("• Innovation timeline analytics")
	            st.write("• Cross-organizational insights")
	            st.write("• Technique adoption tracking")

	        with col3:
	            st.subheader("🚀 Getting Started")
	            st.write("Ready to contribute? Follow these steps:")
	            st.write("1. Install ModelAtlas CLI")
	            st.write("2. Setup: `python atlas.py contribute --setup`")
	            st.write("3. Analyze: `python model_test.py model/name`")
	            st.write("4. Submit: `python atlas.py contribute --submit`")

	with tab2:
	    st.header("🗂️ Community Model Browser")

	    if not df.empty:
	        st.success(f"Community Models: {len(df)} models available from the community!")

	        # Filter options
	        col1, col2 = st.columns(2)

	        selected_org = 'All'
	        selected_type = 'All'

	        with col1:
	            if 'organization' in df.columns:
	                # Drop missing values and compare as strings: sorted() raises on a
	                # mix of str and None/NaN entries.
	                orgs = ['All'] + sorted(df['organization'].dropna().astype(str).unique().tolist())
	                selected_org = st.selectbox("Filter by Organization", orgs)

	        with col2:
	            if 'model_type' in df.columns:
	                types = ['All'] + sorted(df['model_type'].dropna().astype(str).unique().tolist())
	                selected_type = st.selectbox("Filter by Model Type", types)

	        # Apply filters (string comparison mirrors how the options were built)
	        filtered_df = df.copy()
	        if 'organization' in df.columns and selected_org != 'All':
	            filtered_df = filtered_df[filtered_df['organization'].astype(str) == selected_org]
	        if 'model_type' in df.columns and selected_type != 'All':
	            filtered_df = filtered_df[filtered_df['model_type'].astype(str) == selected_type]

	        st.write(f"Showing {len(filtered_df)} of {len(df)} models")

	        # Display models
	        display_columns = ['model_id', 'organization', 'model_type', 'analyzed_at']
	        available_columns = [col for col in display_columns if col in filtered_df.columns]

	        if available_columns:
	            # Sort by date (string format works for ISO dates), then model_id; if
	            # neither column exists fall back to the first displayed column so the
	            # sort key is always part of the selection.
	            sort_column = next(
	                (col for col in ('analyzed_at', 'model_id') if col in available_columns),
	                available_columns[0],
	            )
	            display_df = filtered_df[available_columns].sort_values(
	                sort_column, ascending=False
	            ).head(50)  # Limit to the first 50 rows after sorting

	            st.dataframe(display_df, use_container_width=True)
	        else:
	            st.warning("Model data structure is not as expected. Please check dataset format.")

	    else:
	        st.info("Current Status: Waiting for first contributions to populate the dataset.")

	        st.subheader("What You'll See Here")
	        st.write("• ✅ Community-contributed model analyses")
	        st.write("• ✅ Architectural comparisons and insights")
	        st.write("• ✅ Technique evolution tracking")
	        st.write("• ✅ Cross-organizational innovation patterns")

	        st.subheader("Example Models to Analyze")

	        examples = [
	            ("Qwen/Qwen3-8B", "Advanced architecture with RoPE scaling"),
	            ("deepseek-ai/DeepSeek-V3", "Large-scale MoE architecture"),
	            ("THUDM/glm-4-9b", "GLM architecture innovations"),
	            ("meta-llama/Llama-3.1-8B", "Llama 3.1 improvements"),
	        ]

	        for model, description in examples:
	            st.code(model)
	            st.write(description)

	        st.write("Start contributing to see your analyses here!")

	# Helper functions for technical analysis
	def parse_json_field(field_value):
	    """Safely parse a JSON-object field from the dataset.

	    Args:
	        field_value: A JSON string, an already-decoded dict, or a missing value.

	    Returns:
	        The decoded dict. ``{}`` is returned when the value is missing, is not
	        valid JSON, or does not decode to a dict (e.g. ``"null"``, ``"[1, 2]"``,
	        NaN), so callers can always use ``.get()`` on the result.
	    """
	    if isinstance(field_value, str):
	        try:
	            field_value = json.loads(field_value)
	        except (ValueError, RecursionError):
	            # json.JSONDecodeError is a ValueError; RecursionError covers absurdly deep nesting.
	            return {}
	    return field_value if isinstance(field_value, dict) else {}

	def extract_architecture_metrics(df):
	    """Extract architecture metrics from the dataset.

	    Each row's ``config`` and ``techniques`` fields are decoded with
	    ``parse_json_field`` and flattened into one record per model.

	    Args:
	        df: DataFrame of community analyses. ``model_id``, ``organization``,
	            ``model_type``, ``config`` and ``techniques`` are read when present.

	    Returns:
	        A DataFrame with the columns listed in ``columns`` below. Missing or
	        falsy numeric settings become 0 and missing labels become 'Unknown'.
	        The columns are present even when ``df`` has no rows.
	    """
	    columns = [
	        'model_id', 'organization', 'model_type', 'hidden_size', 'num_layers',
	        'max_position', 'vocab_size', 'intermediate_size', 'rope_type',
	        'attention_type', 'sliding_window',
	    ]
	    metrics = []
	    for _, row in df.iterrows():
	        config = parse_json_field(row.get('config', '{}'))
	        techniques = parse_json_field(row.get('techniques', '{}'))

	        metrics.append({
	            'model_id': row.get('model_id', 'Unknown'),
	            'organization': row.get('organization', 'Unknown'),
	            'model_type': row.get('model_type', 'Unknown'),
	            'hidden_size': config.get('hidden_size', 0) or 0,
	            # Chain with `or` so a null num_hidden_layers still falls back to num_layers
	            'num_layers': config.get('num_hidden_layers') or config.get('num_layers') or 0,
	            'max_position': config.get('max_position_embeddings', 0) or 0,
	            'vocab_size': config.get('vocab_size', 0) or 0,
	            'intermediate_size': config.get('intermediate_size', 0) or 0,
	            'rope_type': techniques.get('rope_type') or techniques.get('positional_encoding') or 'Unknown',
	            'attention_type': techniques.get('attention_implementation', 'Unknown') or 'Unknown',
	            'sliding_window': techniques.get('sliding_window_size', 0) or 0,
	        })

	    # Passing columns= keeps the schema stable for an empty input frame
	    return pd.DataFrame(metrics, columns=columns)

	with tab3:
	    st.header("🔬 Technical Architecture Analysis")

	    if not df.empty:
	        # Extract architecture data
	        arch_df = extract_architecture_metrics(df)

	        # Filter out rows with missing critical data
	        valid_arch_df = arch_df[(arch_df['hidden_size'] > 0) & (arch_df['num_layers'] > 0)]

	        if not valid_arch_df.empty:
	            st.subheader("🏗️ Architecture Parameter Distribution")

	            col1, col2 = st.columns(2)

	            with col1:
	                # Model size scatter plot
	                fig_size = px.scatter(
	                    valid_arch_df,
	                    x='hidden_size',
	                    y='num_layers',
	                    color='organization',
	                    size='max_position',
	                    hover_data=['model_id', 'vocab_size'],
	                    title="Model Architecture: Hidden Size vs Layers",
	                    labels={'hidden_size': 'Hidden Size', 'num_layers': 'Number of Layers'}
	                )
	                fig_size.update_layout(height=400)
	                st.plotly_chart(fig_size, use_container_width=True)

	            with col2:
	                # Context length distribution
	                context_data = valid_arch_df[valid_arch_df['max_position'] > 0]
	                if not context_data.empty:
	                    fig_context = px.histogram(
	                        context_data,
	                        x='max_position',
	                        color='organization',
	                        title="Context Length Distribution",
	                        labels={'max_position': 'Max Position Embeddings', 'count': 'Number of Models'}
	                    )
	                    fig_context.update_layout(height=400)
	                    st.plotly_chart(fig_context, use_container_width=True)

	            st.subheader("⚡ Technique Adoption Analysis")

	            col1, col2 = st.columns(2)

	            with col1:
	                # RoPE type distribution (only drawn when there is more than one category)
	                rope_counts = valid_arch_df['rope_type'].value_counts()
	                if len(rope_counts) > 1:
	                    fig_rope = px.pie(
	                        values=rope_counts.values,
	                        names=rope_counts.index,
	                        title="Positional Encoding Types"
	                    )
	                    fig_rope.update_layout(height=300)
	                    st.plotly_chart(fig_rope, use_container_width=True)

	            with col2:
	                # Attention implementation
	                attention_counts = valid_arch_df[valid_arch_df['attention_type'] != 'Unknown']['attention_type'].value_counts()
	                if len(attention_counts) > 0:
	                    fig_attention = px.bar(
	                        x=attention_counts.index,
	                        y=attention_counts.values,
	                        title="Attention Implementation Types",
	                        labels={'x': 'Attention Type', 'y': 'Model Count'}
	                    )
	                    fig_attention.update_layout(height=300)
	                    st.plotly_chart(fig_attention, use_container_width=True)

	            st.subheader("📊 Organization Innovation Patterns")

	            # Organization vs technique matrix (built from all rows, not only valid_arch_df)
	            org_techniques = []
	            for _, row in df.iterrows():
	                techniques = parse_json_field(row.get('techniques', '{}'))
	                org = row.get('organization', 'Unknown')

	                # Extract key techniques (with None safety)
	                rope_type = techniques.get('rope_type') or techniques.get('positional_encoding') or 'standard'
	                sliding_window_size = techniques.get('sliding_window_size', 0)
	                has_sliding_window = sliding_window_size is not None and sliding_window_size > 0
	                attention_impl = techniques.get('attention_implementation') or 'standard'

	                # Safe string operations
	                rope_type_str = str(rope_type).lower() if rope_type else 'standard'
	                attention_impl_str = str(attention_impl).lower() if attention_impl else 'standard'

	                org_techniques.append({
	                    'Organization': org,
	                    'RoPE_Advanced': 'yes' if 'yarn' in rope_type_str or 'scaled' in rope_type_str else 'no',
	                    'Sliding_Window': 'yes' if has_sliding_window else 'no',
	                    'Flash_Attention': 'yes' if 'flash' in attention_impl_str else 'no'
	                })

	            org_tech_df = pd.DataFrame(org_techniques)

	            # Create technique adoption heatmap data: per organization, count the 'yes' rows
	            if not org_tech_df.empty:
	                heatmap_data = org_tech_df.groupby('Organization').agg({
	                    'RoPE_Advanced': lambda x: (x == 'yes').sum(),
	                    'Sliding_Window': lambda x: (x == 'yes').sum(),
	                    'Flash_Attention': lambda x: (x == 'yes').sum()
	                }).reset_index()

	                if len(heatmap_data) > 1:
	                    fig_heatmap = px.imshow(
	                        heatmap_data.set_index('Organization').T,
	                        title="Advanced Technique Adoption by Organization",
	                        labels={'x': 'Organization', 'y': 'Technique', 'color': 'Models Using Technique'},
	                        aspect='auto'
	                    )
	                    fig_heatmap.update_layout(height=300)
	                    st.plotly_chart(fig_heatmap, use_container_width=True)

	            st.subheader("🔍 Model Architecture Comparison")

	            # Model selection for comparison
	            model_options = valid_arch_df['model_id'].tolist()
	            if len(model_options) >= 2:
	                selected_models = st.multiselect(
	                    "Select models to compare (max 4):",
	                    model_options,
	                    default=model_options[:2],
	                    max_selections=4
	                )

	                if selected_models:
	                    # .copy() so the derived column below is added to an independent
	                    # frame rather than to a filtered slice of valid_arch_df.
	                    comparison_df = valid_arch_df[valid_arch_df['model_id'].isin(selected_models)].copy()

	                    # Create comparison table
	                    comparison_cols = ['model_id', 'organization', 'hidden_size', 'num_layers',
	                                       'max_position', 'vocab_size', 'rope_type', 'attention_type']
	                    display_comparison = comparison_df[comparison_cols]
	                    st.dataframe(display_comparison, use_container_width=True)

	                    # Parameter efficiency chart
	                    if len(comparison_df) > 1:
	                        # Rough dense-transformer estimate: about 12 * layers * hidden^2
	                        # weights in the transformer blocks plus a vocab * hidden
	                        # embedding matrix. This is an approximation only; it does not
	                        # model MoE experts, grouped-query attention or tied embeddings.
	                        block_params = 12 * comparison_df['num_layers'] * comparison_df['hidden_size'] ** 2
	                        embedding_params = comparison_df['vocab_size'] * comparison_df['hidden_size']
	                        comparison_df['est_params_b'] = (
	                            (block_params + embedding_params) / 1e9
	                        ).round(2)

	                        fig_efficiency = px.bar(
	                            comparison_df,
	                            x='model_id',
	                            y='est_params_b',
	                            title="Estimated Model Size Comparison (Billions of Parameters)",
	                            labels={'est_params_b': 'Estimated Parameters (B)'}
	                        )
	                        fig_efficiency.update_layout(height=300)
	                        st.plotly_chart(fig_efficiency, use_container_width=True)

	        else:
	            st.warning("Insufficient architecture data for analysis. Models need valid config information.")

	    else:
	        st.info("Technical analysis will appear when community data is available!")

	        st.markdown("""
	### 🔬 What You'll See Here:

	🏗️ Architecture Analysis
	- Parameter distribution patterns across organizations
	- Model scaling relationships (size vs capabilities)
	- Context length and vocabulary trends

	⚡ Innovation Tracking
	- Technique adoption timelines (RoPE, Flash Attention, etc.)
	- Cross-organizational innovation patterns
	- Emerging architecture components

	🧬 Model Lineage
	- Base model relationships and fine-tuning chains
	- Architecture family evolution
	- Research paper connections

	⚖️ Comparative Analysis
	- Side-by-side technical specifications
	- Parameter efficiency patterns
	- Architecture similarity clustering
	""")

	with tab4:
	    st.header("🔐 Access Control & Contributing")
	    st.write("ModelAtlas implements responsible tiered access for ablation research:")

	    # Public tier (the only expander open by default)
	    with st.expander("🌍 PUBLIC Access", expanded=True):
	        for bullet in (
	            "• ✅ View model architectures and configurations",
	            "• ✅ Compare techniques across models",
	            "• ✅ Analyze innovation timelines",
	            "• ❌ No ablation/intervention access",
	        ):
	            st.write(bullet)

	    # Contributor tier: requirements first, then the unlocked features
	    with st.expander("📊 CONTRIBUTOR Access"):
	        st.info("Requirements: 3+ contributions, 0.8+ quality score, 7+ days active")
	        for bullet in (
	            "• ✅ All public features",
	            "• ✅ Basic intervention mapping",
	            "• ✅ Ablation compatibility analysis",
	            "• ✅ Cross-model intervention insights",
	        ):
	            st.write(bullet)

	    # Heretic tier: requirements first, then the unlocked features
	    with st.expander("🔥 HERETIC Access"):
	        st.error("Requirements: 10+ contributions, 0.9+ quality score, manual approval + community vouching")
	        for bullet in (
	            "• ✅ All contributor features",
	            "• ✅ Advanced ablation strategies",
	            "• ✅ Cross-model transfer analysis",
	            "• ✅ Strategic research methodologies",
	            "• ✅ Heretic community research notes",
	        ):
	            st.write(bullet)

	    st.subheader("🚀 CLI Commands")

	    # Shell snippet rendered verbatim as a bash code block
	    commands = """
	# Setup community access
	python atlas.py contribute --setup

	# Check your access level
	python atlas.py contribute --status

	# Submit analyses
	python atlas.py contribute --submit

	# Request access upgrades
	python atlas.py contribute --request-access contributor
	python atlas.py contribute --request-access heretic

	# Test access control (requires contributor+)
	python atlas.py interventions Qwen/Qwen3-8B
	"""

	    st.code(commands, language="bash")

	    st.subheader("🛡️ Why Access Control?")
	    for reason in (
	        "• Protects Innovation: Sensitive ablation research within trusted community",
	        "• Rewards Quality: Contributors earn access through meaningful work",
	        "• Builds Trust: Community vouching creates research networks",
	        "• Enables Progress: Heretic community advances boundaries responsibly",
	    ):
	        st.write(reason)

	# Footer: horizontal rule followed by community links.
	# The separators are plain "|" characters: "\|" in a non-raw string is an
	# invalid escape sequence, and a bare pipe renders the same in Markdown.
	# NOTE(review): the Dataset link targets "modelatlas-community", while the data
	# loader and the Overview tab use "RadicalNotionAI/community-analyses" — confirm
	# which repository is intended.
	# NOTE(review): the CLI Tool URL contains "your-org", which looks like an
	# unfilled placeholder — confirm the real repository.
	st.markdown("---")
	st.markdown("""
	Community Links:
	[📊 Dataset](https://huggingface.co/datasets/RadicalNotionAI/modelatlas-community) |
	[🚀 Dashboard](https://huggingface.co/spaces/RadicalNotionAI/modelatlas-dashboard) |
	[💻 CLI Tool](https://github.com/your-org/ModelAtlas)

	Built with ModelAtlas - Architectural Intelligence for AI Research
	""")