Spaces:

lmgame
/

videoscience-bench

Running

App Files Files Community

videoscience-bench / src /streamlit_app.py

lmgame

Add Kling-v3.0

e6515dd verified 1 day ago

raw

history blame contribute delete

26.2 kB

	import streamlit as st
	import pandas as pd
	import os
	import json
	import streamlit.components.v1 as components
	import plotly.graph_objects as go
	import hashlib

	# Page configuration: browser tab title, wide layout, and a sidebar that
	# starts collapsed.
	st.set_page_config(page_title="VideoScience-Bench", layout="wide", initial_sidebar_state="collapsed")

	# ===== CSS styling: compact layout and modernized UI =====
	# The stylesheet below is injected as raw HTML through st.markdown with
	# unsafe_allow_html=True. Its selectors target Streamlit's own
	# data-testid / data-baseweb attributes as well as the custom classes
	# (.rating-container, .rank-badge, .rankings-section, .keyword-tag,
	# .keywords-container) emitted by the HTML snippets later in this file.
	st.markdown("""
	<style>
	/* 1. Reduce top spacing */
	.block-container {
	padding-top: 1.5rem;
	padding-bottom: 2rem;
	padding-left: 2rem;
	padding-right: 2rem;
	}

	/* 2. Compress global component spacing */
	div[data-testid="stVerticalBlock"] > div {
	gap: 0.5rem !important;
	}
	div[data-testid="stHorizontalBlock"] {
	gap: 0.5rem !important;
	}

	/* 3. Refined styling for Tabs */
	.stTabs [data-baseweb="tab-list"] {
	gap: 4px;
	margin-bottom: 0.5rem;
	}
	.stTabs [data-baseweb="tab"] {
	padding: 4px 12px;
	font-size: 14px;
	}

	/* 4. Dropdown styling refinement for compact appearance */
	div[data-baseweb="select"] > div {
	min-height: 32px;
	padding-top: 0;
	padding-bottom: 0;
	}

	/* 5. Enhanced button styling for play controls */
	div.stButton > button {
	width: 100%;
	border-radius: 6px;
	padding: 0.25rem 0.5rem;
	line-height: 1.2;
	}

	/* 6. Container styling for rating bars */
	.rating-container {
	background-color: #f8f9fa;
	border-radius: 6px;
	padding: 8px;
	margin-top: 0px;
	border: 1px solid #eee;
	}

	/* 7. Metric card styling used in leaderboard */
	.metric-card {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 1rem;
	border-radius: 8px;
	color: white;
	text-align: center;
	margin: 0.5rem 0;
	}

	/* Rank badge styling */
	.rank-badge {
	display: inline-block;
	width: 24px;
	height: 24px;
	line-height: 24px;
	border-radius: 50%;
	text-align: center;
	font-weight: bold;
	font-size: 12px;
	margin-right: 6px;
	}
	.rank-1 { background: linear-gradient(135deg, #FFD700, #FFA500); color: #000; }
	.rank-2 { background: linear-gradient(135deg, #C0C0C0, #808080); color: #000; }
	.rank-3 { background: linear-gradient(135deg, #CD7F32, #8B4513); color: #fff; }
	.rank-other { background: linear-gradient(135deg, #e0e0e0, #bdbdbd); color: #000; }

	/* Styling for complete rankings expanders to align with Auto-Judge theme */
	.rankings-section div[data-testid="stExpander"] {
	background: linear-gradient(90deg, rgba(102, 126, 234, 0.15) 0%, rgba(118, 75, 162, 0.15) 100%) !important;
	border: 1px solid rgba(118, 75, 162, 0.3) !important;
	border-radius: 10px !important;
	color: #f0f4ff !important;
	}

	/* Larger, centered text for expander headers */
	.rankings-section div[data-testid="stExpander"] summary {
	font-size: 20px !important;
	font-weight: 600 !important;
	padding: 16px 0 !important;
	display: flex !important;
	align-items: center !important;
	justify-content: center !important;
	text-align: center !important;
	line-height: 1.6 !important;
	min-height: 56px !important;
	width: 100%;
	color: #f0f4ff !important;
	}

	/* Ensure nested text elements in headers remain centered */
	.rankings-section div[data-testid="stExpander"] summary p,
	.rankings-section div[data-testid="stExpander"] summary span,
	.rankings-section div[data-testid="stExpander"] summary div {
	margin: 0 auto !important;
	display: inline-flex !important;
	align-items: center !important;
	justify-content: center !important;
	line-height: 1.6 !important;
	text-align: center !important;
	color: #f0f4ff !important;
	}

	/* Larger metric font inside expanders */
	.rankings-section div[data-testid="stExpander"] [data-testid="stMetricValue"] {
	font-size: 24px !important;
	}

	.rankings-section div[data-testid="stExpander"] [data-testid="stMetricLabel"] {
	font-size: 16px !important;
	}

	/* Adjustments for light mode to match leaderboard color scheme */
	body[data-theme="light"] .rankings-section div[data-testid="stExpander"] {
	background: linear-gradient(90deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%) !important;
	border: 1px solid rgba(118, 75, 162, 0.25) !important;
	color: #1f1f2d !important;
	}

	body[data-theme="light"] .rankings-section div[data-testid="stExpander"] summary,
	body[data-theme="light"] .rankings-section div[data-testid="stExpander"] summary *,
	body[data-theme="light"] .rankings-section div[data-testid="stExpander"] [data-testid="stMetricLabel"],
	body[data-theme="light"] .rankings-section div[data-testid="stExpander"] [data-testid="stMetricValue"] {
	color: #1f1f2d !important;
	}

	/* Keyword tag styling - Notion-like */
	.keyword-tag {
	display: inline-block;
	padding: 3px 10px;
	margin: 3px 4px;
	border-radius: 4px;
	font-size: 13px;
	font-weight: 500;
	white-space: nowrap;
	transition: transform 0.2s ease;
	}

	.keyword-tag:hover {
	transform: translateY(-1px);
	}

	.keywords-container {
	display: inline-flex;
	flex-wrap: wrap;
	align-items: center;
	justify-content: center;
	margin-top: 8px;
	gap: 2px;
	}

	@media (max-width: 768px) {
	.keyword-tag {
	font-size: 11px;
	padding: 2px 8px;
	margin: 2px 3px;
	}
	}
	</style>
	""", unsafe_allow_html=True)

	# ===== Data sources and model / rating metadata =====
	RATINGS_FILE = "ratings.json"          # per-video ratings, read by load_ratings()
	VIDEO_BASE_DIR = "downloaded_videos"   # root folder holding one sub-folder per model
	CSV_FILE = "Examples.csv"              # prompt / topic / keyword metadata

	# Model identifier (also used as the video sub-folder name) -> the key under
	# which that model's scores are stored in the ratings JSON.
	MODEL_NAME_MAP = {
	    "bytedance-seedance-1-pro": "seed-dance",
	    "kling-v2-5-turbo-pro": "klingv2.5",
	    "minimax-hailuo-2.3": "hailuo2.3",
	    "ray-2": "ray-2",
	    "sora-2": "sora-2",
	    "veo3-quality": "veo3",
	    "wan2.5-t2v-preview": "wan2.5",
	}

	# Selectable models, in the mapping's insertion order (positions matter:
	# the default left/right selections index into this list).
	MODELS = list(MODEL_NAME_MAP)

	# (key in the ratings JSON, label shown in the UI) for every rated dimension.
	RATING_DIMENSIONS = [
	    ("prompt_consistency", "Prompt Consistency"),
	    ("expected_phenomenon", "Phenomenon Congruency"),
	    ("dynamism", "Correct Dynamism"),
	    ("immutability", "Immutability"),
	    ("coherence", "Spatio-Temporal Coherence"),
	]

	def generate_tag_color(keyword):
	    """Return the ``(background, text)`` hex colour pair for *keyword*.

	    The pair is chosen from a fixed, Notion-inspired palette by reducing the
	    MD5 digest of the keyword modulo the palette size, so a given keyword
	    always receives the same colours.
	    """
	    palette = (
	        ('#FEE2E2', '#991B1B'),  # Red
	        ('#FFEDD5', '#9A3412'),  # Orange
	        ('#FEF3C7', '#92400E'),  # Yellow
	        ('#D1FAE5', '#065F46'),  # Green
	        ('#DBEAFE', '#1E40AF'),  # Blue
	        ('#E0E7FF', '#3730A3'),  # Indigo
	        ('#F3E8FF', '#6B21A8'),  # Purple
	        ('#FCE7F3', '#9F1239'),  # Pink
	        ('#E5E7EB', '#374151'),  # Gray
	        ('#D1F5FF', '#0369A1'),  # Cyan
	    )

	    # Big-endian integer of the raw digest == int(hexdigest, 16).
	    digest = hashlib.md5(keyword.encode()).digest()
	    slot = int.from_bytes(digest, "big") % len(palette)
	    return palette[slot]

	def render_keywords(keywords_str):
	    """Render a comma-separated keyword string as Notion-style HTML tags.

	    Args:
	        keywords_str: Comma-separated keywords. Falsy values and NaN (as
	            produced by pandas for empty CSV cells) yield an empty string.

	    Returns:
	        An HTML fragment: a ``keywords-container`` div holding one
	        ``keyword-tag`` span per non-blank keyword, or ``""`` when there is
	        nothing to show.

	    The keyword text is HTML-escaped before interpolation because the
	    result is rendered with ``unsafe_allow_html=True``; a keyword containing
	    ``&`` or ``<`` would otherwise corrupt the surrounding markup.
	    """
	    import html  # local import: only this function needs it

	    if not keywords_str or pd.isna(keywords_str):
	        return ""

	    keywords = [kw.strip() for kw in str(keywords_str).split(',') if kw.strip()]
	    if not keywords:
	        return ""

	    tags = []
	    for keyword in keywords:
	        # Colours are keyed on the raw keyword so they stay stable.
	        bg_color, text_color = generate_tag_color(keyword)
	        tags.append(
	            f"""<span class='keyword-tag' style='background-color: {bg_color}; color: {text_color};'>
	{html.escape(keyword)}
	</span>"""
	        )

	    return "<div class='keywords-container'>" + "".join(tags) + "</div>"

	@st.cache_data
	def load_ratings():
	    """Load the ratings JSON from ``RATINGS_FILE``.

	    Returns the parsed JSON content, or an empty dict when the file does not
	    exist or cannot be read/parsed (in which case the error is also shown
	    in the UI via ``st.error``).
	    """
	    loaded = {}
	    try:
	        if os.path.exists(RATINGS_FILE):
	            with open(RATINGS_FILE, 'r', encoding='utf-8') as handle:
	                loaded = json.load(handle)
	    except Exception as exc:
	        st.error(f"Error loading ratings: {exc}")
	        loaded = {}
	    return loaded

	@st.cache_data
	def load_csv_data():
	    """Load example metadata from ``CSV_FILE`` keyed by integer unique ID.

	    Each row with a non-missing ``Unique ID`` becomes an entry with the keys
	    ``prompt``, ``expected``, ``topic`` and ``keywords``. Returns an empty
	    dict when the file is absent or any error occurs while reading it (the
	    error is reported through ``st.error``).
	    """
	    try:
	        if not os.path.exists(CSV_FILE):
	            return {}

	        # utf-8-sig transparently drops a leading BOM if the CSV has one.
	        frame = pd.read_csv(CSV_FILE, encoding='utf-8-sig')
	        entries = {}
	        for _, record in frame.iterrows():
	            uid = record.get('Unique ID')
	            if pd.isna(uid):
	                continue
	            entries[int(uid)] = {
	                'prompt': record.get('Prompts', 'N/A'),
	                'expected': record.get('Expected phenomenon', 'N/A'),
	                'topic': record.get('Example Title', f'Example {uid}'),
	                'keywords': record.get('Keywords', ''),
	            }
	        return entries
	    except Exception as exc:
	        st.error(f"Error loading CSV file: {exc}")
	        return {}

	def get_rating(ratings_data, video_id, model_name, dimension, run_number):
	    """Look up one rating in the nested ratings structure.

	    The lookup path is ``ratings_data[str(video_id)][dimension][alias]``,
	    where ``alias`` is ``MODEL_NAME_MAP[model_name]``; the result is the
	    entry for the 1-based ``run_number`` in that list.

	    Returns ``None`` when any level of the path is missing, the model has no
	    alias, or ``run_number`` falls outside the list.
	    """
	    video_key = str(video_id)
	    if video_key not in ratings_data:
	        return None

	    by_dimension = ratings_data[video_key]
	    if dimension not in by_dimension:
	        return None

	    alias = MODEL_NAME_MAP.get(model_name)
	    if not alias:
	        return None

	    by_model = by_dimension[dimension]
	    if alias not in by_model:
	        return None

	    runs = by_model[alias]
	    if 1 <= run_number <= len(runs):
	        return runs[run_number - 1]
	    return None

	def build_example(topic, prompt, expected, keywords, video_id, model_runs):
	    """Assemble the dict describing one showcased example.

	    For every model in ``MODELS`` the video path is
	    ``VIDEO_BASE_DIR/<model>/vid_<video_id>_run_<run>.mp4``, where ``run`` is
	    taken from ``model_runs`` and defaults to 1 for models not listed there.
	    """
	    videos = {
	        model: os.path.join(
	            VIDEO_BASE_DIR,
	            model,
	            f"vid_{video_id}_run_{model_runs.get(model, 1)}.mp4",
	        )
	        for model in MODELS
	    }
	    return dict(
	        topic=topic,
	        prompt=prompt,
	        expected=expected,
	        keywords=keywords,
	        video_id=video_id,
	        model_runs=model_runs,
	        videos=videos,
	    )

	def build_examples(example_specs, csv_data):
	    """Turn example specs into full example dicts, in the given order.

	    Each spec must provide ``video_id`` and ``model_runs``. Metadata is
	    looked up in ``csv_data`` by video id; missing fields fall back to
	    ``'Example <id>'`` for the topic, ``'N/A'`` for prompt/expected and
	    ``''`` for keywords.
	    """
	    def _from_spec(spec):
	        vid = spec["video_id"]
	        runs = spec["model_runs"]
	        meta = csv_data.get(vid, {})
	        return build_example(
	            meta.get('topic', f'Example {vid}'),
	            meta.get('prompt', 'N/A'),
	            meta.get('expected', 'N/A'),
	            meta.get('keywords', ''),
	            vid,
	            runs,
	        )

	    return [_from_spec(spec) for spec in example_specs]

	# Load ratings and CSV metadata (both loaders are cached by Streamlit).
	ratings_data = load_ratings()
	csv_data = load_csv_data()

	# Showcased examples: for each video id, the run number of every model
	# whose video (and rating) is displayed.
	example_specs = [
	    {
	        "video_id": 113,
	        "model_runs": {
	            "bytedance-seedance-1-pro": 1,
	            "kling-v2-5-turbo-pro": 1,
	            "minimax-hailuo-2.3": 2,
	            "ray-2": 1,
	            "sora-2": 1,
	            "veo3-quality": 1,
	            "wan2.5-t2v-preview": 1,
	        },
	    },
	    {
	        "video_id": 143,
	        "model_runs": {
	            "bytedance-seedance-1-pro": 2,
	            "kling-v2-5-turbo-pro": 2,
	            "minimax-hailuo-2.3": 3,
	            "ray-2": 2,
	            "sora-2": 2,
	            "veo3-quality": 2,
	            "wan2.5-t2v-preview": 2,
	        },
	    },
	    {
	        "video_id": 175,
	        "model_runs": {
	            "bytedance-seedance-1-pro": 1,
	            "kling-v2-5-turbo-pro": 1,
	            "minimax-hailuo-2.3": 1,
	            "ray-2": 1,
	            "sora-2": 1,
	            "veo3-quality": 1,
	            "wan2.5-t2v-preview": 1,
	        },
	    },
	    {
	        "video_id": 138,
	        "model_runs": {
	            "bytedance-seedance-1-pro": 3,
	            "kling-v2-5-turbo-pro": 3,
	            "minimax-hailuo-2.3": 3,
	            "ray-2": 3,
	            "sora-2": 2,
	            "veo3-quality": 1,
	            "wan2.5-t2v-preview": 3,
	        },
	    },
	]

	# Fall back to a single placeholder entry if no example could be built, so
	# the UI below always has something to index into.
	examples = build_examples(example_specs, csv_data) or [
	    {
	        "video_id": 0,
	        "topic": "No Data",
	        "prompt": "No Data",
	        "expected": "No Data",
	        "keywords": "",
	        "videos": {},
	        "model_runs": {},
	    }
	]
	TOTAL_EXAMPLES = len(examples)

	# ===== Responsive rating bar rendering =====
	def render_rating_bars(ratings_data, video_id, model1, model2, run1, run2):
	    """Render a side-by-side rating comparison for two models.

	    Emits a header row (``model1`` on the left, ``model2`` on the right)
	    followed by one row per entry of ``RATING_DIMENSIONS``. Each score is
	    fetched with ``get_rating`` for the given run; a missing rating is
	    shown as ``0`` with no bar. A positive rating is drawn as two full-block
	    characters per point followed by a partial block.

	    NOTE(review): the bar is built by string repetition, so ratings are
	    presumably integers — confirm against the ratings file.
	    """
	    def _score_cell(rating, bar_first):
	        # Left cells read "<bar> <score>", right cells "<score> <bar>".
	        if rating > 0:
	            bar = "██" * rating + "▍"
	            return f"{bar} {rating}" if bar_first else f"{rating} {bar}"
	        return f"{rating}"

	    # Stylesheet for the rows plus the header row itself.
	    st.markdown(
	        f"""
	<style>
	.rating-container {{
	font-family: monospace;
	overflow-x: auto;
	-webkit-overflow-scrolling: touch;
	color: var(--text-color);
	background-color: transparent;
	}}

	.rating-header {{
	font-size: clamp(12px, 3vw, 20px);
	margin: 12px 0;
	white-space: nowrap;
	}}

	.rating-row {{
	font-size: clamp(12px, 3vw, 20px);
	margin: 10px 0;
	line-height: 1.6;
	white-space: nowrap;
	}}

	.model-left {{
	color: #FF6B6B;
	display: inline-block;
	width: 28%;
	min-width: 120px;
	text-align: right;
	font-weight: 700;
	}}

	.model-right {{
	color: #4ECDC4;
	display: inline-block;
	width: 28%;
	min-width: 120px;
	text-align: left;
	font-weight: 700;
	}}

	.dimension-center {{
	display: inline-block;
	width: 42%;
	min-width: 150px;
	text-align: center;
	font-weight: bold;
	}}

	.score-left {{
	color: #FF6B6B;
	display: inline-block;
	width: 28%;
	min-width: 120px;
	text-align: right;
	font-weight: 600;
	}}

	.score-right {{
	color: #4ECDC4;
	display: inline-block;
	width: 28%;
	min-width: 120px;
	text-align: left;
	font-weight: 600;
	}}

	.dim-name {{
	display: inline-block;
	width: 42%;
	min-width: 150px;
	text-align: center;
	font-weight: 700;
	}}

	@media (max-width: 768px) {{
	.rating-container {{
	padding: 0 8px;
	}}
	}}
	</style>

	<div class='rating-container'>
	<div class='rating-header'>
	<span class='model-left'>{model1}</span>
	<span class='dimension-center'>Rating Dimensions</span>
	<span class='model-right'>{model2}</span>
	</div>
	</div>
	""",
	        unsafe_allow_html=True
	    )

	    for dim_key, dim_name in RATING_DIMENSIONS:
	        # "or 0" turns a missing rating (None) into a plain zero.
	        left_score = get_rating(ratings_data, video_id, model1, dim_key, run1) or 0
	        right_score = get_rating(ratings_data, video_id, model2, dim_key, run2) or 0

	        left_bar = _score_cell(left_score, bar_first=True)
	        right_bar = _score_cell(right_score, bar_first=False)

	        st.markdown(
	            f"""
	<div class='rating-container'>
	<div class='rating-row'>
	<span class='score-left'>{left_bar}</span>
	<span class='dim-name'>{dim_name}</span>
	<span class='score-right'>{right_bar}</span>
	</div>
	</div>
	""",
	            unsafe_allow_html=True
	        )

	# Seed per-session defaults; existing values (from earlier reruns) win.
	for _state_key, _state_default in (
	    ("example_idx", 0),
	    ("model1", MODELS[4]),  # Sora
	    ("model2", MODELS[5]),  # Veo
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	# ===== Main layout =====
	st.title("⚛️ VideoScience-Bench")

	# The tab variables are numbered after the code sections below, not after
	# their on-screen position: the leaderboard (tab2) is displayed first and
	# the video comparison (tab1) last.
	tab_labels = ["🤖 Auto-Judge Leaderboard", "👥 Human Evaluation", "📹 Video Comparison"]
	tab2, tab3, tab1 = st.tabs(tab_labels)

	# ===== TAB 1: Video Comparison =====
	with tab1:
	    # --- 1. Minimal top navigation bar: [prev] [title + keywords] [next] ---
	    col_nav_1, col_nav_2, col_nav_3 = st.columns([1, 10, 1])

	    with col_nav_1:
	        at_first = st.session_state.example_idx == 0
	        if st.button("◀", key="prev", use_container_width=True, disabled=at_first):
	            st.session_state.example_idx -= 1
	            st.rerun()

	    with col_nav_2:
	        # Centered example counter, topic and keyword tags.
	        current = examples[st.session_state.example_idx]
	        keywords_html = render_keywords(current.get('keywords', ''))

	        st.markdown(
	            f"""
	<div style='text-align: center; margin-top: -5px;'>
	<span style='font-size: 14px; color: #888;'>EXAMPLE {st.session_state.example_idx + 1} / {TOTAL_EXAMPLES}</span><br>
	<span style='font-size: 18px; font-weight: 700;'>{current['topic']}</span>
	{keywords_html}
	</div>
	""", unsafe_allow_html=True
	        )

	    with col_nav_3:
	        at_last = st.session_state.example_idx == TOTAL_EXAMPLES - 1
	        if st.button("▶", key="next", use_container_width=True, disabled=at_last):
	            st.session_state.example_idx += 1
	            st.rerun()

	    # --- 2. Prompt and expectation (expanded by default) ---
	    with st.expander("📝 View Prompt & Expectation Details", expanded=True):
	        c1, c2 = st.columns(2)
	        for column, caption, field in (
	            (c1, "PROMPT", 'prompt'),
	            (c2, "EXPECTED PHENOMENON", 'expected'),
	        ):
	            with column:
	                st.caption(caption)
	                st.write(current[field])

	    # --- 3. Comparison control panel: [left model] [play] [right model] ---
	    st.markdown("<div style='margin-bottom: 5px;'></div>", unsafe_allow_html=True)

	    ctrl_col1, ctrl_col2, ctrl_col3 = st.columns([3, 1, 3])

	    with ctrl_col1:
	        # Label is collapsed to save vertical space.
	        model1 = st.selectbox(
	            "Model Left",
	            MODELS,
	            index=MODELS.index(st.session_state.model1),
	            key="m1_select",
	            label_visibility="collapsed",
	        )
	        st.session_state.model1 = model1

	    with ctrl_col2:
	        # Triggers playback of both videos (handled by the script below).
	        play = st.button("▶ Play Both", use_container_width=True, type="primary")

	    with ctrl_col3:
	        model2 = st.selectbox(
	            "Model Right",
	            MODELS,
	            index=MODELS.index(st.session_state.model2),
	            key="m2_select",
	            label_visibility="collapsed",
	        )
	        st.session_state.model2 = model2

	    # --- 4. Video playback section ---
	    vid_col1, vid_col2 = st.columns(2)

	    # The injected script reaches into the parent document, rewinds every
	    # <video> element it finds and starts playback.
	    if play:
	        components.html("""
	<script>
	setTimeout(() => {
	const videos = window.parent.document.querySelectorAll('video');
	videos.forEach(v => { v.currentTime = 0; v.play(); });
	}, 100);
	</script>
	""", height=0)

	    for column, chosen_model in ((vid_col1, model1), (vid_col2, model2)):
	        with column:
	            video_path = current["videos"].get(chosen_model, "")
	            if os.path.exists(video_path):
	                st.video(video_path)
	            else:
	                st.error(f"❌ Video not found: {video_path}")

	    # --- 5. Integrated rating bars ---
	    run_left = current["model_runs"].get(model1, 1)
	    run_right = current["model_runs"].get(model2, 1)
	    render_rating_bars(ratings_data, current['video_id'], model1, model2, run_left, run_right)

	# ===== TAB 2: Auto-Judge Leaderboard (CL+CV) =====
	with tab2:
	    st.markdown("### 🤖 VideoScience-Judge Leaderboard")
	    st.markdown("<small>Scores are computed using an evidence-grounded scheme integrating prompt-specific checklist and CV-based analysis, then averaged across all dimensions and normalized to 1.</small>", unsafe_allow_html=True)

	    # Data from Table 2 – CL+CV column
	    auto_data = {
	        "Model": ["Kling-v3.0", "Sora-2", "Veo-3", "Kling-v2.5", "Wan-2.5", "Seedance-1.0-Pro", "Hailuo-2.3", "Ray2"],
	        "Score": [0.78, 0.76, 0.65, 0.59, 0.59, 0.54, 0.50, 0.34],
	    }

	    # Highest score first; rank is the 1-based position after sorting.
	    df_auto = pd.DataFrame(auto_data).sort_values("Score", ascending=False).reset_index(drop=True)
	    df_auto["Rank"] = range(1, len(df_auto) + 1)

	    # Badge CSS class and medal for the podium; everything else is "other".
	    podium_styles = {
	        1: ("rank-1", "🥇"),
	        2: ("rank-2", "🥈"),
	        3: ("rank-3", "🥉"),
	    }

	    # One card per model: rank badge, name, score and a progress bar.
	    for _, row in df_auto.iterrows():
	        rank, model, score = row["Rank"], row["Model"], row["Score"]
	        badge_class, medal = podium_styles.get(rank, ("rank-other", ""))

	        # Scores are normalized to 1, so x100 gives the bar width in percent.
	        bar_width = score * 100

	        st.markdown(f"""
	<div style='background: linear-gradient(90deg, rgba(102,126,234,0.1) 0%, rgba(118,75,162,0.1) 100%);
	padding: 12px; border-radius: 8px; margin: 8px 0; border-left: 4px solid #667eea;'>
	<div style='display: flex; align-items: center; justify-content: space-between;'>
	<div style='display: flex; align-items: center; gap: 12px;'>
	<span class='rank-badge {badge_class}'>{rank}</span>
	<span style='font-weight: 600; font-size: 16px;'>{medal} {model}</span>
	</div>
	<div style='font-weight: 700; font-size: 20px; color: #667eea;'>{score:.2f}</div>
	</div>
	<div style='width: 100%; height: 6px; background: #e0e0e0; border-radius: 3px; margin-top: 8px; overflow: hidden;'>
	<div style='width: {bar_width}%; height: 100%; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);'></div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	# ===== TAB 3: Human Evaluation =====
	@st.fragment
	def render_human_rankings_section(df_human):
	    """Render the "Complete Rankings" list with expand/collapse-all controls.

	    Args:
	        df_human: DataFrame with ``Model``, ``Average`` and the per-dimension
	            columns ``PCS``, ``PCG``, ``CDN``, ``IMB`` and ``STC``. The rank
	            shown is ``index + 1``, so the frame is expected to be sorted
	            best-first with a fresh 0-based index.

	    Each model gets one expander whose header shows rank, medal (top three
	    only), name and average; the body shows the five dimension scores as
	    metrics. All expanders share one open/closed flag kept in
	    ``st.session_state['expander_state']``.
	    """
	    medals = ["🥇", "🥈", "🥉"]
	    # (column in df_human, label shown on the metric)
	    dimensions = [
	        ("PCS", "Prompt Consistency"),
	        ("PCG", "Phenomenon Congruency"),
	        ("CDN", "Correct Dynamism"),
	        ("IMB", "Immutability"),
	        ("STC", "Spatio-Temporal Coherence"),
	    ]

	    # 1. Initialize session state for expander control
	    if 'expander_state' not in st.session_state:
	        st.session_state['expander_state'] = False

	    # 2. Heading plus the expand/collapse-all buttons
	    col1, col2, col3 = st.columns([6, 1, 1])

	    with col1:
	        st.markdown("### 📊 Complete Rankings")

	    with col2:
	        if st.button("➕ Expand All", use_container_width=True, type="secondary"):
	            st.session_state['expander_state'] = True
	            st.rerun()

	    with col3:
	        if st.button("➖ Collapse All", use_container_width=True, type="secondary"):
	            st.session_state['expander_state'] = False
	            st.rerun()

	    # NOTE(review): each st.markdown call is rendered as its own element, so
	    # this opening/closing <div> pair presumably does not enclose the
	    # expanders created in between — confirm that the .rankings-section CSS
	    # rules actually apply.
	    st.markdown("<div class='rankings-section'>", unsafe_allow_html=True)

	    for idx, row in df_human.iterrows():
	        rank = idx + 1
	        model = row["Model"]
	        avg = row["Average"]
	        medal = medals[rank - 1] if rank <= 3 else ""

	        # Expandable ranking entry
	        with st.expander(f"#{rank} {medal} {model} — Avg: {avg:.2f}", expanded=st.session_state['expander_state']):
	            cols = st.columns(5)
	            for col, (key, name) in zip(cols, dimensions):
	                score = row[key]
	                col.metric(name, f"{score:.2f}", delta=None)

	    st.markdown("</div>", unsafe_allow_html=True)

	with tab3:
	    st.markdown("### 👥 Human Evaluation Scores")
	    st.markdown("<small>Mean annotator scores from a 1–4 Likert scale.</small>", unsafe_allow_html=True)

	    # Human evaluation dataset (Table 1)
	    human_data = {
	        "Model": ["Sora-2", "Veo-3", "Kling-v2.5", "Wan-2.5", "Seedance-1.0-Pro", "Hailuo-2.3", "Ray2"],
	        "PCS": [3.32, 3.01, 2.77, 2.87, 2.56, 2.39, 1.65],
	        "PCG": [2.56, 2.35, 1.91, 1.84, 1.78, 1.67, 1.26],
	        "CDN": [3.33, 2.83, 2.75, 2.83, 2.52, 2.57, 2.13],
	        "IMB": [3.73, 3.30, 3.36, 3.36, 3.15, 3.16, 2.44],
	        "STC": [3.71, 3.42, 3.60, 3.46, 3.46, 3.46, 2.92],
	    }
	    score_columns = ["PCS", "PCG", "CDN", "IMB", "STC"]

	    # Rank models by their mean across the five dimensions, best first.
	    df_human = pd.DataFrame(human_data)
	    df_human["Average"] = df_human[score_columns].mean(axis=1)
	    df_human = df_human.sort_values("Average", ascending=False).reset_index(drop=True)

	    # Radar chart for human scores: one filled trace per model, with the
	    # axis labels listed in the same order as score_columns.
	    fig = go.Figure()
	    categories = ['Prompt<br>Consistency', 'Phenomenon<br>Congruency', 'Dynamism', 'Immutability', 'Coherence']
	    colors = ['#667eea', '#f093fb', '#4facfe', '#43cea2', '#ff9a9e', '#fbc2eb', '#90f7ec']

	    # The index was reset above, so idx is the 0-based rank position.
	    for idx, row in df_human.iterrows():
	        trace = go.Scatterpolar(
	            r=[row[column] for column in score_columns],
	            theta=categories,
	            fill='toself',
	            name=row["Model"],
	            line=dict(color=colors[idx % len(colors)], width=2),
	            marker=dict(size=8),
	        )
	        fig.add_trace(trace)

	    fig.update_layout(
	        polar=dict(
	            radialaxis=dict(visible=True, range=[0, 4], tickfont=dict(size=10, color='red')),
	            angularaxis=dict(tickfont=dict(size=11)),
	        ),
	        showlegend=True,
	        height=450,
	        margin=dict(l=80, r=80, t=40, b=40),
	        legend=dict(orientation="h", yanchor="bottom", y=-0.15, xanchor="center", x=0.5),
	    )

	    st.plotly_chart(fig, use_container_width=True)

	    render_human_rankings_section(df_human)