Spaces:

mcpbench
/

mcp-bench

Running

App Files Files Community

mcp-bench / index.html

ztwang

Upload index.html

a05136a verified 6 months ago

raw

history blame contribute delete

7.17 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
		<meta charset="UTF-8">
		<meta name="viewport" content="width=device-width, initial-scale=1.0">
		<title>MCP Benchmark Leaderboard</title>
		<meta name="description" content="MCP-Bench leaderboard: benchmarking tool-using LLM agents with complex real-world tasks via MCP servers.">
		<!-- Open connections to the Google Fonts origins early. The stylesheet comes from
		     fonts.googleapis.com; the font files are fetched in CORS mode from
		     fonts.gstatic.com, which is why that hint carries crossorigin. -->
		<link rel="preconnect" href="https://fonts.googleapis.com">
		<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
		<!-- Stylesheet order is kept as it was so the cascade is unchanged. -->
		<link rel="stylesheet" href="style.css">
		<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
		<!-- NOTE(review): third-party stylesheet loaded without integrity/crossorigin (SRI);
		     consider adding the hash published by cdnjs for this exact version. -->
		<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
	</head>
	<body>
	<div class="container">
	<!-- Paper information: title, author list, affiliations, and quick links -->
	<header class="paper-header">
		<h1 class="paper-title">MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers</h1>
		<div class="paper-authors">
			<p>Zhenting Wang, Qi Chang, Hemani Patel, Shashank Biju, Cheng-En Wu, Quan Liu, Aolin Ding, Alireza Rezazadeh, Ankit Shah, Yujia Bao, Eugene Siow</p>
			<p class="affiliation">Accenture, UC Berkeley</p>
		</div>
		<!-- Each link has visible text, so the icons are decorative and are hidden from
		     assistive technology with aria-hidden. External links open in a new tab. -->
		<div class="paper-links">
			<a href="https://github.com/Accenture/mcp-bench" class="paper-link" target="_blank" rel="noopener noreferrer">
				<i class="fab fa-github" aria-hidden="true"></i> GitHub
			</a>
			<a href="https://arxiv.org/abs/2508.20453" class="paper-link" target="_blank" rel="noopener noreferrer">
				<i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
			</a>
			<!-- In-page jump to the section with id="leaderboard" -->
			<a href="#leaderboard" class="paper-link">
				<i class="fas fa-trophy" aria-hidden="true"></i> Leaderboard
			</a>
		</div>
	</header>

	<!-- Overview: architecture diagram followed by a one-paragraph summary of the benchmark -->
	<section class="diagram-section">
		<img class="diagram-image" src="mcp-bench.png" alt="MCP-Bench Architecture Diagram">
		<p class="diagram-caption">
			MCP-Bench is a comprehensive evaluation framework designed to assess Large Language Models' (LLMs) capabilities in tool-use scenarios through the Model Context Protocol (MCP). This benchmark provides an end-to-end pipeline for evaluating how effectively different LLMs can discover, select, and utilize tools to solve real-world tasks.
		</p>
	</section>

	<!-- Ranking Chart -->
	<!-- <section class="chart-section">
	<h2 class="section-title">Performance Ranking</h2>
	<img src="ranking.png" alt="MCP Benchmark Ranking Chart" class="ranking-chart">
	</section> -->

	<!-- Leaderboard Header -->
	<section class="leaderboard-section" id="leaderboard">
	<h2 class="section-title">Detailed Results</h2>

	<!-- Leaderboard controls: free-text model search, sort-key selector, and sort-direction toggle.
	     Element ids and option values are left unchanged; presumably script.js reads them
	     (not visible here). -->
	<div class="controls">
		<div class="search-container">
			<i class="fas fa-search" aria-hidden="true"></i>
			<!-- A placeholder is not an accessible name; aria-label supplies one without
			     changing the visual layout. -->
			<input type="text" id="searchInput" placeholder="Search models..." class="search-input" aria-label="Search models">
		</div>

		<div class="filter-container">
			<label for="sortSelect">Sort by:</label>
			<select id="sortSelect" class="sort-select">
				<option value="overall_score">Overall Score</option>
				<option value="valid_tool_name_rate">Valid Tool Name Rate</option>
				<option value="schema_compliance">Schema Compliance</option>
				<option value="execution_success">Execution Success</option>
				<option value="task_fulfillment">Task Fulfillment</option>
				<option value="information_grounding">Information Grounding</option>
				<option value="tool_appropriateness">Tool Appropriateness</option>
				<option value="parameter_accuracy">Parameter Accuracy</option>
				<option value="dependency_awareness">Dependency Awareness</option>
				<!-- Label matches the table column header; the value is unchanged. -->
				<option value="parallelism_efficiency">Parallelism and Efficiency</option>
			</select>

			<!-- Icon-only button: explicit type avoids the submit default, and aria-label
			     gives it a name that does not depend on the title tooltip. -->
			<button type="button" id="sortOrder" class="sort-btn" title="Toggle sort order" aria-label="Toggle sort order">
				<i class="fas fa-sort-amount-down" aria-hidden="true"></i>
			</button>
		</div>
	</div>

	<!-- Results table. Only the header row is static; the body is empty in the markup.
	     Every header cell is marked scope="col" so assistive technology associates
	     each data cell with its column. -->
	<div class="table-container">
		<table class="leaderboard-table" id="leaderboardTable">
			<thead>
				<tr>
					<th class="model-col" scope="col">
						<strong>Model</strong>
					</th>
					<th class="score-col" scope="col">
						<strong>Overall Score</strong>
					</th>
					<th class="metric-col" scope="col">
						Valid Tool<br>Name Rate
					</th>
					<th class="metric-col" scope="col">
						Schema<br>Compliance
					</th>
					<th class="metric-col" scope="col">
						Execution<br>Success
					</th>
					<th class="metric-col" scope="col">
						Task<br>Fulfillment
					</th>
					<th class="metric-col" scope="col">
						Information<br>Grounding
					</th>
					<th class="metric-col" scope="col">
						Tool<br>Appropriateness
					</th>
					<th class="metric-col" scope="col">
						Parameter<br>Accuracy
					</th>
					<th class="metric-col" scope="col">
						Dependency<br>Awareness
					</th>
					<th class="metric-col" scope="col">
						Parallelism<br>and Efficiency
					</th>
				</tr>
			</thead>
			<tbody id="tableBody">
				<!-- Table rows will be generated by JavaScript -->
			</tbody>
		</table>
	</div>

	<!-- Loading indicator. role="status" exposes it as a polite live region; the spinner
	     icon is decorative. Presumably script.js hides this element once the data has
	     been rendered (not visible here; confirm). -->
	<div class="loading" id="loading" role="status">
		<i class="fas fa-spinner fa-spin" aria-hidden="true"></i>
		Loading leaderboard data...
	</div>

	</section>

	<!-- Citation section: BibTeX entry plus a copy-to-clipboard button.
	     The text inside <pre> is whitespace-sensitive and is left exactly as it was.
	     NOTE(review): the BibTeX key reads "wang2024..." while year={2025} and the arXiv id
	     is 2508.*; confirm the key against the paper's canonical BibTeX before changing it. -->
	<section class="citation-section">
		<h2 class="section-title">Citation</h2>
		<div class="citation-box">
	<pre class="citation-text">@article{wang2024mcpbench,
	title={MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers},
	author={Wang, Zhenting and Chang, Qi and Patel, Hemani and Biju, Shashank and Wu, Cheng-En and Liu, Quan and Ding, Aolin and Rezazadeh, Alireza and Shah, Ankit and Bao, Yujia and Siow, Eugene},
	journal={arXiv preprint arXiv:2508.20453},
	year={2025}
	}</pre>
			<!-- copyCitation() is not defined in this file; presumably script.js provides it.
			     The inline handler is kept so that contract is unchanged. An explicit type
			     avoids the submit default, and the icon is decorative. -->
			<button type="button" class="copy-citation-btn" onclick="copyCitation()">
				<i class="fas fa-copy" aria-hidden="true"></i> Copy Citation
			</button>
		</div>
	</section>

	<!-- Page footer: last-updated stamp and data attribution. The #lastUpdated span is
	     empty in the markup; presumably script.js fills it at runtime (not visible
	     here; confirm). -->
	<footer class="footer">
	<p>Last updated: <span id="lastUpdated"></span></p>
	<p>Data source: MCP-Bench Results (ArXiv: 2508.20453)</p>
	</footer>
	</div>

	<script src="script.js"></script>
	</body>
	</html>