thinkwee committed on
Commit
a8df6b3
·
1 Parent(s): 00d015d

update v1 with LFS for images

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/*.png filter=lfs diff=lfs merge=lfs -text
assets/agency.png ADDED

Git LFS Details

  • SHA256: 1ae7ecf88b6a4d557c1dc2012c135db073e6391229a426767d2486cfaa9b63d8
  • Pointer size: 130 Bytes
  • Size of remote file: 86.8 kB
assets/framework_pipeline.png ADDED

Git LFS Details

  • SHA256: d0131b96b0130149101127f4bff59ba15134b7fff2770d81fcf3ac158a20288d
  • Pointer size: 131 Bytes
  • Size of remote file: 270 kB
assets/framework_task.png ADDED

Git LFS Details

  • SHA256: bfb35c2b2b1ff7aa44d04beee25a6f4299622e49882985bcbb4f639fd765bf33
  • Pointer size: 131 Bytes
  • Size of remote file: 491 kB
assets/hallucination.png ADDED

Git LFS Details

  • SHA256: 8e5ccfac024439a29948249b6d6317b4b2b491367cf09199d7354a863cde8304
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
assets/memory.png ADDED

Git LFS Details

  • SHA256: 19d17c014e18f2b4c45ca02161d6f1724bdbe5bccda9ef067b190e5def2453f6
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB
assets/overall.png ADDED

Git LFS Details

  • SHA256: d7caca9c5bdd7cde61b67d8641223e665756bf7e7934f0d187268b1d1b972ea3
  • Pointer size: 131 Bytes
  • Size of remote file: 409 kB
assets/qwenfamily.png ADDED

Git LFS Details

  • SHA256: ebd95f453ba7595fd08d471d5c0134926e7bb08d100c545bc7e02f8aa8637938
  • Pointer size: 131 Bytes
  • Size of remote file: 214 kB
assets/reasoning.png ADDED

Git LFS Details

  • SHA256: 7373dc93173c3bd3fb12daab6ff5217cfdd5579ca8c61ba472631cc1c2aa4c9c
  • Pointer size: 130 Bytes
  • Size of remote file: 81.5 kB
assets/trustworthiness.png ADDED

Git LFS Details

  • SHA256: 417702efebe57948c4b3b7c9942ff9d02559d936bdb9cd1fe5259fa7ecaeaab4
  • Pointer size: 130 Bytes
  • Size of remote file: 98.1 kB
charts.js CHANGED
@@ -1,35 +1,34 @@
1
  // DDR-Bench Interactive Charts with Smooth Animations
2
  // Using Plotly.js with animate for smooth transitions
3
 
4
- // Common Plotly layout settings for dark theme
5
- // Common Plotly layout settings for Apple Minimalist theme
6
  const darkLayout = {
7
  paper_bgcolor: 'rgba(0,0,0,0)',
8
  plot_bgcolor: 'rgba(0,0,0,0)',
9
  font: {
10
- family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", "Helvetica Neue", sans-serif',
11
- color: '#000000', // Pure black for max contrast
12
- size: 12 // Base font size increased
13
  },
14
  xaxis: {
15
- gridcolor: '#d1d1d6', // Darker grid lines
16
- linecolor: '#d1d1d6',
17
- tickfont: { color: '#515154', size: 11 }, // Larger and darker ticks
18
- title: { font: { color: '#000000', size: 12, weight: 600 } },
19
- zerolinecolor: '#d1d1d6'
20
  },
21
  yaxis: {
22
- gridcolor: '#d1d1d6',
23
- linecolor: '#d1d1d6',
24
- tickfont: { color: '#515154', size: 11 },
25
- title: { font: { color: '#000000', size: 12, weight: 600 } },
26
- zerolinecolor: '#d1d1d6'
27
  },
28
  legend: {
29
  bgcolor: 'rgba(0,0,0,0)',
30
  bordercolor: 'rgba(0,0,0,0)',
31
  borderwidth: 0,
32
- font: { color: '#000000', size: 11 },
33
  orientation: 'h',
34
  y: 0.99,
35
  x: 0.5,
@@ -38,12 +37,12 @@ const darkLayout = {
38
  },
39
  hoverlabel: {
40
  bgcolor: '#ffffff',
41
- bordercolor: 'rgba(0,0,0,0.1)',
42
- font: { color: '#000000', size: 12 },
43
  namelength: -1
44
  },
45
- hovermode: 'closest', // Highlight closest point/element on hover
46
- margin: { t: 30, r: 20, b: 60, l: 60 } // Increased margins
47
  };
48
 
49
  const plotlyConfig = {
@@ -267,11 +266,14 @@ function initScalingCharts() {
267
 
268
  const yRange = SCALING_Y_RANGES[scenario] || [0, 100];
269
 
 
 
 
270
  const layout = {
271
  ...darkLayout,
272
  xaxis: {
273
  ...darkLayout.xaxis,
274
- title: { text: 'Number of Interaction Turns', font: { size: 11, color: '#1d1d1f' } },
275
  type: 'linear', // ALWAYS LINEAR
276
  range: [-0.05, 1.05], // FIXED RANGE
277
  tickmode: 'array',
@@ -281,8 +283,8 @@ function initScalingCharts() {
281
  },
282
  yaxis: {
283
  ...darkLayout.yaxis,
284
- title: { text: 'Accuracy (%)', font: { size: 11, color: '#1d1d1f' } },
285
- dtick: 5,
286
  range: yRange
287
  },
288
  showlegend: false // Use shared legend instead
@@ -724,11 +726,11 @@ document.addEventListener('DOMContentLoaded', () => {
724
  // ============================================================================
725
  const TURN_DISPLAY_NAMES = {
726
  'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
727
- 'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B',
728
- 'qwen3-next-80b-a3b-instruct-note': 'Qwen3-Next-80BA3B-Note',
729
- 'qwen3-next-80b-a3b-instruct-noreasoning': 'Qwen3-Next-80BA3B-NoR',
730
- 'qwen3-next-80b-a3b-instruct-longreasoning': 'Qwen3-Next-80BA3B-LR',
731
- 'qwen3-next-80b-a3b-instruct-shortreasoning': 'Qwen3-Next-80BA3B-SR',
732
  'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M',
733
  'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M',
734
  'qwen2.5-14B-Instruct': 'Qwen2.5-14B',
@@ -851,8 +853,8 @@ function initTurnCharts() {
851
  ...darkLayout,
852
  xaxis: {
853
  ...darkLayout.xaxis,
854
- title: { text: 'Number of Turns', font: { size: 12, color: '#1d1d1f' } },
855
- range: [0, 100],
856
  dtick: 20
857
  },
858
  yaxis: {
@@ -860,12 +862,13 @@ function initTurnCharts() {
860
  tickmode: 'array',
861
  tickvals: displayData.map((_, i) => i + 0.35),
862
  ticktext: displayData.map(m => getTurnDisplayName(m.model)),
 
863
  automargin: true,
864
  range: [-0.5, displayData.length],
865
  showgrid: false,
866
  zeroline: false
867
  },
868
- margin: { ...darkLayout.margin, l: 140 },
869
  showlegend: false
870
  };
871
 
@@ -1022,7 +1025,7 @@ function initErrorChart() {
1022
  },
1023
  text: data.map(d => `${d.percentage}%`),
1024
  textposition: 'outside',
1025
- textfont: { size: 11, color: '#1d1d1f' },
1026
  hovertemplate: '<b>%{x}</b><br>%{y:.1f}%<br>Count: %{customdata}<extra></extra>',
1027
  customdata: data.map(d => d.count),
1028
  showlegend: false
@@ -1039,7 +1042,7 @@ function initErrorChart() {
1039
  y: maxPct * 1.15,
1040
  text: `<b>${catName}</b>`,
1041
  showarrow: false,
1042
- font: { size: 10, color: '#1d1d1f' },
1043
  xanchor: 'center',
1044
  yanchor: 'bottom'
1045
  });
@@ -1049,12 +1052,12 @@ function initErrorChart() {
1049
  ...darkLayout,
1050
  xaxis: {
1051
  ...darkLayout.xaxis,
1052
- tickangle: -30,
1053
- tickfont: { size: 10, color: '#515154' }
1054
  },
1055
  yaxis: {
1056
  ...darkLayout.yaxis,
1057
- title: { text: 'Percentage (%)', font: { size: 11, color: '#1d1d1f' } },
1058
  range: [0, maxPct * 1.25]
1059
  },
1060
  annotations: annotations,
@@ -1184,13 +1187,13 @@ function renderEntropyCharts(scenario) {
1184
  ...darkLayout,
1185
  xaxis: {
1186
  ...darkLayout.xaxis,
1187
- title: { text: 'Entropy', font: { size: 10, color: '#1d1d1f' } },
1188
  range: [0.6, 1.05],
1189
  dtick: 0.1
1190
  },
1191
  yaxis: {
1192
  ...darkLayout.yaxis,
1193
- title: { text: 'Coverage', font: { size: 10, color: '#1d1d1f' } },
1194
  range: [-0.05, yMax]
1195
  },
1196
  margin: { t: 20, r: 20, b: 50, l: 50 }
 
1
  // DDR-Bench Interactive Charts with Smooth Animations
2
  // Using Plotly.js with animate for smooth transitions
3
 
4
+ // Common Plotly layout settings for DDR-Bench design system
 
5
  const darkLayout = {
6
  paper_bgcolor: 'rgba(0,0,0,0)',
7
  plot_bgcolor: 'rgba(0,0,0,0)',
8
  font: {
9
+ family: "-apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Helvetica Neue', sans-serif",
10
+ color: '#1d1d1f',
11
+ size: 15
12
  },
13
  xaxis: {
14
+ gridcolor: '#d2d2d7',
15
+ linecolor: '#d2d2d7',
16
+ tickfont: { color: '#424245', size: 14 },
17
+ title: { font: { color: '#1d1d1f', size: 15, weight: 600 } },
18
+ zerolinecolor: '#d2d2d7'
19
  },
20
  yaxis: {
21
+ gridcolor: '#d2d2d7',
22
+ linecolor: '#d2d2d7',
23
+ tickfont: { color: '#424245', size: 14 },
24
+ title: { font: { color: '#1d1d1f', size: 15, weight: 600 } },
25
+ zerolinecolor: '#d2d2d7'
26
  },
27
  legend: {
28
  bgcolor: 'rgba(0,0,0,0)',
29
  bordercolor: 'rgba(0,0,0,0)',
30
  borderwidth: 0,
31
+ font: { color: '#1d1d1f', size: 14 },
32
  orientation: 'h',
33
  y: 0.99,
34
  x: 0.5,
 
37
  },
38
  hoverlabel: {
39
  bgcolor: '#ffffff',
40
+ bordercolor: '#d2d2d7',
41
+ font: { color: '#1d1d1f', size: 14 },
42
  namelength: -1
43
  },
44
+ hovermode: 'closest',
45
+ margin: { t: 20, r: 10, b: 40, l: 50 }, // Reduced margins specifically for compact cards
46
  };
47
 
48
  const plotlyConfig = {
 
266
 
267
  const yRange = SCALING_Y_RANGES[scenario] || [0, 100];
268
 
269
+ // Sparse ticks for 10k scenario
270
+ const dtickVal = scenario === '10k' ? 10 : 5;
271
+
272
  const layout = {
273
  ...darkLayout,
274
  xaxis: {
275
  ...darkLayout.xaxis,
276
+ title: { text: 'Number of Interaction Turns', font: { size: 15, color: '#1d1d1f' } }, // Explicit larger font
277
  type: 'linear', // ALWAYS LINEAR
278
  range: [-0.05, 1.05], // FIXED RANGE
279
  tickmode: 'array',
 
283
  },
284
  yaxis: {
285
  ...darkLayout.yaxis,
286
+ title: { text: 'Accuracy (%)', font: { size: 15, color: '#1d1d1f' } }, // Explicit larger font
287
+ dtick: dtickVal,
288
  range: yRange
289
  },
290
  showlegend: false // Use shared legend instead
 
726
  // ============================================================================
727
  const TURN_DISPLAY_NAMES = {
728
  'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
729
+ 'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80A3B',
730
+ 'qwen3-next-80b-a3b-instruct-note': 'Qwen3-Next-80A3B-Note',
731
+ 'qwen3-next-80b-a3b-instruct-noreasoning': 'Qwen3-Next-80A3B-NoR',
732
+ 'qwen3-next-80b-a3b-instruct-longreasoning': 'Qwen3-Next-80A3B-LR',
733
+ 'qwen3-next-80b-a3b-instruct-shortreasoning': 'Qwen3-Next-80A3B-SR',
734
  'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M',
735
  'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M',
736
  'qwen2.5-14B-Instruct': 'Qwen2.5-14B',
 
853
  ...darkLayout,
854
  xaxis: {
855
  ...darkLayout.xaxis,
856
+ title: { text: 'Number of Turns', font: { size: 14, color: '#1d1d1f' } }, // Larger axis title
857
+ range: scenario === 'globem' ? [0, 40] : [0, 80],
858
  dtick: 20
859
  },
860
  yaxis: {
 
862
  tickmode: 'array',
863
  tickvals: displayData.map((_, i) => i + 0.35),
864
  ticktext: displayData.map(m => getTurnDisplayName(m.model)),
865
+ tickfont: { size: 10, color: '#424245' }, // Small font for model names as requested
866
  automargin: true,
867
  range: [-0.5, displayData.length],
868
  showgrid: false,
869
  zeroline: false
870
  },
871
+ margin: { ...darkLayout.margin, l: 85 }, // Reduced left margin for turn chart (was 140)
872
  showlegend: false
873
  };
874
 
 
1025
  },
1026
  text: data.map(d => `${d.percentage}%`),
1027
  textposition: 'outside',
1028
+ textfont: { size: 14, color: '#1d1d1f' }, // Larger bar text
1029
  hovertemplate: '<b>%{x}</b><br>%{y:.1f}%<br>Count: %{customdata}<extra></extra>',
1030
  customdata: data.map(d => d.count),
1031
  showlegend: false
 
1042
  y: maxPct * 1.15,
1043
  text: `<b>${catName}</b>`,
1044
  showarrow: false,
1045
+ font: { size: 13, color: '#1d1d1f' }, // Larger category labels
1046
  xanchor: 'center',
1047
  yanchor: 'bottom'
1048
  });
 
1052
  ...darkLayout,
1053
  xaxis: {
1054
  ...darkLayout.xaxis,
1055
+ tickangle: 0,
1056
+ tickfont: { size: 14, color: '#515154' } // Larger ticks
1057
  },
1058
  yaxis: {
1059
  ...darkLayout.yaxis,
1060
+ title: { text: 'Percentage (%)', font: { size: 15, color: '#1d1d1f' } }, // Larger axis title
1061
  range: [0, maxPct * 1.25]
1062
  },
1063
  annotations: annotations,
 
1187
  ...darkLayout,
1188
  xaxis: {
1189
  ...darkLayout.xaxis,
1190
+ title: { text: 'Entropy', font: { size: 16, color: '#1d1d1f' } }, // Larger
1191
  range: [0.6, 1.05],
1192
  dtick: 0.1
1193
  },
1194
  yaxis: {
1195
  ...darkLayout.yaxis,
1196
+ title: { text: 'Coverage', font: { size: 16, color: '#1d1d1f' } }, // Larger
1197
  range: [-0.05, yMax]
1198
  },
1199
  margin: { t: 20, r: 20, b: 50, l: 50 }
data.js CHANGED
The diff for this file is too large to render. See raw diff
 
index.html CHANGED
@@ -8,10 +8,16 @@
8
  <title>DDR-Bench | Deep Data Research Benchmark</title>
9
  <link rel="preconnect" href="https://fonts.googleapis.com">
10
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
- <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
12
  <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
 
 
 
 
 
13
  <script src="data.js" defer></script>
14
  <script src="entropy_data.js" defer></script>
 
 
15
  <script src="charts.js" defer></script>
16
  <link rel="stylesheet" href="styles.css">
17
  <style>
@@ -21,7 +27,7 @@
21
  align-items: center;
22
  justify-content: center;
23
  min-height: 300px;
24
- color: #86868b;
25
  font-size: 14px;
26
  }
27
 
@@ -45,28 +51,25 @@
45
  </head>
46
 
47
  <body>
48
- <!-- Hero Section -->
49
  <header class="hero">
50
  <div class="hero-content">
51
- <div class="badge">🔬 Research Benchmark</div>
52
- <h1>DDR-Bench</h1>
53
- <p class="subtitle">Deep Data Research Agent Benchmark for Large Language Models</p>
54
  <p class="description">
55
- A comprehensive evaluation framework measuring AI agents' ability to conduct deep, iterative data
56
- exploration across medical records (MIMIC), financial filings (10-K), and behavioral data (GLOBEM).
 
57
  </p>
58
- <div class="stats-row">
59
- <div class="stat-item">
60
- <span class="stat-value">22+</span>
61
- <span class="stat-label">Models Evaluated</span>
62
  </div>
63
- <div class="stat-item">
64
- <span class="stat-value">3</span>
65
- <span class="stat-label">Diverse Datasets</span>
66
  </div>
67
- <div class="stat-item">
68
- <span class="stat-value">5</span>
69
- <span class="stat-label">Analysis Dimensions</span>
 
70
  </div>
71
  </div>
72
  </div>
@@ -75,16 +78,179 @@
75
  <!-- Main Content - All sections visible -->
76
  <main class="content">
77
 
78
- <!-- 1. Scaling Analysis Section -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  <section id="scaling" class="section visible">
80
  <div class="section-header">
81
- <h2>📈 Scaling Analysis</h2>
 
 
 
 
 
 
 
 
82
  <p>Explore how model performance scales with interaction turns, token usage, and inference cost.</p>
83
  </div>
84
  <div class="dimension-toggle">
85
- <button class="dim-btn active" data-dim="turn">🔄 Turns</button>
86
- <button class="dim-btn" data-dim="token">📊 Tokens</button>
87
- <button class="dim-btn" data-dim="cost">💰 Cost</button>
88
  </div>
89
  <div id="scaling-legend" class="shared-legend"></div>
90
  <div class="charts-grid three-col">
@@ -106,13 +272,30 @@
106
  <!-- 2. Ranking Comparison Section -->
107
  <section id="ranking" class="section visible">
108
  <div class="section-header">
109
- <h2>🏆 Ranking Comparison</h2>
110
- <p>Novelty (Bradley-Terry) vs Accuracy ranking. = Novelty, ◇ = Accuracy. Purple = Proprietary, Green =
111
- Open-source.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  </div>
113
  <div class="dimension-toggle">
114
- <button class="dim-btn ranking-dim active" data-mode="novelty">🎯 Sort by Novelty</button>
115
- <button class="dim-btn ranking-dim" data-mode="accuracy">📊 Sort by Accuracy</button>
116
  </div>
117
  <div class="charts-grid three-col">
118
 
@@ -135,7 +318,14 @@
135
  <!-- 3. Turn Distribution Section -->
136
  <section id="turn" class="section visible">
137
  <div class="section-header">
138
- <h2>🔄 Turn Distribution</h2>
 
 
 
 
 
 
 
139
  <p>Analyze the distribution of interaction turns across different models and datasets.</p>
140
  </div>
141
  <div class="charts-grid three-col">
@@ -157,7 +347,17 @@
157
  <!-- 4. Entropy Analysis Section -->
158
  <section id="entropy" class="section visible">
159
  <div class="section-header">
160
- <h2>🔬 Entropy Analysis</h2>
 
 
 
 
 
 
 
 
 
 
161
  <p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
162
  = more uniform access; Higher coverage = more fields explored.</p>
163
  </div>
@@ -196,7 +396,15 @@
196
  <!-- 5. Error Analysis Section -->
197
  <section id="error" class="section visible">
198
  <div class="section-header">
199
- <h2>⚠️ Error Analysis</h2>
 
 
 
 
 
 
 
 
200
  <p>Breakdown of error types encountered during agent interactions, grouped by main categories.</p>
201
  </div>
202
  <div class="charts-grid single">
@@ -209,7 +417,14 @@
209
  <!-- 6. Probing Results Section -->
210
  <section id="probing" class="section visible">
211
  <div class="section-header">
212
- <h2>🔍 Probing Results</h2>
 
 
 
 
 
 
 
213
  <p>Analyze the average log probability of FINISH messages across conversation turns and progress.</p>
214
  </div>
215
  <div id="probing-legend" class="shared-legend"></div>
@@ -232,10 +447,122 @@
232
 
233
  <!-- Footer -->
234
  <footer class="footer">
235
- <p>DDR-Bench © 2026 | Deep Data Research Agent Benchmark</p>
236
  </footer>
237
 
238
- <!-- Scripts loaded via defer in head for better parallelization -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  </body>
240
 
241
  </html>
 
8
  <title>DDR-Bench | Deep Data Research Benchmark</title>
9
  <link rel="preconnect" href="https://fonts.googleapis.com">
10
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
 
11
  <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
12
+ <script src="https://cdn.jsdelivr.net/npm/marked@4.3.0/marked.min.js"></script>
13
+ <link rel="stylesheet"
14
+ href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css">
15
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
16
+ <script src="https://unpkg.com/sql-formatter@4.0.2/dist/sql-formatter.min.js"></script>
17
  <script src="data.js" defer></script>
18
  <script src="entropy_data.js" defer></script>
19
+ <script src="trajectory_data.js" defer></script>
20
+ <script src="trajectory.js" defer></script>
21
  <script src="charts.js" defer></script>
22
  <link rel="stylesheet" href="styles.css">
23
  <style>
 
27
  align-items: center;
28
  justify-content: center;
29
  min-height: 300px;
30
+ color: var(--color-text-muted, #64748B);
31
  font-size: 14px;
32
  }
33
 
 
51
  </head>
52
 
53
  <body>
 
54
  <header class="hero">
55
  <div class="hero-content">
56
+ <h1>DDR-Bench: Hunt Instead of Wait: Evaluating Deep Data Research on Large Language Models</h1>
 
 
57
  <p class="description">
58
+ An open-ended task requiring LLMs to delve into databases, proactively exploring data and
59
+ identifying insights without predefined questions or objectives. Evaluating investigatory intelligence
60
+ across medical records (MIMIC), financial filings (10-K), and behavioral data (GLOBEM).
61
  </p>
62
+ <div class="meta-info">
63
+ <div class="meta-row authors">
64
+ <span class="meta-item">Wei Liu, Peijie Yu, Michele Orini, Yali Du, Yulan He</span>
 
65
  </div>
66
+ <div class="meta-row affiliations">
67
+ <span class="meta-item">King's College London · Tencent · The Alan Turing Institute</span>
 
68
  </div>
69
+ <div class="meta-row links">
70
+ <span class="meta-item"><a href="https://huggingface.co/spaces/DDR-Bench">🤗 Data</a></span>
71
+ <span class="meta-item"><a href="https://github.com/DDR-Bench">💻 Code</a></span>
72
+ <span class="meta-item"><a href="https://arxiv.org/abs/xxx">📄 Paper</a></span>
73
  </div>
74
  </div>
75
  </div>
 
78
  <!-- Main Content - All sections visible -->
79
  <main class="content">
80
 
81
+ <!-- 1. Framework Overview Section -->
82
+ <section id="framework" class="section visible framework-section">
83
+ <div class="section-header">
84
+ <h2>
85
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
86
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
87
+ <rect width="18" height="18" x="3" y="3" rx="2" ry="2" />
88
+ <line x1="3" x2="21" y1="9" y2="9" />
89
+ <line x1="9" x2="9" y1="21" y2="9" />
90
+ </svg>
91
+ Framework Overview
92
+ </h2>
93
+ <p>System architecture and evaluation pipeline of DDR-Bench.</p>
94
+ </div>
95
+ <div class="framework-grid">
96
+ <div class="framework-card">
97
+ <img src="assets/framework_task.png" alt="Task Formulation Framework"
98
+ style="border-radius: var(--radius-md);">
99
+ <h3>Task Formulation</h3>
100
+ </div>
101
+ <div class="framework-card">
102
+ <img src="assets/framework_pipeline.png" alt="Evaluation Pipeline Framework"
103
+ style="border-radius: var(--radius-md);">
104
+ <h3>Evaluation Pipeline</h3>
105
+ </div>
106
+ </div>
107
+ </section>
108
+
109
+
110
+
111
+ <!-- 1.5. Agent Trajectory Section -->
112
+ <section id="trajectory" class="section visible trajectory-section">
113
+ <div class="section-header">
114
+ <h2>
115
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
116
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
117
+ <polyline points="22 12 18 12 15 21 9 3 6 12 2 12"></polyline>
118
+ </svg>
119
+ Agent Trajectory
120
+ </h2>
121
+ <p>Observe the autonomous decision-making process of the agent across different scenarios.</p>
122
+ </div>
123
+
124
+ <div class="dimension-toggle">
125
+ <button class="dim-btn active" data-traj-scenario="mimic">MIMIC</button>
126
+ <button class="dim-btn" data-traj-scenario="10k">10-K</button>
127
+ <button class="dim-btn" data-traj-scenario="globem">GLOBEM</button>
128
+ </div>
129
+
130
+ <div class="trajectory-container">
131
+ <div id="chat-window" class="chat-window">
132
+ <!-- Messages will be injected here via JS -->
133
+ <div class="loading-message">Loading trajectory data...</div>
134
+ </div>
135
+ </div>
136
+ </section>
137
+
138
+ <!-- 2. Experiment Results Section -->
139
+ <section id="results" class="section visible results-section">
140
+ <div class="section-header">
141
+ <h2>
142
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
143
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
144
+ <path d="M3 3v18h18" />
145
+ <path d="m19 9-5 5-4-4-3 3" />
146
+ </svg>
147
+ Experiment Results
148
+ </h2>
149
+ <p>Main benchmark results and in-depth analysis of agent capabilities.</p>
150
+ </div>
151
+
152
+ <!-- Carousel Container -->
153
+ <div class="carousel-wrapper">
154
+ <button class="carousel-btn carousel-prev" aria-label="Previous">
155
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
156
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
157
+ <path d="m15 18-6-6 6-6" />
158
+ </svg>
159
+ </button>
160
+
161
+ <div class="carousel-track" id="results-carousel">
162
+ <!-- 1. Overall -->
163
+ <div class="carousel-card">
164
+ <img src="assets/overall.png" alt="Overall Performance">
165
+ <h4>Overall Performance</h4>
166
+ <p class="card-caption">Systematic evaluation of mainstream LLMs across MIMIC, 10-K, and GLOBEM
167
+ datasets reveals persistent limitations in frontier models.</p>
168
+ </div>
169
+
170
+ <!-- 2. Qwen Family -->
171
+ <div class="carousel-card">
172
+ <img src="assets/qwenfamily.png" alt="Qwen Family Performance">
173
+ <h4>Qwen Family Analysis</h4>
174
+ <p class="card-caption">Performance scaling and behavioral differences within the Qwen model
175
+ series (Qwen3-Next-80B vs 30B).</p>
176
+ </div>
177
+
178
+ <!-- 3. Reasoning -->
179
+ <div class="carousel-card">
180
+ <img src="assets/reasoning.png" alt="Reasoning Budget">
181
+ <h4>Reasoning Budget</h4>
182
+ <p class="card-caption">Increasing the reasoning budget reduces interaction rounds but
183
+ illustrates
184
+ a
185
+ trade-off between reasoning depth and exploration efficiency.</p>
186
+ </div>
187
+
188
+ <!-- 4. Memory -->
189
+ <div class="carousel-card">
190
+ <img src="assets/memory.png" alt="Memory Mechanism">
191
+ <h4>Memory Mechanism</h4>
192
+ <p class="card-caption">Long-short-term memory can create unpredictable behavior, often
193
+ increasing
194
+ tool usage without consistently improving final accuracy.</p>
195
+ </div>
196
+
197
+ <!-- 5. Agency -->
198
+ <div class="carousel-card">
199
+ <img src="assets/agency.png" alt="Proactive vs Reactive">
200
+ <h4>Proactive vs Reactive</h4>
201
+ <p class="card-caption">Models perform significantly better with explicit queries (Reactive),
202
+ highlighting the difficulty of true proactive goal formulation.</p>
203
+ </div>
204
+
205
+ <!-- 6. Hallucination -->
206
+ <div class="carousel-card">
207
+ <img src="assets/hallucination.png" alt="Hallucination Analysis">
208
+ <h4>Hallucination Analysis</h4>
209
+ <p class="card-caption">Hallucination rates show almost no correlation with final accuracy,
210
+ indicating
211
+ robustness against metric inflation via memorization.</p>
212
+ </div>
213
+
214
+ <!-- 7. Trustworthiness -->
215
+ <div class="carousel-card">
216
+ <img src="assets/trustworthiness.png" alt="Trustworthiness">
217
+ <h4>Trustworthiness</h4>
218
+ <p class="card-caption">Verification of the LLM-as-a-Checker pipeline demonstrating high
219
+ alignment
220
+ with human expert judgments.</p>
221
+ </div>
222
+ </div>
223
+
224
+ <button class="carousel-btn carousel-next" aria-label="Next">
225
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
226
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
227
+ <path d="m9 18 6-6-6-6" />
228
+ </svg>
229
+ </button>
230
+ </div>
231
+
232
+ <!-- Carousel Dots -->
233
+ <div class="carousel-dots" id="results-dots"></div>
234
+ </section>
235
+
236
+ <!-- 3. Scaling Analysis Section -->
237
  <section id="scaling" class="section visible">
238
  <div class="section-header">
239
+ <h2>
240
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
241
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
242
+ <line x1="12" x2="12" y1="20" y2="10" />
243
+ <line x1="18" x2="18" y1="20" y2="4" />
244
+ <line x1="6" x2="6" y1="20" y2="16" />
245
+ </svg>
246
+ Scaling Analysis
247
+ </h2>
248
  <p>Explore how model performance scales with interaction turns, token usage, and inference cost.</p>
249
  </div>
250
  <div class="dimension-toggle">
251
+ <button class="dim-btn active" data-dim="turn">Turns</button>
252
+ <button class="dim-btn" data-dim="token">Tokens</button>
253
+ <button class="dim-btn" data-dim="cost">Cost</button>
254
  </div>
255
  <div id="scaling-legend" class="shared-legend"></div>
256
  <div class="charts-grid three-col">
 
272
  <!-- 2. Ranking Comparison Section -->
273
  <section id="ranking" class="section visible">
274
  <div class="section-header">
275
+ <h2>
276
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
277
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
278
+ <path d="M6 9H4.5a2.5 2.5 0 0 1 0-5H6" />
279
+ <path d="M18 9h1.5a2.5 2.5 0 0 0 0-5H18" />
280
+ <path d="M4 22h16" />
281
+ <path d="M10 14.66V17c0 .55-.47.98-.97 1.21C7.85 18.75 7 20.24 7 22" />
282
+ <path d="M14 14.66V17c0 .55.47.98.97 1.21C16.15 18.75 17 20.24 17 22" />
283
+ <path d="M18 2H6v7a6 6 0 0 0 12 0V2Z" />
284
+ </svg>
285
+ Ranking Comparison
286
+ </h2>
287
+ <p>
288
+ Novelty (Bradley-Terry) vs Accuracy ranking
289
+ <br>
290
+ ● = Novelty, ◇ = Accuracy.
291
+ <br>
292
+ <span class="model-badge proprietary">Purple = Proprietary</span>
293
+ <span class="model-badge opensource">Green = Open-source</span>
294
+ </p>
295
  </div>
296
  <div class="dimension-toggle">
297
+ <button class="dim-btn ranking-dim active" data-mode="novelty">Sort by Novelty</button>
298
+ <button class="dim-btn ranking-dim" data-mode="accuracy">Sort by Accuracy</button>
299
  </div>
300
  <div class="charts-grid three-col">
301
 
 
318
  <!-- 3. Turn Distribution Section -->
319
  <section id="turn" class="section visible">
320
  <div class="section-header">
321
+ <h2>
322
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
323
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
324
+ <path d="M21 12a9 9 0 1 1-9-9c2.52 0 4.93 1 6.74 2.74L21 8" />
325
+ <path d="M21 3v5h-5" />
326
+ </svg>
327
+ Turn Distribution
328
+ </h2>
329
  <p>Analyze the distribution of interaction turns across different models and datasets.</p>
330
  </div>
331
  <div class="charts-grid three-col">
 
347
  <!-- 4. Entropy Analysis Section -->
348
  <section id="entropy" class="section visible">
349
  <div class="section-header">
350
+ <h2>
351
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
352
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
353
+ <circle cx="7.5" cy="7.5" r="1.5" />
354
+ <circle cx="18.5" cy="5.5" r="1.5" />
355
+ <circle cx="11.5" cy="11.5" r="1.5" />
356
+ <circle cx="7.5" cy="16.5" r="1.5" />
357
+ <circle cx="17.5" cy="14.5" r="1.5" />
358
+ </svg>
359
+ Entropy Analysis
360
+ </h2>
361
  <p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
362
  = more uniform access; Higher coverage = more fields explored.</p>
363
  </div>
 
396
  <!-- 5. Error Analysis Section -->
397
  <section id="error" class="section visible">
398
  <div class="section-header">
399
+ <h2>
400
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
401
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
402
+ <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z" />
403
+ <line x1="12" x2="12" y1="9" y2="13" />
404
+ <line x1="12" x2="12.01" y1="17" y2="17" />
405
+ </svg>
406
+ Error Analysis
407
+ </h2>
408
  <p>Breakdown of error types encountered during agent interactions, grouped by main categories.</p>
409
  </div>
410
  <div class="charts-grid single">
 
417
  <!-- 6. Probing Results Section -->
418
  <section id="probing" class="section visible">
419
  <div class="section-header">
420
+ <h2>
421
+ <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none"
422
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
423
+ <circle cx="11" cy="11" r="8" />
424
+ <path d="m21 21-4.3-4.3" />
425
+ </svg>
426
+ Probing Results
427
+ </h2>
428
  <p>Analyze the average log probability of FINISH messages across conversation turns and progress.</p>
429
  </div>
430
  <div id="probing-legend" class="shared-legend"></div>
 
447
 
448
  <!-- Footer -->
449
  <footer class="footer">
450
+ <p>DDR-Bench © 2026 | King's College London · Tencent · The Alan Turing Institute</p>
451
  </footer>
452
 
453
+ <!-- Carousel Script -->
454
+ <script>
455
+ document.addEventListener('DOMContentLoaded', function () {
456
+ const track = document.getElementById('results-carousel');
457
+ const dotsContainer = document.getElementById('results-dots');
458
+ const prevBtn = document.querySelector('.carousel-prev');
459
+ const nextBtn = document.querySelector('.carousel-next');
460
+
461
+ if (!track) return;
462
+
463
+ const cards = Array.from(track.querySelectorAll('.carousel-card'));
464
+ const cardCount = cards.length;
465
+ let currentIndex = 0;
466
+
467
+ // Create dots
468
+ for (let i = 0; i < cardCount; i++) {
469
+ const dot = document.createElement('button');
470
+ dot.className = 'carousel-dot' + (i === 0 ? ' active' : '');
471
+ dot.setAttribute('aria-label', `Go to slide ${i + 1}`);
472
+ dot.addEventListener('click', () => goToSlide(i));
473
+ dotsContainer.appendChild(dot);
474
+ }
475
+
476
+ const dots = dotsContainer.querySelectorAll('.carousel-dot');
477
+
478
+ function updateCarousel() {
479
+ // Card is 66.666% width, calculate offset to center current card
480
+ const cardWidth = track.offsetWidth * 0.66666;
481
+ const gap = 32; // --space-lg
482
+ const offset = (track.offsetWidth - cardWidth) / 2 - currentIndex * (cardWidth + gap);
483
+
484
+ track.style.transform = `translateX(${offset}px)`;
485
+
486
+ // Update card states
487
+ cards.forEach((card, i) => {
488
+ card.classList.remove('active', 'side');
489
+ if (i === currentIndex) {
490
+ card.classList.add('active');
491
+ } else {
492
+ card.classList.add('side');
493
+ }
494
+ });
495
+
496
+ // Update dots
497
+ dots.forEach((dot, i) => {
498
+ dot.classList.toggle('active', i === currentIndex);
499
+ });
500
+ }
501
+
502
+ function goToSlide(index) {
503
+ // Infinite loop
504
+ if (index < 0) {
505
+ currentIndex = cardCount - 1;
506
+ } else if (index >= cardCount) {
507
+ currentIndex = 0;
508
+ } else {
509
+ currentIndex = index;
510
+ }
511
+ updateCarousel();
512
+ }
513
+
514
+ function nextSlide() { goToSlide(currentIndex + 1); }
515
+ function prevSlide() { goToSlide(currentIndex - 1); }
516
+
517
+ // Navigation buttons
518
+ prevBtn.addEventListener('click', prevSlide);
519
+ nextBtn.addEventListener('click', nextSlide);
520
+
521
+ // Keyboard navigation
522
+ document.addEventListener('keydown', (e) => {
523
+ if (e.key === 'ArrowLeft') prevSlide();
524
+ if (e.key === 'ArrowRight') nextSlide();
525
+ });
526
+
527
+ // Touch/swipe support
528
+ let touchStartX = 0;
529
+ track.addEventListener('touchstart', (e) => {
530
+ touchStartX = e.changedTouches[0].screenX;
531
+ }, { passive: true });
532
+
533
+ track.addEventListener('touchend', (e) => {
534
+ const diff = touchStartX - e.changedTouches[0].screenX;
535
+ if (Math.abs(diff) > 50) {
536
+ if (diff > 0) nextSlide();
537
+ else prevSlide();
538
+ }
539
+ }, { passive: true });
540
+
541
+ // Auto Play Logic
542
+ let autoPlayInterval;
543
+ const AUTO_PLAY_DELAY = 5000;
544
+
545
+ function startAutoPlay() {
546
+ stopAutoPlay(); // Clear any existing timer so only one interval runs
547
+ autoPlayInterval = setInterval(nextSlide, AUTO_PLAY_DELAY);
548
+ }
549
+
550
+ function stopAutoPlay() {
551
+ if (autoPlayInterval) clearInterval(autoPlayInterval);
552
+ }
553
+
554
+ // Pause on hover
555
+ const carouselWrapper = document.querySelector('.carousel-wrapper');
556
+ if (carouselWrapper) {
557
+ carouselWrapper.addEventListener('mouseenter', stopAutoPlay);
558
+ carouselWrapper.addEventListener('mouseleave', startAutoPlay);
559
+ }
560
+
561
+ // Initialize
562
+ updateCarousel();
563
+ startAutoPlay();
564
+ });
565
+ </script>
566
  </body>
567
 
568
  </html>
styles.css CHANGED
@@ -1,48 +1,91 @@
1
- /* Apple Style Minimalist Theme */
 
 
 
 
 
 
 
2
  :root {
3
- --primary: #0071e3;
4
- /* Apple Blue */
5
- --primary-hover: #0077ed;
6
- --bg-body: #f5f5f7;
7
- /* Light grey background */
8
- --bg-card: #ffffff;
9
- --text-primary: #1d1d1f;
10
- /* Apple Black */
11
- --text-secondary: #515154;
12
- /* Darker grey */
13
- --border: #d2d2d7;
14
- --shadow-card: 0 8px 30px rgba(0, 0, 0, 0.08);
15
- /* Stronger shadow */
16
- --radius-card: 20px;
17
- --radius-btn: 980px;
18
- /* Capsule */
19
- --font-stack: "SF Pro Text", "SF Pro Icons", "Helvetica Neue", "Helvetica", "Arial", sans-serif;
20
- }
21
-
22
- /* Reset & Base */
23
- *,
24
- *::before,
25
- *::after {
26
- box-sizing: border-box;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  margin: 0;
28
  padding: 0;
 
 
 
 
 
 
 
29
  }
30
 
31
  body {
32
- font-family: var(--font-stack);
33
- background-color: var(--bg-body);
34
- color: var(--text-primary);
35
  line-height: 1.47059;
36
  letter-spacing: -0.022em;
37
  min-height: 100vh;
38
- -webkit-font-smoothing: antialiased;
39
  }
40
 
41
- /* Hero Section */
 
 
42
  .hero {
43
- padding: 4rem 2rem 2rem;
 
44
  text-align: center;
45
- background: var(--bg-body);
46
  }
47
 
48
  .hero-content {
@@ -51,163 +94,218 @@ body {
51
  }
52
 
53
  .badge {
54
- display: inline-block;
55
- color: var(--primary);
 
56
  font-size: 12px;
57
  font-weight: 600;
58
- margin-bottom: 0.8rem;
59
- letter-spacing: 0.05em;
60
  text-transform: uppercase;
 
 
 
 
 
 
 
61
  }
62
 
63
  .hero h1 {
64
- font-size: 48px;
65
- line-height: 1.08349;
66
  font-weight: 700;
67
- letter-spacing: -0.003em;
68
- margin-bottom: 0.5rem;
69
- color: var(--text-primary);
 
70
  }
71
 
72
- .subtitle {
73
  font-size: 24px;
74
- line-height: 1.16667;
75
- font-weight: 400;
76
- letter-spacing: 0.009em;
77
- color: var(--text-primary);
78
- margin-bottom: 1rem;
79
  }
80
 
81
- .description {
82
  font-size: 17px;
83
- line-height: 1.47059;
84
- font-weight: 400;
85
- letter-spacing: -0.022em;
86
- color: var(--text-secondary);
87
- max-width: 700px;
88
- margin: 0 auto 2rem;
89
  }
90
 
91
- .stats-row {
 
 
92
  display: flex;
 
 
 
 
 
 
 
 
 
 
 
 
93
  justify-content: center;
94
- gap: 3rem;
95
- margin-top: 2rem;
96
  }
97
 
98
- .stat-item {
99
- text-align: center;
 
 
100
  }
101
 
102
- .stat-value {
103
- display: block;
104
- font-size: 28px;
105
- font-weight: 600;
106
- color: var(--text-primary);
107
  }
108
 
109
- .stat-label {
110
- font-size: 13px;
111
- color: var(--text-secondary);
112
- font-weight: 500;
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  }
114
 
115
- /* Main Content */
 
 
116
  .content {
117
- max-width: 1800px;
118
- /* Maximize width for charts */
119
  margin: 0 auto;
120
- padding: 2rem;
121
  }
122
 
123
- /* Sections */
 
 
124
  .section {
125
- margin-bottom: 4rem;
 
126
  }
127
 
128
  .section-header {
129
- margin-bottom: 2rem;
130
  text-align: center;
 
131
  }
132
 
133
  .section-header h2 {
134
- font-size: 32px;
135
- line-height: 1.125;
136
- font-weight: 700;
137
- letter-spacing: 0.004em;
138
- margin-bottom: 0.5rem;
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
 
141
  .section-header p {
142
  font-size: 17px;
143
- color: var(--text-secondary);
 
 
 
144
  }
145
 
146
- /* Toggle Buttons */
147
- .dimension-toggle {
148
- display: flex;
149
- justify-content: center;
150
- gap: 1rem;
151
- margin-bottom: 1.5rem;
 
 
152
  }
153
 
154
- .dim-btn {
155
- padding: 8px 16px;
156
- background: rgba(0, 0, 0, 0.05);
157
- border: none;
158
- border-radius: var(--radius-btn);
159
- color: var(--text-primary);
160
- font-size: 14px;
161
- font-weight: 400;
162
- cursor: pointer;
163
- transition: all 0.2s ease;
164
- font-family: inherit;
165
  }
166
 
167
- .dim-btn:hover {
168
- background: rgba(0, 0, 0, 0.1);
 
169
  }
170
 
171
- .dim-btn.active {
172
- background: var(--text-primary);
173
- /* Black active state like Apple */
174
- color: white;
 
 
 
 
175
  }
176
 
 
177
  .toggle-btn {
 
 
 
178
  padding: 10px 20px;
179
- background: rgba(0, 0, 0, 0.05);
180
- border: none;
181
- border-radius: var(--radius-btn);
182
- color: var(--text-primary);
183
  font-size: 14px;
184
  font-weight: 500;
185
  cursor: pointer;
186
- transition: all 0.3s ease;
187
- font-family: inherit;
188
  }
189
 
 
190
  .toggle-btn:hover {
191
- background: rgba(0, 0, 0, 0.12);
 
192
  }
193
 
 
194
  .toggle-btn.active {
195
- background: var(--text-primary);
196
- color: white;
197
- box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
198
  }
199
 
200
- /* Shared Legend for Scaling and Probing */
 
 
201
  .shared-legend {
202
  display: flex;
203
  justify-content: center;
204
- flex-wrap: wrap;
205
- gap: 1.5rem;
206
- margin-bottom: 1.5rem;
207
- padding: 1rem;
208
- background: var(--bg-card);
209
- border-radius: 12px;
210
- box-shadow: var(--shadow-card);
211
  }
212
 
213
  .legend-item {
@@ -215,120 +313,625 @@ body {
215
  align-items: center;
216
  gap: 0.5rem;
217
  font-size: 13px;
218
- color: var(--text-primary);
219
  }
220
 
221
  .legend-color {
222
- width: 24px;
223
  height: 3px;
224
  border-radius: 2px;
225
  }
226
 
227
- /* Charts Grid */
 
 
228
  .charts-grid {
229
  display: grid;
230
- gap: 16px;
231
- /* Tighter gap */
232
  }
233
 
234
  .charts-grid.three-col {
235
  grid-template-columns: repeat(3, 1fr);
236
  }
237
 
 
 
 
 
238
  .charts-grid.single {
239
  grid-template-columns: 1fr;
240
- max-width: 1000px;
241
- margin: 0 auto;
242
  }
243
 
244
- /* Chart Card */
 
 
245
  .chart-card {
246
- background: var(--bg-card);
247
- border-radius: var(--radius-card);
248
- padding: 24px;
 
 
 
249
  box-shadow: var(--shadow-card);
250
- transition: transform 0.3s ease, box-shadow 0.3s ease;
251
  }
252
 
253
  .chart-card:hover {
254
- box-shadow: 0 8px 24px rgba(0, 0, 0, 0.08);
 
255
  }
256
 
257
  .chart-card h3 {
258
- font-size: 14px;
 
259
  font-weight: 600;
260
- color: var(--text-secondary);
261
- margin-bottom: 1rem;
262
  text-align: center;
263
  text-transform: uppercase;
264
- letter-spacing: 0.05em;
 
265
  }
266
 
267
  .chart-card.wide {
268
- padding: 32px;
269
  }
270
 
271
- /* Chart Containers */
 
 
272
  .chart-container {
273
- height: 300px;
274
- /* Reduced height */
275
  width: 100%;
276
- transition: opacity 0.3s ease;
277
  }
278
 
279
  .chart-container-tall {
280
- height: 450px;
281
- /* Reduced height */
282
- transition: opacity 0.3s ease;
283
  width: 100%;
284
  }
285
 
286
  .chart-container-double {
287
- height: 600px;
288
- /* Double height for error analysis */
289
  width: 100%;
290
  }
291
 
292
- /* Footer */
293
- .footer {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  text-align: center;
295
- padding: 3rem 1rem;
296
- color: var(--text-secondary);
297
- font-size: 12px;
298
- background: var(--bg-body);
299
- border-top: 1px solid var(--border);
300
  }
301
 
302
- /* Responsive */
303
- @media (max-width: 1024px) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  .charts-grid.three-col {
305
  grid-template-columns: repeat(2, 1fr);
306
  }
307
  }
308
 
309
- @media (max-width: 768px) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  .hero h1 {
311
  font-size: 36px;
312
  }
313
 
314
- .subtitle {
315
  font-size: 20px;
316
  }
317
 
318
- .charts-grid.three-col {
319
- grid-template-columns: 1fr;
320
  }
321
 
322
- .chart-container {
323
- height: 300px;
324
  }
325
 
326
- .chart-container-tall {
327
- height: 400px;
 
 
328
  }
329
  }
330
 
331
- /* Plotly Overrides */
332
- .js-plotly-plot .plotly .modebar {
333
- display: none !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  }
 
1
+ /* DDR-Bench Dashboard - Apple-Inspired Design System */
2
+ /* Full-width layout with landscape charts */
3
+
4
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
5
+
6
+ /* ========================= */
7
+ /* DESIGN TOKENS */
8
+ /* ========================= */
9
  :root {
10
+ /* Color Palette - Apple Inspired */
11
+ --color-text: #1d1d1f;
12
+ --color-text-secondary: #424245;
13
+ --color-text-muted: #6e6e73;
14
+ --color-background: #f5f5f7;
15
+ /* Reverted to original light gray */
16
+ --color-surface: #ffffff;
17
+ /* Reverted to pure white cards */
18
+ --color-border: #d2d2d7;
19
+ /* Reverted border color */
20
+
21
+ /* Accent Colors */
22
+ --color-primary: #0071e3;
23
+ --color-primary-hover: #0077ed;
24
+
25
+ /* Model Type Colors */
26
+ --color-proprietary: #7c3aed;
27
+ --color-proprietary-bg: rgba(124, 58, 237, 0.08);
28
+ --color-opensource: #059669;
29
+ --color-opensource-bg: rgba(5, 150, 105, 0.08);
30
+
31
+ /* Shadows */
32
+ /* Kept flat and light: no small shadow, a barely visible card shadow, a gentle hover lift */
33
+ --shadow-sm: none;
34
+ --shadow-card: 0 1px 3px rgba(0, 0, 0, 0.02);
35
+ /* Very subtle shadow */
36
+ --shadow-card-hover: 0 4px 12px rgba(0, 0, 0, 0.05);
37
+ /* Gentle hover lift */
38
+
39
+ /* Spacing */
40
+ --space-xs: 0.5rem;
41
+ --space-sm: 1rem;
42
+ --space-md: 1.5rem;
43
+ --space-lg: 2rem;
44
+ --space-xl: 3rem;
45
+ --space-2xl: 4rem;
46
+
47
+ /* Border Radius */
48
+ --radius-sm: 8px;
49
+ --radius-md: 12px;
50
+ --radius-lg: 16px;
51
+ --radius-pill: 980px;
52
+
53
+ /* Typography */
54
+ --font-system: -apple-system, BlinkMacSystemFont, "SF Pro Display", "SF Pro Text", "Helvetica Neue", "Segoe UI", system-ui, sans-serif;
55
+ --font-mono: "SF Mono", SFMono-Regular, ui-monospace, Menlo, Monaco, Consolas, monospace;
56
+ }
57
+
58
+ /* ========================= */
59
+ /* BASE STYLES */
60
+ /* ========================= */
61
+ * {
62
  margin: 0;
63
  padding: 0;
64
+ box-sizing: border-box;
65
+ }
66
+
67
+ html {
68
+ font-size: 17px;
69
+ -webkit-font-smoothing: antialiased;
70
+ -moz-osx-font-smoothing: grayscale;
71
  }
72
 
73
  body {
74
+ font-family: var(--font-system);
75
+ background: var(--color-background);
76
+ color: var(--color-text);
77
  line-height: 1.47059;
78
  letter-spacing: -0.022em;
79
  min-height: 100vh;
 
80
  }
81
 
82
+ /* ========================= */
83
+ /* HERO SECTION */
84
+ /* ========================= */
85
  .hero {
86
+ background: linear-gradient(180deg, #ffffff 0%, var(--color-background) 100%);
87
+ padding: var(--space-2xl) var(--space-lg);
88
  text-align: center;
 
89
  }
90
 
91
  .hero-content {
 
94
  }
95
 
96
  .badge {
97
+ display: inline-flex;
98
+ align-items: center;
99
+ gap: 0.5rem;
100
  font-size: 12px;
101
  font-weight: 600;
102
+ letter-spacing: 0.02em;
 
103
  text-transform: uppercase;
104
+ color: var(--color-primary);
105
+ margin-bottom: var(--space-md);
106
+ }
107
+
108
+ .badge svg {
109
+ width: 16px;
110
+ height: 16px;
111
  }
112
 
113
  .hero h1 {
114
+ font-size: 56px;
 
115
  font-weight: 700;
116
+ letter-spacing: -0.035em;
117
+ line-height: 1.05;
118
+ margin-bottom: var(--space-sm);
119
+ color: var(--color-text);
120
  }
121
 
122
+ .hero .subtitle {
123
  font-size: 24px;
124
+ font-weight: 500;
125
+ color: var(--color-text-secondary);
126
+ letter-spacing: -0.02em;
127
+ margin-bottom: var(--space-md);
 
128
  }
129
 
130
+ .hero .description {
131
  font-size: 17px;
132
+ line-height: 1.5;
133
+ color: var(--color-text-muted);
134
+ max-width: 720px;
135
+ margin: 0 auto var(--space-lg);
 
 
136
  }
137
 
138
+ /* Meta Information */
139
+ /* Vertically stacked rows for authors, affiliations and links */
140
+ .meta-info {
141
  display: flex;
142
+ flex-direction: column;
143
+ /* Stack vertically as requested */
144
+ align-items: center;
145
+ gap: var(--space-sm);
146
+ margin-top: var(--space-md);
147
+ padding-top: var(--space-md);
148
+ border-top: 1px solid var(--color-border);
149
+ }
150
+
151
+ .meta-row {
152
+ display: flex;
153
+ flex-wrap: wrap;
154
  justify-content: center;
155
+ gap: var(--space-md);
 
156
  }
157
 
158
+ .meta-row.authors {
159
+ font-size: 16px;
160
+ font-weight: 500;
161
+ color: var(--color-text);
162
  }
163
 
164
+ .meta-row.affiliations {
165
+ font-size: 15px;
166
+ color: var(--color-text-secondary);
 
 
167
  }
168
 
169
+ .meta-row.links {
170
+ margin-top: var(--space-xs);
171
+ }
172
+
173
+ .meta-item {
174
+ font-size: inherit;
175
+ /* Inherit from row */
176
+ color: inherit;
177
+ }
178
+
179
+ .meta-item a {
180
+ color: var(--color-primary);
181
+ text-decoration: none;
182
+ }
183
+
184
+ .meta-item a:hover {
185
+ text-decoration: underline;
186
  }
187
 
188
+ /* ========================= */
189
+ /* MAIN CONTENT */
190
+ /* ========================= */
191
  .content {
192
+ max-width: 1600px;
193
+ width: 95%;
194
  margin: 0 auto;
195
+ padding: 0 var(--space-md) var(--space-2xl);
196
  }
197
 
198
+ /* ========================= */
199
+ /* SECTION STYLES */
200
+ /* ========================= */
201
  .section {
202
+ margin-bottom: var(--space-2xl);
203
+ padding-top: var(--space-lg);
204
  }
205
 
206
  .section-header {
 
207
  text-align: center;
208
+ margin-bottom: var(--space-md);
209
  }
210
 
211
  .section-header h2 {
212
+ display: inline-flex;
213
+ align-items: center;
214
+ justify-content: center;
215
+ gap: 0.75rem;
216
+ font-size: 36px;
217
+ font-weight: 600;
218
+ letter-spacing: -0.025em;
219
+ line-height: 1.1;
220
+ margin-bottom: var(--space-xs);
221
+ color: var(--color-text);
222
+ }
223
+
224
+ .section-header h2 svg {
225
+ width: 36px;
226
+ height: 36px;
227
+ color: var(--color-text-secondary);
228
  }
229
 
230
  .section-header p {
231
  font-size: 17px;
232
+ color: var(--color-text-muted);
233
+ max-width: 680px;
234
+ margin: 0 auto;
235
+ line-height: 1.5;
236
  }
237
 
238
+ /* Color-coded model type badges */
239
+ .section-header .model-badge {
240
+ display: inline-flex;
241
+ align-items: center;
242
+ padding: 2px 10px;
243
+ border-radius: var(--radius-pill);
244
+ font-size: 14px;
245
+ font-weight: 600;
246
  }
247
 
248
+ .section-header .model-badge.proprietary {
249
+ background: var(--color-proprietary-bg);
250
+ color: var(--color-proprietary);
 
 
 
 
 
 
 
 
251
  }
252
 
253
+ .section-header .model-badge.opensource {
254
+ background: var(--color-opensource-bg);
255
+ color: var(--color-opensource);
256
  }
257
 
258
+ /* ========================= */
259
+ /* DIMENSION TOGGLE */
260
+ /* ========================= */
261
+ .dimension-toggle {
262
+ display: flex;
263
+ justify-content: center;
264
+ gap: 10px;
265
+ margin-bottom: var(--space-md);
266
  }
267
 
268
+ .dim-btn,
269
  .toggle-btn {
270
+ display: inline-flex;
271
+ align-items: center;
272
+ justify-content: center;
273
  padding: 10px 20px;
274
+ background: var(--color-surface);
275
+ border: 1px solid var(--color-border);
276
+ border-radius: var(--radius-pill);
277
+ color: var(--color-text);
278
  font-size: 14px;
279
  font-weight: 500;
280
  cursor: pointer;
281
+ transition: all 0.2s ease;
282
+ font-family: var(--font-system);
283
  }
284
 
285
+ .dim-btn:hover,
286
  .toggle-btn:hover {
287
+ background: var(--color-background);
288
+ border-color: var(--color-text-muted);
289
  }
290
 
291
+ .dim-btn.active,
292
  .toggle-btn.active {
293
+ background: var(--color-text);
294
+ border-color: var(--color-text);
295
+ color: #ffffff;
296
  }
297
 
298
+ /* ========================= */
299
+ /* SHARED LEGEND */
300
+ /* ========================= */
301
  .shared-legend {
302
  display: flex;
303
  justify-content: center;
304
+ gap: var(--space-md);
305
+ margin-bottom: var(--space-sm);
306
+ padding: var(--space-xs) var(--space-md);
307
+ background: var(--color-surface);
308
+ border-radius: var(--radius-md);
 
 
309
  }
310
 
311
  .legend-item {
 
313
  align-items: center;
314
  gap: 0.5rem;
315
  font-size: 13px;
316
+ color: var(--color-text-secondary);
317
  }
318
 
319
  .legend-color {
320
+ width: 12px;
321
  height: 3px;
322
  border-radius: 2px;
323
  }
324
 
325
+ /* ========================= */
326
+ /* CHARTS GRID */
327
+ /* ========================= */
328
  .charts-grid {
329
  display: grid;
330
+ gap: var(--space-md);
 
331
  }
332
 
333
  .charts-grid.three-col {
334
  grid-template-columns: repeat(3, 1fr);
335
  }
336
 
337
+ .charts-grid.two-col {
338
+ grid-template-columns: repeat(2, 1fr);
339
+ }
340
+
341
  .charts-grid.single {
342
  grid-template-columns: 1fr;
 
 
343
  }
344
 
345
+ /* ========================= */
346
+ /* CHART CARD */
347
+ /* ========================= */
348
  .chart-card {
349
+ background: var(--color-surface);
350
+ border-radius: var(--radius-lg);
351
+ padding: 1rem;
352
+ /* Keeping compact padding */
353
+ border: 1px solid rgba(0, 0, 0, 0.03);
354
+ /* Extremely subtle border */
355
  box-shadow: var(--shadow-card);
356
+ transition: all 0.2s ease;
357
  }
358
 
359
  .chart-card:hover {
360
+ box-shadow: var(--shadow-card-hover);
361
+ transform: translateY(-2px);
362
  }
363
 
364
  .chart-card h3 {
365
+ font-family: var(--font-mono);
366
+ font-size: 12px;
367
  font-weight: 600;
368
+ color: var(--color-text-secondary);
 
369
  text-align: center;
370
  text-transform: uppercase;
371
+ letter-spacing: 0.08em;
372
+ margin-bottom: var(--space-xs);
373
  }
374
 
375
  .chart-card.wide {
376
+ padding: var(--space-md);
377
  }
378
 
379
+ /* ========================= */
380
+ /* CHART CONTAINERS */
381
+ /* ========================= */
382
  .chart-container {
383
+ height: 280px;
 
384
  width: 100%;
 
385
  }
386
 
387
  .chart-container-tall {
388
+ height: 380px;
 
 
389
  width: 100%;
390
  }
391
 
392
  .chart-container-double {
393
+ height: 450px;
 
394
  width: 100%;
395
  }
396
 
397
+ /* ========================= */
398
+ /* FRAMEWORK SECTION */
399
+ /* ========================= */
400
+ .framework-section {
401
+ margin-bottom: var(--space-2xl);
402
+ }
403
+
404
+ .framework-grid {
405
+ display: grid;
406
+ grid-template-columns: 1fr;
407
+ gap: var(--space-xl);
408
+ justify-items: center;
409
+ }
410
+
411
+ .framework-card {
412
+ background: var(--color-surface);
413
+ border-radius: var(--radius-lg);
414
+ padding: var(--space-md);
415
+ box-shadow: var(--shadow-card);
416
+ display: flex;
417
+ flex-direction: column;
418
+ align-items: center;
419
+ width: 100%;
420
+ max-width: 1000px;
421
+ }
422
+
423
+ .framework-card .placeholder {
424
+ width: 100%;
425
+ height: 640px;
426
+ background: linear-gradient(135deg, #f0f0f2 0%, #e8e8ed 100%);
427
+ border-radius: var(--radius-md);
428
+ display: flex;
429
+ align-items: center;
430
+ justify-content: center;
431
+ color: var(--color-text-muted);
432
+ font-size: 14px;
433
+ font-weight: 500;
434
+ border: 2px dashed var(--color-border);
435
+ }
436
+
437
+ .framework-card h3 {
438
+ font-size: 15px;
439
+ font-weight: 600;
440
+ color: var(--color-text);
441
+ margin-top: var(--space-sm);
442
  text-align: center;
 
 
 
 
 
443
  }
444
 
445
+ .framework-card img,
446
+ .framework-card object {
447
+ width: 100%;
448
+ height: auto;
449
+ object-fit: contain;
450
+ border-radius: var(--radius-md);
451
+ }
452
+
453
+ /* ========================= */
454
+ /* CAROUSEL / RESULTS */
455
+ /* ========================= */
456
+ .carousel-wrapper {
457
+ position: relative;
458
+ overflow: hidden;
459
+ }
460
+
461
+ /* Gradient fade overlays */
462
+ .carousel-wrapper::before,
463
+ .carousel-wrapper::after {
464
+ content: '';
465
+ position: absolute;
466
+ top: 0;
467
+ bottom: 0;
468
+ width: 120px;
469
+ z-index: 2;
470
+ pointer-events: none;
471
+ }
472
+
473
+ .carousel-wrapper::before {
474
+ left: 0;
475
+ background: linear-gradient(to right, var(--color-background) 0%, transparent 100%);
476
+ }
477
+
478
+ .carousel-wrapper::after {
479
+ right: 0;
480
+ background: linear-gradient(to left, var(--color-background) 0%, transparent 100%);
481
+ }
482
+
483
+ .carousel-track {
484
+ display: flex;
485
+ gap: var(--space-lg);
486
+ padding: var(--space-sm) 0;
487
+ transition: transform 0.5s cubic-bezier(0.25, 0.1, 0.25, 1);
488
+ will-change: transform;
489
+ }
490
+
491
+ .carousel-card {
492
+ flex: 0 0 66.666%;
493
+ min-width: 0;
494
+ background: var(--color-surface);
495
+ border-radius: var(--radius-lg);
496
+ padding: var(--space-md);
497
+ box-shadow: var(--shadow-card);
498
+ transition: transform 0.4s ease, box-shadow 0.4s ease, opacity 0.4s ease;
499
+ }
500
+
501
+ .carousel-card.active {
502
+ transform: scale(1);
503
+ opacity: 1;
504
+ }
505
+
506
+ .carousel-card.side {
507
+ transform: scale(0.92);
508
+ opacity: 0.6;
509
+ }
510
+
511
+ .carousel-card .placeholder {
512
+ width: 100%;
513
+ height: 420px;
514
+ background: linear-gradient(135deg, #f0f0f2 0%, #e8e8ed 100%);
515
+ border-radius: var(--radius-md);
516
+ display: flex;
517
+ align-items: center;
518
+ justify-content: center;
519
+ color: var(--color-text-muted);
520
+ font-size: 14px;
521
+ font-weight: 500;
522
+ border: 2px dashed var(--color-border);
523
+ }
524
+
525
+ .carousel-card h4 {
526
+ font-size: 15px;
527
+ font-weight: 600;
528
+ color: var(--color-text);
529
+ margin-top: var(--space-sm);
530
+ text-align: center;
531
+ }
532
+
533
+ .carousel-card img {
534
+ width: 100%;
535
+ height: 420px;
536
+ object-fit: contain;
537
+ border-radius: var(--radius-md);
538
+ }
539
+
540
+ /* Carousel Navigation Buttons */
541
+ .carousel-btn {
542
+ position: absolute;
543
+ top: 50%;
544
+ transform: translateY(-50%);
545
+ z-index: 3;
546
+ width: 48px;
547
+ height: 48px;
548
+ border-radius: 50%;
549
+ background: rgba(255, 255, 255, 0.9);
550
+ backdrop-filter: blur(10px);
551
+ border: 1px solid var(--color-border);
552
+ color: var(--color-text);
553
+ cursor: pointer;
554
+ display: flex;
555
+ align-items: center;
556
+ justify-content: center;
557
+ transition: all 0.2s ease;
558
+ box-shadow: 0 2px 12px rgba(0, 0, 0, 0.1);
559
+ }
560
+
561
+ .carousel-prev {
562
+ left: 20px;
563
+ }
564
+
565
+ .carousel-next {
566
+ right: 20px;
567
+ }
568
+
569
+ .carousel-btn:hover {
570
+ background: var(--color-text);
571
+ color: #ffffff;
572
+ border-color: var(--color-text);
573
+ }
574
+
575
+ .carousel-btn svg {
576
+ width: 22px;
577
+ height: 22px;
578
+ }
579
+
580
+ /* Carousel Dots */
581
+ .carousel-dots {
582
+ display: flex;
583
+ justify-content: center;
584
+ gap: 8px;
585
+ margin-top: var(--space-md);
586
+ }
587
+
588
+ .carousel-dot {
589
+ width: 8px;
590
+ height: 8px;
591
+ border-radius: 50%;
592
+ background: var(--color-border);
593
+ border: none;
594
+ cursor: pointer;
595
+ transition: all 0.2s ease;
596
+ padding: 0;
597
+ }
598
+
599
+ .carousel-dot:hover {
600
+ background: var(--color-text-muted);
601
+ }
602
+
603
+ .carousel-dot.active {
604
+ background: var(--color-text);
605
+ width: 24px;
606
+ border-radius: 4px;
607
+ }
608
+
609
+ /* ========================= */
610
+ /* FOOTER */
611
+ /* ========================= */
612
+ footer {
613
+ text-align: center;
614
+ padding: var(--space-xl) var(--space-lg);
615
+ background: var(--color-background);
616
+ border-top: 1px solid var(--color-border);
617
+ }
618
+
619
+ footer p {
620
+ font-size: 13px;
621
+ color: var(--color-text-muted);
622
+ }
623
+
624
+ footer a {
625
+ color: var(--color-primary);
626
+ text-decoration: none;
627
+ }
628
+
629
+ footer a:hover {
630
+ text-decoration: underline;
631
+ }
632
+
633
+ /* ========================= */
634
+ /* RESPONSIVE */
635
+ /* ========================= */
636
+ @media (max-width: 1200px) {
637
  .charts-grid.three-col {
638
  grid-template-columns: repeat(2, 1fr);
639
  }
640
  }
641
 
642
+ @media (max-width: 900px) {
643
+
644
+ .charts-grid.three-col,
645
+ .charts-grid.two-col {
646
+ grid-template-columns: 1fr;
647
+ }
648
+
649
+ .framework-grid {
650
+ grid-template-columns: 1fr;
651
+ }
652
+
653
+ .hero h1 {
654
+ font-size: 44px;
655
+ }
656
+
657
+ .section-header h2 {
658
+ font-size: 28px;
659
+ }
660
+
661
+ .content {
662
+ width: 100%;
663
+ padding: 0 var(--space-sm) var(--space-2xl);
664
+ }
665
+ }
666
+
667
+ @media (max-width: 600px) {
668
+ html {
669
+ font-size: 16px;
670
+ }
671
+
672
  .hero h1 {
673
  font-size: 36px;
674
  }
675
 
676
+ .hero .subtitle {
677
  font-size: 20px;
678
  }
679
 
680
+ .section-header h2 {
681
+ font-size: 24px;
682
  }
683
 
684
+ .dimension-toggle {
685
+ flex-wrap: wrap;
686
  }
687
 
688
+ .dim-btn,
689
+ .toggle-btn {
690
+ padding: 8px 16px;
691
+ font-size: 13px;
692
  }
693
  }
694
 
695
+ /* ========================= */
696
+ /* TRAJECTORY / CHAT */
697
+ /* ========================= */
698
+ .trajectory-container {
699
+ max-width: 1000px;
700
+ margin: 0 auto;
701
+ background: var(--color-surface);
702
+ border-radius: var(--radius-lg);
703
+ box-shadow: var(--shadow-card);
704
+ border: 1px solid rgba(0, 0, 0, 0.05);
705
+ /* Subtle border */
706
+ overflow: hidden;
707
+ /* Ensure rounded corners */
708
+ }
709
+
710
+ .chat-window {
711
+ height: 600px;
712
+ /* Fixed height for scrolling */
713
+ overflow-y: auto;
714
+ /* Enable vertical scroll */
715
+ padding: var(--space-md);
716
+ background: #f5f5f7;
717
+ /* Chat background */
718
+ font-size: 14px;
719
+ }
720
+
721
+ .chat-message {
722
+ display: flex;
723
+ flex-direction: column;
724
+ margin-bottom: var(--space-md);
725
+ max-width: 85%;
726
+ /* Limit width */
727
+ animation: fadeIn 0.3s ease;
728
+ }
729
+
730
+ .chat-message.role-agent {
731
+ align-self: flex-end;
732
+ /* Align right */
733
+ align-items: flex-end;
734
+ /* Align content inside to right */
735
+ margin-left: auto;
736
+ }
737
+
738
+ .chat-message.role-environment {
739
+ align-self: flex-start;
740
+ /* Align left */
741
+ align-items: flex-start;
742
+ margin-right: auto;
743
+ }
744
+
745
+ .message-role-label {
746
+ font-size: 11px;
747
+ color: var(--color-text-muted);
748
+ margin-bottom: 4px;
749
+ font-weight: 500;
750
+ text-transform: uppercase;
751
+ letter-spacing: 0.05em;
752
+ padding: 0 4px;
753
+ }
754
+
755
+ .message-bubble {
756
+ padding: 12px 16px;
757
+ border-radius: 12px;
758
+ line-height: 1.5;
759
+ position: relative;
760
+ word-wrap: break-word;
761
+ overflow-wrap: break-word;
762
+ max-width: 100%;
763
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
764
+ }
765
+
766
+ .role-agent .message-bubble {
767
+ background: #0071e3;
768
+ /* Apple Blue */
769
+ color: white;
770
+ border-bottom-right-radius: 2px;
771
+ /* Chat bubble effect */
772
+ }
773
+
774
+ /* Fix links/code in agent bubble to be readable on blue */
775
+ .role-agent .message-bubble a {
776
+ color: white;
777
+ text-decoration: underline;
778
+ }
779
+
780
+ .role-agent .message-bubble code {
781
+ background: rgba(255, 255, 255, 0.2);
782
+ color: white;
783
+ }
784
+
785
+ .role-agent .message-bubble pre {
786
+ background: rgba(255, 255, 255, 0.1);
787
+ border: 1px solid rgba(255, 255, 255, 0.2);
788
+ }
789
+
790
+ .role-agent .message-bubble pre code {
791
+ background: transparent;
792
+ color: white;
793
+ }
794
+
795
+ .role-environment .message-bubble {
796
+ background: white;
797
+ color: var(--color-text);
798
+ border: 1px solid var(--color-border);
799
+ border-bottom-left-radius: 2px;
800
+ /* Chat bubble effect */
801
+ font-family: var(--font-mono);
802
+ /* Monospace for data */
803
+ font-size: 13px;
804
+ }
805
+
806
+ .role-environment .message-bubble pre {
807
+ margin: 0;
808
+ white-space: pre-wrap;
809
+ /* Wrap long lines */
810
+ background: transparent;
811
+ border: none;
812
+ padding: 0;
813
+ }
814
+
815
+ /* Tool Call Styling */
816
+ .tool-call-block {
817
+ margin-top: 10px;
818
+ padding-top: 10px;
819
+ border-top: 1px solid rgba(255, 255, 255, 0.3);
820
+ }
821
+
822
+ /* Force NO Wrap on Code and Allow Scroll */
823
+ .message-bubble pre,
824
+ .message-bubble code {
825
+ white-space: pre !important;
826
+ /* No wrapping */
827
+ word-break: normal !important;
828
+ overflow-x: auto;
829
+ /* Allow scroll */
830
+ }
831
+
832
+ /* Markdown Content Styling */
833
+ .message-bubble table {
834
+ display: block;
835
+ /* display: block lets the table itself be the horizontal scroll container */
836
+ overflow-x: auto;
837
+ width: 100%;
838
+ border-collapse: collapse;
839
+ margin: 12px 0;
840
+ font-size: 13px;
841
+ font-family: var(--font-system);
842
+ white-space: nowrap;
843
+ /* Keep cell text on one line so wide tables scroll instead of wrapping */
844
+ }
845
+
846
+ .message-bubble th,
847
+ .message-bubble td {
848
+ padding: 8px 12px;
849
+ border: 1px solid rgba(0, 0, 0, 0.1);
850
+ text-align: left;
851
+ }
852
+
853
+ .message-bubble th {
854
+ background: rgba(0, 0, 0, 0.03);
855
+ font-weight: 600;
856
+ }
857
+
858
+ .message-bubble tr:nth-child(even) {
859
+ background: rgba(0, 0, 0, 0.01);
860
+ }
861
+
862
+ .message-bubble p {
863
+ margin-bottom: 0.5em;
864
+ }
865
+
866
+ .message-bubble p:last-child {
867
+ margin-bottom: 0;
868
+ }
869
+
870
+ /* Terminal / Code Block Styling - Mac Shell Style */
871
+ .message-bubble pre {
872
+ background: #282c34 !important;
873
+ /* Atom One Dark BG */
874
+ border-radius: 8px;
875
+ margin: 12px 0;
876
+ position: relative;
877
+ padding-top: 32px;
878
+ /* Space for header */
879
+ overflow: hidden;
880
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
881
+ border: 1px solid rgba(255, 255, 255, 0.1);
882
+ }
883
+
884
+ .message-bubble pre::before {
885
+ content: "";
886
+ /* Or maybe "Terminal" or content from attribute? */
887
+ position: absolute;
888
+ top: 0;
889
+ left: 0;
890
+ right: 0;
891
+ height: 30px;
892
+ background: #21252b;
893
+ /* Darker header */
894
+ border-bottom: 1px solid rgba(0, 0, 0, 0.5);
895
+ z-index: 10;
896
+ /* Removed Traffic lights */
897
+ }
898
+
899
+ .message-bubble pre code {
900
+ display: block;
901
+ padding: 12px 16px;
902
+ /* Removed manual color to let highlight.js work */
903
+ font-family: "SF Mono", "Monaco", "Inconsolata", "Fira Mono", "Droid Sans Mono", "Source Code Pro", monospace;
904
+ font-size: 13px;
905
+ line-height: 1.6;
906
+ background: transparent !important;
907
+ }
908
+
909
+ /* Scrollbar styling */
910
+ .chat-window::-webkit-scrollbar {
911
+ width: 8px;
912
+ }
913
+
914
+ .chat-window::-webkit-scrollbar-track {
915
+ background: transparent;
916
+ }
917
+
918
+ .chat-window::-webkit-scrollbar-thumb {
919
+ background-color: rgba(0, 0, 0, 0.1);
920
+ border-radius: 4px;
921
+ }
922
+
923
+ .chat-window::-webkit-scrollbar-thumb:hover {
924
+ background-color: rgba(0, 0, 0, 0.2);
925
+ }
926
+
927
+ @keyframes fadeIn {
928
+ from {
929
+ opacity: 0;
930
+ transform: translateY(10px);
931
+ }
932
+
933
+ to {
934
+ opacity: 1;
935
+ transform: translateY(0);
936
+ }
937
  }
trajectory.js ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // ============================================================================
3
+ // AGENT TRAJECTORY - Chat Interface
4
+ // ============================================================================
5
+
6
// Key into TRAJECTORY_DATA for the scenario currently displayed; the toggle
// buttons overwrite it on click.
let currentTrajScenario = 'mimic';

/**
 * `highlight` callback handed to marked for fenced code blocks.
 *
 * SQL blocks are pretty-printed first when sqlFormatter is available, then the
 * code is highlighted with highlight.js (explicit language when highlight.js
 * knows it, auto-detection otherwise).
 *
 * @param {string} code - Raw source text of the code block.
 * @param {string} [lang] - Language tag from the fence, if any.
 * @returns {?string} Highlighted HTML, or null when highlight.js is not loaded.
 */
function highlightTrajectoryCode(code, lang) {
    // highlight.js is an optional global just like marked and sqlFormatter.
    // Returning null is meant to make marked fall back to its own escaped
    // rendering of the untouched code.
    // NOTE(review): relies on marked's legacy `highlight` option semantics
    // (null return => no highlighting) - confirm against the bundled version.
    if (typeof hljs === 'undefined') {
        return null;
    }

    let source = code;

    // Auto-format SQL; a formatter failure keeps the original text.
    if (lang === 'sql' && typeof sqlFormatter !== 'undefined') {
        try {
            source = sqlFormatter.format(code, { language: 'sql', indent: ' ' });
        } catch (e) {
            console.warn('SQL Formatting failed:', e);
        }
    }

    if (lang && hljs.getLanguage(lang)) {
        return hljs.highlight(source, { language: lang }).value;
    }
    return hljs.highlightAuto(source).value;
}

/**
 * Initialises the trajectory chat view.
 *
 * Waits (by polling every 500 ms) until the global TRAJECTORY_DATA exists,
 * configures marked when it is present, wires every `[data-traj-scenario]`
 * button to switch scenarios, and renders the current scenario.
 */
function initTrajectory() {
    if (typeof TRAJECTORY_DATA === 'undefined') {
        setTimeout(initTrajectory, 500); // Wait for data load
        return;
    }

    // Configure marked with highlight.js
    if (typeof marked !== 'undefined') {
        marked.setOptions({
            highlight: highlightTrajectoryCode,
            breaks: true,
            langPrefix: 'hljs language-', // Ensure .hljs class is added for themes
        });
    }

    // Setup toggle buttons: exactly one button carries the `active` class.
    document.querySelectorAll('[data-traj-scenario]').forEach((btn) => {
        btn.addEventListener('click', () => {
            document
                .querySelectorAll('[data-traj-scenario]')
                .forEach((other) => other.classList.remove('active'));
            btn.classList.add('active');
            currentTrajScenario = btn.dataset.trajScenario;
            renderTrajectory(currentTrajScenario);
        });
    });

    // Initial render uses the module state rather than a second hard-coded key.
    renderTrajectory(currentTrajScenario);
}
51
+
52
/**
 * Renders all messages of one scenario into the `#chat-window` element.
 *
 * Each message becomes a `.chat-message.role-<role>` element holding a role
 * label and a `.message-bubble`. The message content is converted with marked
 * when available, otherwise escaped with line breaks turned into `<br>`.
 * Content taller than the collapse threshold is clipped behind a fade mask and
 * gets a "Show More" / "Show Less" toggle button.
 *
 * Does nothing when the container is missing or the scenario has no message
 * array. Entries with a missing role or content are rendered with a fallback
 * label / empty body instead of aborting the whole render.
 *
 * @param {string} scenario - Key into the global TRAJECTORY_DATA object.
 */
function renderTrajectory(scenario) {
    // 3 lines is approx 80-100px.
    const COLLAPSE_HEIGHT = 200;
    // Only collapse when noticeably taller than the clip height.
    const COLLAPSE_SLACK = 20;
    const FADE_MASK = 'linear-gradient(to bottom, black 60%, transparent 100%)';

    const container = document.getElementById('chat-window');
    const messages = TRAJECTORY_DATA[scenario];

    if (!container || !Array.isArray(messages)) return;

    // Converts message text to the HTML placed inside the bubble.
    const toHtml = (content) => {
        if (typeof marked !== 'undefined') {
            return marked.parse(content);
        }
        // Fallback
        return escapeHtml(content).replace(/\n/g, '<br>');
    };

    // Applies or removes the clipped-with-fade presentation.
    const setCollapsed = (el, collapsed) => {
        el.style.maxHeight = collapsed ? `${COLLAPSE_HEIGHT}px` : 'none';
        el.style.maskImage = collapsed ? FADE_MASK : 'none';
        el.style.webkitMaskImage = collapsed ? FADE_MASK : 'none';
    };

    // Builds the Show More / Show Less button controlling `contentDiv`.
    const buildToggle = (contentDiv, role) => {
        const btn = document.createElement('button');
        btn.type = 'button';
        btn.textContent = 'Show More';
        btn.style.cssText = 'display:block; margin: 8px auto 0; border:none; background: rgba(0,0,0,0.05); color: #0071e3; border-radius: 12px; padding: 4px 12px; cursor:pointer; font-size:12px; font-weight:500; transition: all 0.2s;';

        if (role === 'agent') {
            btn.style.background = 'rgba(255,255,255,0.2)';
            btn.style.color = 'white';
        }

        // Track the state explicitly rather than reading it back from a style string.
        let collapsed = true;
        btn.onclick = () => {
            collapsed = !collapsed;
            setCollapsed(contentDiv, collapsed);
            btn.textContent = collapsed ? 'Show More' : 'Show Less';
        };
        return btn;
    };

    container.innerHTML = '';

    messages.forEach((msg) => {
        if (msg == null) return;

        // Normalise so one malformed entry cannot break the whole trajectory.
        const role = msg.role == null ? 'unknown' : String(msg.role);
        const content = msg.content == null ? '' : String(msg.content);

        const msgDiv = document.createElement('div');
        msgDiv.className = `chat-message role-${role}`;

        // Role label: capitalised role, with a dedicated caption for the user.
        const roleLabel = document.createElement('div');
        roleLabel.className = 'message-role-label';
        roleLabel.textContent = role === 'user'
            ? 'User Prompt'
            : role.charAt(0).toUpperCase() + role.slice(1);
        msgDiv.appendChild(roleLabel);

        // Bubble
        const bubble = document.createElement('div');
        bubble.className = 'message-bubble';

        // NOTE(review): the converted markup is inserted as HTML without
        // sanitising; this presumes TRAJECTORY_DATA is trusted - confirm.
        const contentDiv = document.createElement('div');
        contentDiv.innerHTML = toHtml(content);
        bubble.appendChild(contentDiv);
        msgDiv.appendChild(bubble);
        container.appendChild(msgDiv); // Must be in the DOM before measuring height

        // Height check (post-render)
        if (contentDiv.scrollHeight > COLLAPSE_HEIGHT + COLLAPSE_SLACK) {
            contentDiv.style.overflow = 'hidden';
            contentDiv.style.position = 'relative';
            setCollapsed(contentDiv, true);
            bubble.appendChild(buildToggle(contentDiv, role));
        }
    });

    container.scrollTop = 0;
}
129
+
130
/**
 * Escapes the five HTML-significant characters (& < > " ') so that text can be
 * embedded in markup safely.
 *
 * @param {*} text - Value to escape; non-strings are converted with String().
 * @returns {string} Escaped text, or '' for null / undefined.
 */
function escapeHtml(text) {
    if (text == null) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;',
    };

    // Single pass, so the '&' of an inserted entity is never re-escaped.
    return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
139
+
140
// Start the trajectory view once the DOM is parsed. When this script is
// evaluated after DOMContentLoaded has already fired (e.g. loaded async or
// injected later), the event would never arrive, so initialise immediately.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        initTrajectory();
    });
} else {
    initTrajectory();
}
trajectory_data.js ADDED
The diff for this file is too large to render. See raw diff