thinkwee committed on
Commit
9f54287
·
1 Parent(s): 9711e49

update links

Browse files
Files changed (3) hide show
  1. index.html +67 -41
  2. styles.css +28 -7
  3. trajectory.js +13 -0
index.html CHANGED
@@ -59,18 +59,25 @@
59
  We distinguish <em>investigatory intelligence</em> (autonomously setting goals and exploring) from
60
  <em>executional intelligence</em> (completing assigned tasks), arguing that true agency requires the
61
  former.
62
- <br>
63
  To evaluate this, we introduce <strong>Deep Data Research (DDR)</strong>, an open-ended task where LLMs
64
  autonomously extract insights from databases, and <strong>DDR-Bench</strong>, a large-scale,
65
  checklist-based benchmark enabling verifiable evaluation.
66
- <br>
67
  Results show that while frontier models display emerging agency, long-horizon exploration remains
68
  challenging, with effective investigatory intelligence depending on intrinsic agentic strategies beyond
69
  mere scaffolding or scaling.
70
  </p>
71
  <div class="meta-info">
72
  <div class="meta-row authors">
73
- <span class="meta-item">Wei Liu, Peijie Yu, Michele Orini, Yali Du, Yulan He</span>
 
 
 
 
 
 
 
 
 
74
  </div>
75
  <div class="meta-row affiliations">
76
  <a href="https://kclnlp.github.io/" target="_blank" rel="noopener noreferrer">
@@ -84,12 +91,12 @@
84
  </a>
85
  </div>
86
  <div class="meta-row links">
87
- <a href="https://huggingface.co/spaces/DDR-Bench" class="platform-btn huggingface-btn">
88
  <img src="assets/hf-logo-pirate.svg" alt="HuggingFace" width="30" height="30"
89
  class="platform-icon">
90
  Data
91
  </a>
92
- <a href="https://github.com/DDR-Bench" class="platform-btn github-btn">
93
  <svg viewBox="0 0 24 24" width="30" height="30" fill="currentColor">
94
  <path
95
  d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.17 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.34-3.369-1.34-.454-1.156-1.11-1.463-1.11-1.463-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.831.092-.646.35-1.086.636-1.336-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.269 2.75 1.025A9.578 9.578 0 0112 6.836c.85.004 1.705.114 2.504.336 1.909-1.294 2.747-1.025 2.747-1.025.546 1.377.203 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.167 22 16.418 22 12c0-5.523-4.477-10-10-10z" />
@@ -121,24 +128,29 @@
121
  </svg>
122
  Framework Overview
123
  </h2>
124
- <p>System architecture and evaluation pipeline of DDR-Bench.</p>
125
  </div>
126
  <div class="framework-grid">
127
  <div class="framework-card">
128
  <img src="assets/framework_task.png" alt="Task Formulation Framework"
129
  style="border-radius: var(--radius-md);">
130
  <h3>Task Formulation</h3>
131
- <p class="framework-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do
132
- eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
133
- nostrud exercitation ullamco laboris.</p>
 
 
134
  </div>
135
  <div class="framework-card">
136
  <img src="assets/framework_pipeline.png" alt="Evaluation Pipeline Framework"
137
  style="border-radius: var(--radius-md);">
138
  <h3>Evaluation Pipeline</h3>
139
- <p class="framework-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do
140
- eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
141
- nostrud exercitation ullamco laboris.</p>
 
 
 
142
  </div>
143
  </div>
144
  </section>
@@ -164,6 +176,11 @@
164
  <button class="dim-btn" data-traj-scenario="globem">GLOBEM</button>
165
  </div>
166
 
 
 
 
 
 
167
  <div class="trajectory-container">
168
  <div id="chat-window" class="chat-window">
169
  <!-- Messages will be injected here via JS -->
@@ -177,9 +194,6 @@
177
  <span>Scroll to see more</span>
178
  </div>
179
  </div>
180
- <p class="trajectory-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
181
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
182
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
183
  </section>
184
 
185
  <!-- 2. Experiment Results Section -->
@@ -217,9 +231,11 @@
217
  <!-- 2. Qwen Family -->
218
  <div class="carousel-card">
219
  <img src="assets/qwenfamily.png" alt="Qwen Family Performance">
220
- <h4>Qwen Family Analysis</h4>
221
- <p class="card-caption">Performance scaling and behavioral differences within the Qwen model
222
- series (Qwen3-Next-80B vs 30B).</p>
 
 
223
  </div>
224
 
225
  <!-- 3. Reasoning -->
@@ -253,7 +269,9 @@
253
  <div class="carousel-card">
254
  <img src="assets/hallucination.png" alt="Hallucination Analysis">
255
  <h4>Hallucination Analysis</h4>
256
- <p class="card-caption">Hallucination rates is low.</p>
 
 
257
  </div>
258
 
259
  <!-- 6.5 Hallucination-Accuracy Correlation -->
@@ -271,7 +289,7 @@
271
  <h4>Trustworthiness</h4>
272
  <p class="card-caption">Verification of the LLM-as-a-Checker pipeline demonstrating high
273
  alignment
274
- with human expert judgments.</p>
275
  </div>
276
  </div>
277
 
@@ -321,9 +339,9 @@
321
  <div id="scaling-globem" class="chart-container"></div>
322
  </div>
323
  </div>
324
- <p class="section-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
325
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
326
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
327
  </section>
328
 
329
  <!-- 2. Ranking Comparison Section -->
@@ -370,9 +388,9 @@
370
 
371
  </div>
372
  </div>
373
- <p class="section-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
374
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
375
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
376
  </section>
377
 
378
  <!-- 3. Turn Distribution Section -->
@@ -402,9 +420,9 @@
402
  <div id="turn-globem" class="chart-container-tall"></div>
403
  </div>
404
  </div>
405
- <p class="section-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
406
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
407
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
408
  </section>
409
 
410
  <!-- 4. Entropy Analysis Section -->
@@ -419,7 +437,7 @@
419
  <circle cx="7.5" cy="16.5" r="1.5" />
420
  <circle cx="17.5" cy="14.5" r="1.5" />
421
  </svg>
422
- Entropy Analysis
423
  </h2>
424
  <p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
425
  = more uniform access; Higher coverage = more fields explored.</p>
@@ -454,9 +472,9 @@
454
  <div id="entropy-model-5" class="chart-container-tall"></div>
455
  </div>
456
  </div>
457
- <p class="section-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
458
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
459
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
460
  </section>
461
 
462
  <!-- 5. Error Analysis Section -->
@@ -478,9 +496,14 @@
478
  <div id="error-chart" class="chart-container-double"></div>
479
  </div>
480
  </div>
481
- <p class="section-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
482
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
483
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
 
 
 
 
 
484
  </section>
485
 
486
  <!-- 6. Probing Results Section -->
@@ -492,9 +515,9 @@
492
  <circle cx="11" cy="11" r="8" />
493
  <path d="m21 21-4.3-4.3" />
494
  </svg>
495
- Probing Results
496
  </h2>
497
- <p>Analyze the average log probability of FINISH messages across conversation turns and progress.</p>
498
  </div>
499
  <div id="probing-legend" class="shared-legend"></div>
500
  <div class="charts-grid three-col">
@@ -511,9 +534,12 @@
511
  <div id="probing-10k" class="chart-container-tall"></div>
512
  </div>
513
  </div>
514
- <p class="section-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
515
- tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
516
- ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
 
 
 
517
  </section>
518
  </main>
519
 
 
59
  We distinguish <em>investigatory intelligence</em> (autonomously setting goals and exploring) from
60
  <em>executional intelligence</em> (completing assigned tasks), arguing that true agency requires the
61
  former.
 
62
  To evaluate this, we introduce <strong>Deep Data Research (DDR)</strong>, an open-ended task where LLMs
63
  autonomously extract insights from databases, and <strong>DDR-Bench</strong>, a large-scale,
64
  checklist-based benchmark enabling verifiable evaluation.
 
65
  Results show that while frontier models display emerging agency, long-horizon exploration remains
66
  challenging, with effective investigatory intelligence depending on intrinsic agentic strategies beyond
67
  mere scaffolding or scaling.
68
  </p>
69
  <div class="meta-info">
70
  <div class="meta-row authors">
71
+ <span class="meta-item">
72
+ <a href="https://thinkwee.top/about" target="_blank" rel="noopener noreferrer">Wei Liu</a>,
73
+ <a href="https://github.com/yupeijei1997" target="_blank" rel="noopener noreferrer">Peijie
74
+ Yu</a>,
75
+ <a href="https://www.kcl.ac.uk/people/michele-orini" target="_blank"
76
+ rel="noopener noreferrer">Michele Orini</a>,
77
+ <a href="https://yalidu.github.io/" target="_blank" rel="noopener noreferrer">Yali Du</a>,
78
+ <a href="https://sites.google.com/view/yulanhe/home" target="_blank"
79
+ rel="noopener noreferrer">Yulan He</a>
80
+ </span>
81
  </div>
82
  <div class="meta-row affiliations">
83
  <a href="https://kclnlp.github.io/" target="_blank" rel="noopener noreferrer">
 
91
  </a>
92
  </div>
93
  <div class="meta-row links">
94
+ <a href="https://huggingface.co/collections/thinkwee/ddrbench" class="platform-btn huggingface-btn">
95
  <img src="assets/hf-logo-pirate.svg" alt="HuggingFace" width="30" height="30"
96
  class="platform-icon">
97
  Data
98
  </a>
99
+ <a href="https://github.com/thinkwee/DDR_Bench" class="platform-btn github-btn">
100
  <svg viewBox="0 0 24 24" width="30" height="30" fill="currentColor">
101
  <path
102
  d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.17 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.34-3.369-1.34-.454-1.156-1.11-1.463-1.11-1.463-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.831.092-.646.35-1.086.636-1.336-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.269 2.75 1.025A9.578 9.578 0 0112 6.836c.85.004 1.705.114 2.504.336 1.909-1.294 2.747-1.025 2.747-1.025.546 1.377.203 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.167 22 16.418 22 12c0-5.523-4.477-10-10-10z" />
 
128
  </svg>
129
  Framework Overview
130
  </h2>
131
+ <p>Overview of DDR-Bench.</p>
132
  </div>
133
  <div class="framework-grid">
134
  <div class="framework-card">
135
  <img src="assets/framework_task.png" alt="Task Formulation Framework"
136
  style="border-radius: var(--radius-md);">
137
  <h3>Task Formulation</h3>
138
+ <p class="framework-description">A case of Claude Sonnet 4.5's trajectory and evaluation checklist
139
+ in the MIMIC scenario of DDR-Bench. Verified facts and supporting insights are
140
+ <u>underlined</u>. The agent is asked to perform multiple ReAct turns to explore the database
141
+ without predefined targets or queries, and to autonomously mine insights from the exploration.
142
+ </p>
143
  </div>
144
  <div class="framework-card">
145
  <img src="assets/framework_pipeline.png" alt="Evaluation Pipeline Framework"
146
  style="border-radius: var(--radius-md);">
147
  <h3>Evaluation Pipeline</h3>
148
+ <p class="framework-description"><b>Left</b>: Compared with previous tasks, <i>DDR</i> maximises
149
+ exploration openness and agency, focusing on the direct evaluation of insight quality.
150
+ <b>Right</b>: Overview of DDR-Bench. The checklist derived from the freeform parts of the
151
+ database is used to evaluate the agent-generated insights from the exploration on the structured
152
+ parts of the database.
153
+ </p>
154
  </div>
155
  </div>
156
  </section>
 
176
  <button class="dim-btn" data-traj-scenario="globem">GLOBEM</button>
177
  </div>
178
 
179
+ <p id="trajectory-scenario-description" class="trajectory-description">
180
+ Exploring clinical patterns and patient outcomes in a large-scale electronic health record (EHR)
181
+ database.
182
+ </p>
183
+
184
  <div class="trajectory-container">
185
  <div id="chat-window" class="chat-window">
186
  <!-- Messages will be injected here via JS -->
 
194
  <span>Scroll to see more</span>
195
  </div>
196
  </div>
 
 
 
197
  </section>
198
 
199
  <!-- 2. Experiment Results Section -->
 
231
  <!-- 2. Qwen Family -->
232
  <div class="carousel-card">
233
  <img src="assets/qwenfamily.png" alt="Qwen Family Performance">
234
+ <h4>Training-time Factors Analysis</h4>
235
+ <p class="card-caption">Training-time factors study within the Qwen family. From left to right,
236
+ the three columns examine inference-time scaling performance across all scenarios for models
237
+ with different parameter scales, context optimisation methods, and model generations with
238
+ different training strategies.</p>
239
  </div>
240
 
241
  <!-- 3. Reasoning -->
 
269
  <div class="carousel-card">
270
  <img src="assets/hallucination.png" alt="Hallucination Analysis">
271
  <h4>Hallucination Analysis</h4>
272
+ <p class="card-caption">Hallucination rates (%) across models in DDR-Bench, measured as the
273
+ proportion of insights containing factual but unfaithful information that are not derivable
274
+ from the provided inputs. These rates are low.</p>
275
  </div>
276
 
277
  <!-- 6.5 Hallucination-Accuracy Correlation -->
 
289
  <h4>Trustworthiness</h4>
290
  <p class="card-caption">Verification of the LLM-as-a-Checker pipeline demonstrating high
291
  alignment
292
+ with human expert judgments and stability across multiple runs.</p>
293
  </div>
294
  </div>
295
 
 
339
  <div id="scaling-globem" class="chart-container"></div>
340
  </div>
341
  </div>
342
+ <p class="section-description">LLMs extract more accurate insights by delaying commitment, and they
343
+ concentrate reasoning into a small number of highly valuable late-stage interactions. These targeted
344
+ interactions are built upon longer early exploration.</p>
345
  </section>
346
 
347
  <!-- 2. Ranking Comparison Section -->
 
388
 
389
  </div>
390
  </div>
391
+ <p class="section-description">The ranking induced by novel insight usefulness closely aligns with the
392
+ ranking based on checklist accuracy. Differences between the two rankings are small, especially among
393
+ the top-performing models.</p>
394
  </section>
395
 
396
  <!-- 3. Turn Distribution Section -->
 
420
  <div id="turn-globem" class="chart-container-tall"></div>
421
  </div>
422
  </div>
423
+ <p class="section-description">Stronger models tend to explore for more rounds without external prompting.
424
+ Knowledge-intensive databases such as 10-K and MIMIC induce more interaction rounds than signal-based
425
+ datasets such as GLOBEM, and the resulting distributions are also more uniform.</p>
426
  </section>
427
 
428
  <!-- 4. Entropy Analysis Section -->
 
437
  <circle cx="7.5" cy="16.5" r="1.5" />
438
  <circle cx="17.5" cy="14.5" r="1.5" />
439
  </svg>
440
+ Exploration Pattern Analysis
441
  </h2>
442
  <p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
443
  = more uniform access; Higher coverage = more fields explored.</p>
 
472
  <div id="entropy-model-5" class="chart-container-tall"></div>
473
  </div>
474
  </div>
475
+ <p class="section-description">Advanced LLMs tend to operate in a balanced exploration regime that combines
476
+ adequate coverage with focused access. Such a regime is consistently observed across different
477
+ scenarios.</p>
478
  </section>
479
 
480
  <!-- 5. Error Analysis Section -->
 
496
  <div id="error-chart" class="chart-container-double"></div>
497
  </div>
498
  </div>
499
+ <p class="section-description">Our findings revealed that 58% of errors stemmed from insufficient
500
+ exploration, both in terms of breadth and depth. This imbalance in exploration often leads to suboptimal
501
+ results, regardless of the model’s overall capability.
502
+ Additionally, around 40% of the errors were attributed to other factors. For more powerful models,
503
+ over-reasoning was common, where the model made assumptions not fully supported by the data. In other
504
+ cases, models misinterpreted the insights, such as mistaking a downward trend for an upward one. Less
505
+ capable models, on the other hand, tended to make more fundamental errors, such as repeatedly debugging
506
+ or struggling with missing data, which could disrupt the overall coherence of the analysis.</p>
507
  </section>
508
 
509
  <!-- 6. Probing Results Section -->
 
515
  <circle cx="11" cy="11" r="8" />
516
  <path d="m21 21-4.3-4.3" />
517
  </svg>
518
+ Self-Termination
519
  </h2>
520
+ <p>Analyze the willingness of models to terminate their own analysis.</p>
521
  </div>
522
  <div id="probing-legend" class="shared-legend"></div>
523
  <div class="charts-grid three-col">
 
534
  <div id="probing-10k" class="chart-container-tall"></div>
535
  </div>
536
  </div>
537
+ <p class="section-description"> Clear differences emerge across model generations. Qwen3 and Qwen3-Next
538
+ exhibit a consistently increasing probability, indicating growing confidence that a complete report can
539
+ be produced as more information is accumulated, whereas the Qwen2.5 series shows pronounced fluctuations
540
+ and remains uncertain about whether exploration can be terminated at the current step. Moreover,
541
+ Qwen3-Next maintains higher confidence with lower variance throughout, suggesting that it has more
542
+ confidence that exploration is progressing towards a more comprehensive and deeper report.</p>
543
  </section>
544
  </main>
545
 
styles.css CHANGED
@@ -150,7 +150,7 @@ img {
150
  font-size: 21px;
151
  line-height: 1.7;
152
  color: var(--color-text-muted);
153
- max-width: 900px;
154
  margin: 0 auto var(--space-lg);
155
  text-align: left;
156
  }
@@ -179,6 +179,26 @@ img {
179
  font-size: 22px;
180
  font-weight: 500;
181
  color: var(--color-text);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  }
183
 
184
  .meta-row.affiliations {
@@ -552,7 +572,8 @@ img {
552
  line-height: 1.6;
553
  color: var(--color-text-muted);
554
  text-align: center;
555
- margin-top: var(--space-md);
 
556
  max-width: 1400px;
557
  margin-left: auto;
558
  margin-right: auto;
@@ -849,9 +870,8 @@ footer a:hover {
849
  margin: 0 auto;
850
  background: var(--color-surface);
851
  border-radius: var(--radius-lg);
852
- box-shadow: var(--shadow-card);
853
- border: 1px solid rgba(0, 0, 0, 0.05);
854
- /* Subtle border */
855
  overflow: hidden;
856
  /* Ensure rounded corners */
857
  position: relative;
@@ -907,8 +927,9 @@ footer a:hover {
907
  overflow-y: auto;
908
  /* Enable vertical scroll */
909
  padding: var(--space-md);
910
- background: #f5f5f7;
911
- /* Chat background */
 
912
  font-size: 16px;
913
  }
914
 
 
150
  font-size: 21px;
151
  line-height: 1.7;
152
  color: var(--color-text-muted);
153
+ max-width: 1200px;
154
  margin: 0 auto var(--space-lg);
155
  text-align: left;
156
  }
 
179
  font-size: 22px;
180
  font-weight: 500;
181
  color: var(--color-text);
182
+ margin-bottom: var(--space-xs);
183
+ }
184
+
185
+ .meta-row.authors a {
186
+ color: var(--color-text);
187
+ text-decoration: none;
188
+ position: relative;
189
+ padding: 2px 6px;
190
+ margin: 0 -2px;
191
+ border-radius: 6px;
192
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
193
+ /* Very faint underline in the initial state, serving as a subtle visual hint */
194
+ border-bottom: 1.5px solid rgba(0, 113, 227, 0.1);
195
+ }
196
+
197
+ .meta-row.authors a:hover {
198
+ color: var(--color-primary);
199
+ background-color: rgba(0, 113, 227, 0.05);
200
+ border-bottom-color: var(--color-primary);
201
+ transform: translateY(-1px);
202
  }
203
 
204
  .meta-row.affiliations {
 
572
  line-height: 1.6;
573
  color: var(--color-text-muted);
574
  text-align: center;
575
+ margin-top: 0;
576
+ margin-bottom: var(--space-md);
577
  max-width: 1400px;
578
  margin-left: auto;
579
  margin-right: auto;
 
870
  margin: 0 auto;
871
  background: var(--color-surface);
872
  border-radius: var(--radius-lg);
873
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.04);
874
+ border: 1px solid rgba(0, 0, 0, 0.08);
 
875
  overflow: hidden;
876
  /* Ensure rounded corners */
877
  position: relative;
 
927
  overflow-y: auto;
928
  /* Enable vertical scroll */
929
  padding: var(--space-md);
930
+ background: #f4f9f4;
931
+ box-shadow: inset 0 1px 4px rgba(0, 0, 0, 0.02);
932
+ /* Soft light mint green background */
933
  font-size: 16px;
934
  }
935
 
trajectory.js CHANGED
@@ -3,6 +3,12 @@
3
  // AGENT TRAJECTORY - Chat Interface
4
  // ============================================================================
5
 
 
 
 
 
 
 
6
  let currentTrajScenario = 'mimic';
7
 
8
  function initTrajectory() {
@@ -41,6 +47,13 @@ function initTrajectory() {
41
  document.querySelectorAll('[data-traj-scenario]').forEach(b => b.classList.remove('active'));
42
  btn.classList.add('active');
43
  currentTrajScenario = btn.dataset.trajScenario;
 
 
 
 
 
 
 
44
  renderTrajectory(currentTrajScenario);
45
  });
46
  });
 
3
  // AGENT TRAJECTORY - Chat Interface
4
  // ============================================================================
5
 
6
+ const SCENARIO_DESCRIPTIONS = {
7
+ 'mimic': 'Exploring clinical patterns and patient outcomes in a large-scale electronic health record (EHR) database.',
8
+ '10k': 'Extracting deep insights from SEC 10-K annual reports for longitudinal financial performance analysis.',
9
+ 'globem': 'Analyzing multi-modal longitudinal behavioral and sensor data for detecting mental health trends.'
10
+ };
11
+
12
  let currentTrajScenario = 'mimic';
13
 
14
  function initTrajectory() {
 
47
  document.querySelectorAll('[data-traj-scenario]').forEach(b => b.classList.remove('active'));
48
  btn.classList.add('active');
49
  currentTrajScenario = btn.dataset.trajScenario;
50
+
51
+ // Update description
52
+ const descEl = document.getElementById('trajectory-scenario-description');
53
+ if (descEl && SCENARIO_DESCRIPTIONS[currentTrajScenario]) {
54
+ descEl.textContent = SCENARIO_DESCRIPTIONS[currentTrajScenario];
55
+ }
56
+
57
  renderTrajectory(currentTrajScenario);
58
  });
59
  });