thinkwee committed on
Commit ·
9f54287
1
Parent(s): 9711e49
update links
Browse files- index.html +67 -41
- styles.css +28 -7
- trajectory.js +13 -0
index.html
CHANGED
|
@@ -59,18 +59,25 @@
|
|
| 59 |
We distinguish <em>investigatory intelligence</em> (autonomously setting goals and exploring) from
|
| 60 |
<em>executional intelligence</em> (completing assigned tasks), arguing that true agency requires the
|
| 61 |
former.
|
| 62 |
-
<br>
|
| 63 |
To evaluate this, we introduce <strong>Deep Data Research (DDR)</strong>, an open-ended task where LLMs
|
| 64 |
autonomously extract insights from databases, and <strong>DDR-Bench</strong>, a large-scale,
|
| 65 |
checklist-based benchmark enabling verifiable evaluation.
|
| 66 |
-
<br>
|
| 67 |
Results show that while frontier models display emerging agency, long-horizon exploration remains
|
| 68 |
challenging, with effective investigatory intelligence depending on intrinsic agentic strategies beyond
|
| 69 |
mere scaffolding or scaling.
|
| 70 |
</p>
|
| 71 |
<div class="meta-info">
|
| 72 |
<div class="meta-row authors">
|
| 73 |
-
<span class="meta-item">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
</div>
|
| 75 |
<div class="meta-row affiliations">
|
| 76 |
<a href="https://kclnlp.github.io/" target="_blank" rel="noopener noreferrer">
|
|
@@ -84,12 +91,12 @@
|
|
| 84 |
</a>
|
| 85 |
</div>
|
| 86 |
<div class="meta-row links">
|
| 87 |
-
<a href="https://huggingface.co/
|
| 88 |
<img src="assets/hf-logo-pirate.svg" alt="HuggingFace" width="30" height="30"
|
| 89 |
class="platform-icon">
|
| 90 |
Data
|
| 91 |
</a>
|
| 92 |
-
<a href="https://github.com/
|
| 93 |
<svg viewBox="0 0 24 24" width="30" height="30" fill="currentColor">
|
| 94 |
<path
|
| 95 |
d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.17 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.34-3.369-1.34-.454-1.156-1.11-1.463-1.11-1.463-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.831.092-.646.35-1.086.636-1.336-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.269 2.75 1.025A9.578 9.578 0 0112 6.836c.85.004 1.705.114 2.504.336 1.909-1.294 2.747-1.025 2.747-1.025.546 1.377.203 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.167 22 16.418 22 12c0-5.523-4.477-10-10-10z" />
|
|
@@ -121,24 +128,29 @@
|
|
| 121 |
</svg>
|
| 122 |
Framework Overview
|
| 123 |
</h2>
|
| 124 |
-
<p>
|
| 125 |
</div>
|
| 126 |
<div class="framework-grid">
|
| 127 |
<div class="framework-card">
|
| 128 |
<img src="assets/framework_task.png" alt="Task Formulation Framework"
|
| 129 |
style="border-radius: var(--radius-md);">
|
| 130 |
<h3>Task Formulation</h3>
|
| 131 |
-
<p class="framework-description">
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
| 134 |
</div>
|
| 135 |
<div class="framework-card">
|
| 136 |
<img src="assets/framework_pipeline.png" alt="Evaluation Pipeline Framework"
|
| 137 |
style="border-radius: var(--radius-md);">
|
| 138 |
<h3>Evaluation Pipeline</h3>
|
| 139 |
-
<p class="framework-description">
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
</div>
|
| 143 |
</div>
|
| 144 |
</section>
|
|
@@ -164,6 +176,11 @@
|
|
| 164 |
<button class="dim-btn" data-traj-scenario="globem">GLOBEM</button>
|
| 165 |
</div>
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
<div class="trajectory-container">
|
| 168 |
<div id="chat-window" class="chat-window">
|
| 169 |
<!-- Messages will be injected here via JS -->
|
|
@@ -177,9 +194,6 @@
|
|
| 177 |
<span>Scroll to see more</span>
|
| 178 |
</div>
|
| 179 |
</div>
|
| 180 |
-
<p class="trajectory-description">Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod
|
| 181 |
-
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
|
| 182 |
-
ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
|
| 183 |
</section>
|
| 184 |
|
| 185 |
<!-- 2. Experiment Results Section -->
|
|
@@ -217,9 +231,11 @@
|
|
| 217 |
<!-- 2. Qwen Family -->
|
| 218 |
<div class="carousel-card">
|
| 219 |
<img src="assets/qwenfamily.png" alt="Qwen Family Performance">
|
| 220 |
-
<h4>
|
| 221 |
-
<p class="card-caption">
|
| 222 |
-
|
|
|
|
|
|
|
| 223 |
</div>
|
| 224 |
|
| 225 |
<!-- 3. Reasoning -->
|
|
@@ -253,7 +269,9 @@
|
|
| 253 |
<div class="carousel-card">
|
| 254 |
<img src="assets/hallucination.png" alt="Hallucination Analysis">
|
| 255 |
<h4>Hallucination Analysis</h4>
|
| 256 |
-
<p class="card-caption">Hallucination rates
|
|
|
|
|
|
|
| 257 |
</div>
|
| 258 |
|
| 259 |
<!-- 6.5 Hallucination-Accuracy Correlation -->
|
|
@@ -271,7 +289,7 @@
|
|
| 271 |
<h4>Trustworthiness</h4>
|
| 272 |
<p class="card-caption">Verification of the LLM-as-a-Checker pipeline demonstrating high
|
| 273 |
alignment
|
| 274 |
-
with human expert judgments.</p>
|
| 275 |
</div>
|
| 276 |
</div>
|
| 277 |
|
|
@@ -321,9 +339,9 @@
|
|
| 321 |
<div id="scaling-globem" class="chart-container"></div>
|
| 322 |
</div>
|
| 323 |
</div>
|
| 324 |
-
<p class="section-description">
|
| 325 |
-
|
| 326 |
-
|
| 327 |
</section>
|
| 328 |
|
| 329 |
<!-- 2. Ranking Comparison Section -->
|
|
@@ -370,9 +388,9 @@
|
|
| 370 |
|
| 371 |
</div>
|
| 372 |
</div>
|
| 373 |
-
<p class="section-description">
|
| 374 |
-
|
| 375 |
-
|
| 376 |
</section>
|
| 377 |
|
| 378 |
<!-- 3. Turn Distribution Section -->
|
|
@@ -402,9 +420,9 @@
|
|
| 402 |
<div id="turn-globem" class="chart-container-tall"></div>
|
| 403 |
</div>
|
| 404 |
</div>
|
| 405 |
-
<p class="section-description">
|
| 406 |
-
|
| 407 |
-
|
| 408 |
</section>
|
| 409 |
|
| 410 |
<!-- 4. Entropy Analysis Section -->
|
|
@@ -419,7 +437,7 @@
|
|
| 419 |
<circle cx="7.5" cy="16.5" r="1.5" />
|
| 420 |
<circle cx="17.5" cy="14.5" r="1.5" />
|
| 421 |
</svg>
|
| 422 |
-
|
| 423 |
</h2>
|
| 424 |
<p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
|
| 425 |
= more uniform access; Higher coverage = more fields explored.</p>
|
|
@@ -454,9 +472,9 @@
|
|
| 454 |
<div id="entropy-model-5" class="chart-container-tall"></div>
|
| 455 |
</div>
|
| 456 |
</div>
|
| 457 |
-
<p class="section-description">
|
| 458 |
-
|
| 459 |
-
|
| 460 |
</section>
|
| 461 |
|
| 462 |
<!-- 5. Error Analysis Section -->
|
|
@@ -478,9 +496,14 @@
|
|
| 478 |
<div id="error-chart" class="chart-container-double"></div>
|
| 479 |
</div>
|
| 480 |
</div>
|
| 481 |
-
<p class="section-description">
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
</section>
|
| 485 |
|
| 486 |
<!-- 6. Probing Results Section -->
|
|
@@ -492,9 +515,9 @@
|
|
| 492 |
<circle cx="11" cy="11" r="8" />
|
| 493 |
<path d="m21 21-4.3-4.3" />
|
| 494 |
</svg>
|
| 495 |
-
|
| 496 |
</h2>
|
| 497 |
-
<p>Analyze the
|
| 498 |
</div>
|
| 499 |
<div id="probing-legend" class="shared-legend"></div>
|
| 500 |
<div class="charts-grid three-col">
|
|
@@ -511,9 +534,12 @@
|
|
| 511 |
<div id="probing-10k" class="chart-container-tall"></div>
|
| 512 |
</div>
|
| 513 |
</div>
|
| 514 |
-
<p class="section-description">
|
| 515 |
-
|
| 516 |
-
|
|
|
|
|
|
|
|
|
|
| 517 |
</section>
|
| 518 |
</main>
|
| 519 |
|
|
|
|
| 59 |
We distinguish <em>investigatory intelligence</em> (autonomously setting goals and exploring) from
|
| 60 |
<em>executional intelligence</em> (completing assigned tasks), arguing that true agency requires the
|
| 61 |
former.
|
|
|
|
| 62 |
To evaluate this, we introduce <strong>Deep Data Research (DDR)</strong>, an open-ended task where LLMs
|
| 63 |
autonomously extract insights from databases, and <strong>DDR-Bench</strong>, a large-scale,
|
| 64 |
checklist-based benchmark enabling verifiable evaluation.
|
|
|
|
| 65 |
Results show that while frontier models display emerging agency, long-horizon exploration remains
|
| 66 |
challenging, with effective investigatory intelligence depending on intrinsic agentic strategies beyond
|
| 67 |
mere scaffolding or scaling.
|
| 68 |
</p>
|
| 69 |
<div class="meta-info">
|
| 70 |
<div class="meta-row authors">
|
| 71 |
+
<span class="meta-item">
|
| 72 |
+
<a href="https://thinkwee.top/about" target="_blank" rel="noopener noreferrer">Wei Liu</a>,
|
| 73 |
+
<a href="https://github.com/yupeijei1997" target="_blank" rel="noopener noreferrer">Peijie
|
| 74 |
+
Yu</a>,
|
| 75 |
+
<a href="https://www.kcl.ac.uk/people/michele-orini" target="_blank"
|
| 76 |
+
rel="noopener noreferrer">Michele Orini</a>,
|
| 77 |
+
<a href="https://yalidu.github.io/" target="_blank" rel="noopener noreferrer">Yali Du</a>,
|
| 78 |
+
<a href="https://sites.google.com/view/yulanhe/home" target="_blank"
|
| 79 |
+
rel="noopener noreferrer">Yulan He</a>
|
| 80 |
+
</span>
|
| 81 |
</div>
|
| 82 |
<div class="meta-row affiliations">
|
| 83 |
<a href="https://kclnlp.github.io/" target="_blank" rel="noopener noreferrer">
|
|
|
|
| 91 |
</a>
|
| 92 |
</div>
|
| 93 |
<div class="meta-row links">
|
| 94 |
+
<a href="https://huggingface.co/collections/thinkwee/ddrbench" class="platform-btn huggingface-btn">
|
| 95 |
<img src="assets/hf-logo-pirate.svg" alt="HuggingFace" width="30" height="30"
|
| 96 |
class="platform-icon">
|
| 97 |
Data
|
| 98 |
</a>
|
| 99 |
+
<a href="https://github.com/thinkwee/DDR_Bench" class="platform-btn github-btn">
|
| 100 |
<svg viewBox="0 0 24 24" width="30" height="30" fill="currentColor">
|
| 101 |
<path
|
| 102 |
d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.17 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.34-3.369-1.34-.454-1.156-1.11-1.463-1.11-1.463-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.831.092-.646.35-1.086.636-1.336-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.269 2.75 1.025A9.578 9.578 0 0112 6.836c.85.004 1.705.114 2.504.336 1.909-1.294 2.747-1.025 2.747-1.025.546 1.377.203 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.167 22 16.418 22 12c0-5.523-4.477-10-10-10z" />
|
|
|
|
| 128 |
</svg>
|
| 129 |
Framework Overview
|
| 130 |
</h2>
|
| 131 |
+
<p>Overview of DDR-Bench.</p>
|
| 132 |
</div>
|
| 133 |
<div class="framework-grid">
|
| 134 |
<div class="framework-card">
|
| 135 |
<img src="assets/framework_task.png" alt="Task Formulation Framework"
|
| 136 |
style="border-radius: var(--radius-md);">
|
| 137 |
<h3>Task Formulation</h3>
|
| 138 |
+
<p class="framework-description">A case of Claude Sonnet 4.5's trajectory and evaluation checklist
|
| 139 |
+
in the MIMIC scenario of DDR-Bench. Verified facts and supporting insights are
|
| 140 |
+
<u>underlined</u>. The agent is asked to perform multiple ReAct turns to explore the database
|
| 141 |
+
without predefined targets or queries, and to autonomously mine insights from the exploration.
|
| 142 |
+
</p>
|
| 143 |
</div>
|
| 144 |
<div class="framework-card">
|
| 145 |
<img src="assets/framework_pipeline.png" alt="Evaluation Pipeline Framework"
|
| 146 |
style="border-radius: var(--radius-md);">
|
| 147 |
<h3>Evaluation Pipeline</h3>
|
| 148 |
+
<p class="framework-description"><b>Left</b>: Compared with previous tasks, <i>DDR</i> maximises
|
| 149 |
+
exploration openness and agency, focusing on the direct evaluation of insight quality.
|
| 150 |
+
<b>Right</b>: Overview of the DDR-Bench. The checklist derived from the freeform parts of the
|
| 151 |
+
database is used to evaluate the agent-generated insights from the exploration on the structured
|
| 152 |
+
parts of the database.
|
| 153 |
+
</p>
|
| 154 |
</div>
|
| 155 |
</div>
|
| 156 |
</section>
|
|
|
|
| 176 |
<button class="dim-btn" data-traj-scenario="globem">GLOBEM</button>
|
| 177 |
</div>
|
| 178 |
|
| 179 |
+
<p id="trajectory-scenario-description" class="trajectory-description">
|
| 180 |
+
Exploring clinical patterns and patient outcomes in a large-scale electronic health record (EHR)
|
| 181 |
+
database.
|
| 182 |
+
</p>
|
| 183 |
+
|
| 184 |
<div class="trajectory-container">
|
| 185 |
<div id="chat-window" class="chat-window">
|
| 186 |
<!-- Messages will be injected here via JS -->
|
|
|
|
| 194 |
<span>Scroll to see more</span>
|
| 195 |
</div>
|
| 196 |
</div>
|
|
|
|
|
|
|
|
|
|
| 197 |
</section>
|
| 198 |
|
| 199 |
<!-- 2. Experiment Results Section -->
|
|
|
|
| 231 |
<!-- 2. Qwen Family -->
|
| 232 |
<div class="carousel-card">
|
| 233 |
<img src="assets/qwenfamily.png" alt="Qwen Family Performance">
|
| 234 |
+
<h4>Training-time Factors Analysis</h4>
|
| 235 |
+
<p class="card-caption">Training-time factors study within the Qwen family. From left to right,
|
| 236 |
+
the three columns examine inference-time scaling performance across all scenarios for models
|
| 237 |
+
with different parameter scales, context optimisation methods, and model generations with
|
| 238 |
+
different training strategies.</p>
|
| 239 |
</div>
|
| 240 |
|
| 241 |
<!-- 3. Reasoning -->
|
|
|
|
| 269 |
<div class="carousel-card">
|
| 270 |
<img src="assets/hallucination.png" alt="Hallucination Analysis">
|
| 271 |
<h4>Hallucination Analysis</h4>
|
| 272 |
+
<p class="card-caption">Hallucination rates (%) across models in DDR-Bench, measured as the
|
| 273 |
+
proportion of insights containing factual but unfaithful information that is not derivable
|
| 274 |
+
from the provided inputs. These rates are low overall.</p>
|
| 275 |
</div>
|
| 276 |
|
| 277 |
<!-- 6.5 Hallucination-Accuracy Correlation -->
|
|
|
|
| 289 |
<h4>Trustworthiness</h4>
|
| 290 |
<p class="card-caption">Verification of the LLM-as-a-Checker pipeline demonstrating high
|
| 291 |
alignment
|
| 292 |
+
with human expert judgments and stability across multiple runs.</p>
|
| 293 |
</div>
|
| 294 |
</div>
|
| 295 |
|
|
|
|
| 339 |
<div id="scaling-globem" class="chart-container"></div>
|
| 340 |
</div>
|
| 341 |
</div>
|
| 342 |
+
<p class="section-description">LLMs extract more accurate insights by delaying commitment, and they
|
| 343 |
+
concentrate reasoning into a small number of highly valuable late-stage interactions. These targeted
|
| 344 |
+
interactions are built upon longer early exploration.</p>
|
| 345 |
</section>
|
| 346 |
|
| 347 |
<!-- 2. Ranking Comparison Section -->
|
|
|
|
| 388 |
|
| 389 |
</div>
|
| 390 |
</div>
|
| 391 |
+
<p class="section-description">The ranking induced by novel insight usefulness closely aligns with the
|
| 392 |
+
ranking based on checklist accuracy. Differences between the two rankings are small, especially among
|
| 393 |
+
the top-performing models.</p>
|
| 394 |
</section>
|
| 395 |
|
| 396 |
<!-- 3. Turn Distribution Section -->
|
|
|
|
| 420 |
<div id="turn-globem" class="chart-container-tall"></div>
|
| 421 |
</div>
|
| 422 |
</div>
|
| 423 |
+
<p class="section-description">Stronger models tend to explore for more rounds without external prompting.
|
| 424 |
+
Knowledge-intensive databases such as 10-K and MIMIC induce more interaction rounds than signal-based
|
| 425 |
+
datasets such as GLOBEM, and the resulting distributions are also more uniform.</p>
|
| 426 |
</section>
|
| 427 |
|
| 428 |
<!-- 4. Entropy Analysis Section -->
|
|
|
|
| 437 |
<circle cx="7.5" cy="16.5" r="1.5" />
|
| 438 |
<circle cx="17.5" cy="14.5" r="1.5" />
|
| 439 |
</svg>
|
| 440 |
+
Exploration Pattern Analysis
|
| 441 |
</h2>
|
| 442 |
<p>Scatter plot showing Access Entropy vs Coverage by model. Opacity represents accuracy. Higher entropy
|
| 443 |
= more uniform access; Higher coverage = more fields explored.</p>
|
|
|
|
| 472 |
<div id="entropy-model-5" class="chart-container-tall"></div>
|
| 473 |
</div>
|
| 474 |
</div>
|
| 475 |
+
<p class="section-description">Advanced LLMs tend to operate in a balanced exploration regime that combines
|
| 476 |
+
adequate coverage with focused access. Such a regime is consistently observed across different
|
| 477 |
+
scenarios.</p>
|
| 478 |
</section>
|
| 479 |
|
| 480 |
<!-- 5. Error Analysis Section -->
|
|
|
|
| 496 |
<div id="error-chart" class="chart-container-double"></div>
|
| 497 |
</div>
|
| 498 |
</div>
|
| 499 |
+
<p class="section-description">Our findings revealed that 58% of errors stemmed from insufficient
|
| 500 |
+
exploration, both in terms of breadth and depth. This imbalance in exploration often leads to suboptimal
|
| 501 |
+
results, regardless of the model’s overall capability.
|
| 502 |
+
Additionally, around 40% of the errors were attributed to other factors. For more powerful models,
|
| 503 |
+
over-reasoning was common, where the model made assumptions not fully supported by the data. In other
|
| 504 |
+
cases, models misinterpreted the insights, such as mistaking a downward trend for an upward one. Less
|
| 505 |
+
capable models, on the other hand, tended to make more fundamental errors, such as repeatedly debugging
|
| 506 |
+
or struggling with missing data, which could disrupt the overall coherence of the analysis.</p>
|
| 507 |
</section>
|
| 508 |
|
| 509 |
<!-- 6. Probing Results Section -->
|
|
|
|
| 515 |
<circle cx="11" cy="11" r="8" />
|
| 516 |
<path d="m21 21-4.3-4.3" />
|
| 517 |
</svg>
|
| 518 |
+
Self-Termination
|
| 519 |
</h2>
|
| 520 |
+
<p>Analyze the willingness of models to terminate their own analysis.</p>
|
| 521 |
</div>
|
| 522 |
<div id="probing-legend" class="shared-legend"></div>
|
| 523 |
<div class="charts-grid three-col">
|
|
|
|
| 534 |
<div id="probing-10k" class="chart-container-tall"></div>
|
| 535 |
</div>
|
| 536 |
</div>
|
| 537 |
+
<p class="section-description"> Clear differences emerge across model generations. Qwen3 and Qwen3-Next
|
| 538 |
+
exhibit a consistently increasing probability, indicating growing confidence that a complete report can
|
| 539 |
+
be produced as more information is accumulated, whereas the Qwen2.5 series shows pronounced fluctuations
|
| 540 |
+
and remains uncertain about whether exploration can be terminated at the current step. Moreover,
|
| 541 |
+
Qwen3-Next maintains higher confidence with lower variance throughout, suggesting that it has more
|
| 542 |
+
confidence that exploration is progressing towards a more comprehensive and deeper report.</p>
|
| 543 |
</section>
|
| 544 |
</main>
|
| 545 |
|
styles.css
CHANGED
|
@@ -150,7 +150,7 @@ img {
|
|
| 150 |
font-size: 21px;
|
| 151 |
line-height: 1.7;
|
| 152 |
color: var(--color-text-muted);
|
| 153 |
-
max-width:
|
| 154 |
margin: 0 auto var(--space-lg);
|
| 155 |
text-align: left;
|
| 156 |
}
|
|
@@ -179,6 +179,26 @@ img {
|
|
| 179 |
font-size: 22px;
|
| 180 |
font-weight: 500;
|
| 181 |
color: var(--color-text);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
}
|
| 183 |
|
| 184 |
.meta-row.affiliations {
|
|
@@ -552,7 +572,8 @@ img {
|
|
| 552 |
line-height: 1.6;
|
| 553 |
color: var(--color-text-muted);
|
| 554 |
text-align: center;
|
| 555 |
-
margin-top:
|
|
|
|
| 556 |
max-width: 1400px;
|
| 557 |
margin-left: auto;
|
| 558 |
margin-right: auto;
|
|
@@ -849,9 +870,8 @@ footer a:hover {
|
|
| 849 |
margin: 0 auto;
|
| 850 |
background: var(--color-surface);
|
| 851 |
border-radius: var(--radius-lg);
|
| 852 |
-
box-shadow:
|
| 853 |
-
border: 1px solid rgba(0, 0, 0, 0.
|
| 854 |
-
/* Subtle border */
|
| 855 |
overflow: hidden;
|
| 856 |
/* Ensure rounded corners */
|
| 857 |
position: relative;
|
|
@@ -907,8 +927,9 @@ footer a:hover {
|
|
| 907 |
overflow-y: auto;
|
| 908 |
/* Enable vertical scroll */
|
| 909 |
padding: var(--space-md);
|
| 910 |
-
background: #
|
| 911 |
-
|
|
|
|
| 912 |
font-size: 16px;
|
| 913 |
}
|
| 914 |
|
|
|
|
| 150 |
font-size: 21px;
|
| 151 |
line-height: 1.7;
|
| 152 |
color: var(--color-text-muted);
|
| 153 |
+
max-width: 1200px;
|
| 154 |
margin: 0 auto var(--space-lg);
|
| 155 |
text-align: left;
|
| 156 |
}
|
|
|
|
| 179 |
font-size: 22px;
|
| 180 |
font-weight: 500;
|
| 181 |
color: var(--color-text);
|
| 182 |
+
margin-bottom: var(--space-xs);
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
.meta-row.authors a {
|
| 186 |
+
color: var(--color-text);
|
| 187 |
+
text-decoration: none;
|
| 188 |
+
position: relative;
|
| 189 |
+
padding: 2px 6px;
|
| 190 |
+
margin: 0 -2px;
|
| 191 |
+
border-radius: 6px;
|
| 192 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
| 193 |
+
/* Very faint underline in the initial state, serving as a subtle visual hint */
|
| 194 |
+
border-bottom: 1.5px solid rgba(0, 113, 227, 0.1);
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
.meta-row.authors a:hover {
|
| 198 |
+
color: var(--color-primary);
|
| 199 |
+
background-color: rgba(0, 113, 227, 0.05);
|
| 200 |
+
border-bottom-color: var(--color-primary);
|
| 201 |
+
transform: translateY(-1px);
|
| 202 |
}
|
| 203 |
|
| 204 |
.meta-row.affiliations {
|
|
|
|
| 572 |
line-height: 1.6;
|
| 573 |
color: var(--color-text-muted);
|
| 574 |
text-align: center;
|
| 575 |
+
margin-top: 0;
|
| 576 |
+
margin-bottom: var(--space-md);
|
| 577 |
max-width: 1400px;
|
| 578 |
margin-left: auto;
|
| 579 |
margin-right: auto;
|
|
|
|
| 870 |
margin: 0 auto;
|
| 871 |
background: var(--color-surface);
|
| 872 |
border-radius: var(--radius-lg);
|
| 873 |
+
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.04);
|
| 874 |
+
border: 1px solid rgba(0, 0, 0, 0.08);
|
|
|
|
| 875 |
overflow: hidden;
|
| 876 |
/* Ensure rounded corners */
|
| 877 |
position: relative;
|
|
|
|
| 927 |
overflow-y: auto;
|
| 928 |
/* Enable vertical scroll */
|
| 929 |
padding: var(--space-md);
|
| 930 |
+
background: #f4f9f4;
|
| 931 |
+
box-shadow: inset 0 1px 4px rgba(0, 0, 0, 0.02);
|
| 932 |
+
/* Soft light mint green background */
|
| 933 |
font-size: 16px;
|
| 934 |
}
|
| 935 |
|
trajectory.js
CHANGED
|
@@ -3,6 +3,12 @@
|
|
| 3 |
// AGENT TRAJECTORY - Chat Interface
|
| 4 |
// ============================================================================
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
let currentTrajScenario = 'mimic';
|
| 7 |
|
| 8 |
function initTrajectory() {
|
|
@@ -41,6 +47,13 @@ function initTrajectory() {
|
|
| 41 |
document.querySelectorAll('[data-traj-scenario]').forEach(b => b.classList.remove('active'));
|
| 42 |
btn.classList.add('active');
|
| 43 |
currentTrajScenario = btn.dataset.trajScenario;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
renderTrajectory(currentTrajScenario);
|
| 45 |
});
|
| 46 |
});
|
|
|
|
| 3 |
// AGENT TRAJECTORY - Chat Interface
|
| 4 |
// ============================================================================
|
| 5 |
|
| 6 |
+
const SCENARIO_DESCRIPTIONS = {
|
| 7 |
+
'mimic': 'Exploring clinical patterns and patient outcomes in a large-scale electronic health record (EHR) database.',
|
| 8 |
+
'10k': 'Extracting deep insights from SEC 10-K annual reports for longitudinal financial performance analysis.',
|
| 9 |
+
'globem': 'Analyzing multi-modal longitudinal behavioral and sensor data for detecting mental health trends.'
|
| 10 |
+
};
|
| 11 |
+
|
| 12 |
let currentTrajScenario = 'mimic';
|
| 13 |
|
| 14 |
function initTrajectory() {
|
|
|
|
| 47 |
document.querySelectorAll('[data-traj-scenario]').forEach(b => b.classList.remove('active'));
|
| 48 |
btn.classList.add('active');
|
| 49 |
currentTrajScenario = btn.dataset.trajScenario;
|
| 50 |
+
|
| 51 |
+
// Update description
|
| 52 |
+
const descEl = document.getElementById('trajectory-scenario-description');
|
| 53 |
+
if (descEl && SCENARIO_DESCRIPTIONS[currentTrajScenario]) {
|
| 54 |
+
descEl.textContent = SCENARIO_DESCRIPTIONS[currentTrajScenario];
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
renderTrajectory(currentTrajScenario);
|
| 58 |
});
|
| 59 |
});
|