thinkwee committed on
Commit
8c1709f
·
verified ·
1 Parent(s): 47b210f
Files changed (8) hide show
  1. .gitattributes +6 -0
  2. example.png +3 -0
  3. iit.png +3 -0
  4. iit_result.png +3 -0
  5. index.html +970 -427
  6. logo.png +3 -0
  7. overall.png +3 -0
  8. paradigm.png +3 -0
.gitattributes CHANGED
@@ -46,3 +46,9 @@ static/videos/shiba.mp4 filter=lfs diff=lfs merge=lfs -text
46
  static/videos/steve.mp4 filter=lfs diff=lfs merge=lfs -text
47
  static/videos/teaser.mp4 filter=lfs diff=lfs merge=lfs -text
48
  static/videos/toby.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
46
  static/videos/steve.mp4 filter=lfs diff=lfs merge=lfs -text
47
  static/videos/teaser.mp4 filter=lfs diff=lfs merge=lfs -text
48
  static/videos/toby.mp4 filter=lfs diff=lfs merge=lfs -text
49
+ example.png filter=lfs diff=lfs merge=lfs -text
50
+ iit_result.png filter=lfs diff=lfs merge=lfs -text
51
+ iit.png filter=lfs diff=lfs merge=lfs -text
52
+ logo.png filter=lfs diff=lfs merge=lfs -text
53
+ overall.png filter=lfs diff=lfs merge=lfs -text
54
+ paradigm.png filter=lfs diff=lfs merge=lfs -text
example.png ADDED

Git LFS Details

  • SHA256: 7648228d3e87ddad984626a599b514ed91d107676bdbbb52bf4fed1576a97d6a
  • Pointer size: 131 Bytes
  • Size of remote file: 866 kB
iit.png ADDED

Git LFS Details

  • SHA256: 44aa4e943994ae50b088cc6cf5b9b069d5dfc54ec449e2937c6b100b1df16035
  • Pointer size: 131 Bytes
  • Size of remote file: 162 kB
iit_result.png ADDED

Git LFS Details

  • SHA256: 116fc9d9dd1318831e77ab231b256f04d1b294562599dfc8ddaa7ca273541d2b
  • Pointer size: 131 Bytes
  • Size of remote file: 183 kB
index.html CHANGED
@@ -55,20 +55,330 @@
55
  z-index: -1;
56
  }
57
 
 
58
  .hero {
59
- background: white;
60
  border-radius: 12px;
61
  margin: 2rem;
62
  box-shadow: 0 4px 24px rgba(0,0,0,0.06);
63
  border: 1px solid #e5e7eb;
 
 
64
  }
65
 
66
- .section {
67
- background: white;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  margin: 2rem;
69
  border-radius: 12px;
70
  box-shadow: 0 2px 8px rgba(0,0,0,0.04);
71
  border: 1px solid #e5e7eb;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  }
73
 
74
  .publication-title {
@@ -90,6 +400,8 @@
90
  border: 1px solid #e5e7eb;
91
  margin: 1rem 0;
92
  transition: all 0.2s ease;
 
 
93
  }
94
 
95
  .glass-card:hover {
@@ -105,6 +417,8 @@
105
  border: 1px solid #e5e7eb;
106
  margin: 2rem 0;
107
  text-align: center;
 
 
108
  }
109
 
110
  .figure-placeholder {
@@ -158,6 +472,8 @@
158
  text-align: left;
159
  transition: all 0.2s ease;
160
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
 
 
161
  }
162
 
163
  .insight-card:hover {
@@ -185,6 +501,8 @@
185
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
186
  border: 1px solid #e5e7eb;
187
  margin: 2rem 0;
 
 
188
  }
189
 
190
  .comparison-table table {
@@ -229,6 +547,8 @@
229
  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
230
  box-shadow: 0 4px 12px rgba(0,0,0,0.15);
231
  margin: 2rem 0;
 
 
232
  }
233
 
234
  .metrics-header {
@@ -254,6 +574,8 @@
254
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
255
  border: 1px solid #e5e7eb;
256
  transition: all 0.2s ease;
 
 
257
  }
258
 
259
  .method-card:hover {
@@ -278,6 +600,8 @@
278
  border-radius: 12px;
279
  margin: 2rem 0;
280
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
 
 
281
  }
282
 
283
  .stats-grid {
@@ -295,6 +619,8 @@
295
  text-align: center;
296
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
297
  transition: all 0.2s ease;
 
 
298
  }
299
 
300
  .stat-item:hover {
@@ -321,23 +647,25 @@
321
  overflow: hidden;
322
  box-shadow: 0 2px 12px rgba(0,0,0,0.08);
323
  border: 1px solid #e5e7eb;
324
- margin: 2rem 0;
325
  width: 100%;
 
 
326
  }
327
 
328
  .results-table table {
329
  width: 100%;
330
  border-collapse: collapse;
331
- font-size: 0.9rem;
332
  }
333
 
334
  .results-table th {
335
  background: #f8fafc;
336
- color: #374151;
337
  padding: 1rem 0.8rem;
338
  font-weight: 600;
339
  border-bottom: 2px solid #e5e7eb;
340
- text-align: center;
341
  position: sticky;
342
  top: 0;
343
  z-index: 10;
@@ -346,29 +674,94 @@
346
  .results-table td {
347
  padding: 0.8rem;
348
  border-bottom: 1px solid #f3f4f6;
349
- text-align: center;
350
- transition: all 0.2s ease;
351
  }
352
 
353
- .results-table tr:hover {
354
- background: #f9fafb;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
 
357
  .results-table .method-name {
358
- text-align: left;
359
  font-weight: 600;
360
  color: #1f2937;
361
- padding-left: 1rem;
362
  }
363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  .results-table .nover-row {
365
  background: #f0fdf4;
366
  border-left: 3px solid #10b981;
367
  }
368
 
369
- .results-table .nover-row:hover {
370
- background: #ecfdf5;
371
- }
372
 
373
  .results-table .best-score {
374
  color: #10b981;
@@ -376,19 +769,10 @@
376
  position: relative;
377
  }
378
 
379
- .results-table .improvement-badge {
380
- display: inline-block;
381
- background: #10b981;
382
- color: white;
383
- font-size: 0.75rem;
384
- padding: 0.2rem 0.5rem;
385
- border-radius: 12px;
386
- margin-left: 0.5rem;
387
- font-weight: 600;
388
- }
389
 
390
  .table-section {
391
- margin: 3rem 0;
392
  }
393
 
394
  .table-title {
@@ -414,29 +798,15 @@
414
  background: #f1f5f9 !important;
415
  color: #334155;
416
  font-weight: 700;
417
- text-align: left !important;
418
- padding-left: 1rem !important;
419
  }
420
 
421
- .score-cell {
422
- position: relative;
423
- cursor: pointer;
424
  }
425
 
426
- .score-cell:hover::after {
427
- content: attr(data-improvement);
428
- position: absolute;
429
- bottom: 100%;
430
- left: 50%;
431
- transform: translateX(-50%);
432
- background: #1f2937;
433
- color: white;
434
- padding: 0.5rem;
435
- border-radius: 6px;
436
- font-size: 0.8rem;
437
- white-space: nowrap;
438
- z-index: 100;
439
- opacity: 0.9;
440
  }
441
 
442
  @media (max-width: 768px) {
@@ -461,25 +831,155 @@
461
  display: block !important;
462
  }
463
 
464
- .improvement-badge {
465
- display: block;
466
- margin: 0.2rem 0;
467
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  }
469
  </style>
470
 
471
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 
 
 
 
 
 
 
 
 
 
472
  </head>
473
  <body>
474
  <div class="geometric-bg"></div>
475
 
476
  <section class="hero">
477
  <div class="hero-body">
478
- <div class="container is-max-desktop">
479
  <div class="columns is-centered">
480
  <div class="column has-text-centered">
481
  <h1 class="title is-1 publication-title">NOVER</h1>
482
- <h2 class="title is-2" style="color: #666; margin-top: -20px;">NO-VERifier Reinforcement Learning</h2>
483
  <p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>
484
 
485
  <div class="is-size-5 publication-authors" style="margin: 2rem 0;">
@@ -491,22 +991,22 @@
491
 
492
  <div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
493
  <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank"
494
- class="external-link button is-normal" style="background: #1a1a1a; color: white; border: none; border-radius: 6px;">
495
  <span class="icon"><i class="fas fa-file-pdf"></i></span>
496
  <span>Paper</span>
497
  </a>
498
  <a href="https://github.com/thinkwee/NOVER" target="_blank"
499
- class="external-link button is-normal" style="background: #10b981; color: white; border: none; border-radius: 6px;">
500
  <span class="icon"><i class="fab fa-github"></i></span>
501
  <span>Code</span>
502
  </a>
503
  <a href="#" target="_blank"
504
- class="external-link button is-normal" style="background: #ff6b35; color: white; border: none; border-radius: 6px;">
505
  <span class="icon"><i class="fas fa-database"></i></span>
506
  <span>Dataset</span>
507
  </a>
508
  <a href="#" target="_blank"
509
- class="external-link button is-normal" style="background: #3b82f6; color: white; border: none; border-radius: 6px;">
510
  <span class="icon"><i class="fas fa-cube"></i></span>
511
  <span>Model</span>
512
  </a>
@@ -518,420 +1018,470 @@
518
  </section>
519
 
520
  <section class="section">
521
- <div class="container is-max-desktop">
522
- <div class="abstract-card">
523
- <h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">Abstract</h2>
524
- <p class="is-size-5" style="color: #374151; line-height: 1.6;">
525
- <span class="nover">NOVER</span> introduces a verifier-free reinforcement learning framework that enables
526
- incentive training on any text-to-text task without external verifiers. Using reasoning perplexity as a reward proxy,
527
- it allows training reasoning models across diverse tasks where traditional rule-based verification is infeasible,
528
- from creative writing to social intelligence.
529
- </p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  </div>
531
  </div>
532
  </section>
533
 
534
  <section class="section">
535
- <div class="container is-max-desktop">
536
- <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Method & Applications</h2>
537
 
 
 
 
 
 
 
 
 
 
 
 
538
  <div class="method-comparison">
539
  <div class="method-card">
540
- <div class="method-icon"><i class="fas fa-balance-scale"></i></div>
541
- <h3 class="title is-5">RLVR</h3>
542
- <p>External verifiers required<br>Limited to verifiable tasks</p>
543
  </div>
544
  <div class="method-card">
545
  <div class="method-icon"><i class="fas fa-robot"></i></div>
546
  <h3 class="title is-5">RLHF</h3>
547
- <p>Expensive reward models<br>Human preference data</p>
 
 
 
 
 
548
  </div>
549
  <div class="method-card nover">
550
  <div class="method-icon"><i class="fas fa-brain"></i></div>
551
  <h3 class="title is-5"><span class="nover">NOVER</span></h3>
552
- <p>Reasoning perplexity proxy<br>Any text-to-text task</p>
553
  </div>
554
  </div>
555
-
556
- <div class="figure-container" style="margin-top: 3rem;">
557
- <div class="figure-placeholder experiment">
558
- <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin-bottom: 1rem;">
559
- <div style="text-align: center;">
560
- <i class="fas fa-calculator" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
561
- <div style="font-size: 0.7rem;">Math Reasoning</div>
562
- </div>
563
- <div style="text-align: center;">
564
- <i class="fas fa-pen-fancy" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
565
- <div style="font-size: 0.7rem;">Creative Writing</div>
566
- </div>
567
- <div style="text-align: center;">
568
- <i class="fas fa-users" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
569
- <div style="font-size: 0.7rem;">Social Intelligence</div>
570
- </div>
571
- <div style="text-align: center;">
572
- <i class="fas fa-globe" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
573
- <div style="font-size: 0.7rem;">Multilingual</div>
574
- </div>
575
  </div>
576
- <div>Text-to-Text Task Versatility</div>
577
- <div style="font-size: 0.85rem; margin-top: 0.5rem; opacity: 0.8;">
578
- NOVER's reward design enables reasoning training across diverse text generation tasks
579
  </div>
580
  </div>
581
- <p class="has-text-grey-dark">
582
- <strong>Universal Framework:</strong> Unlike traditional approaches requiring task-specific verifiers,
583
- NOVER's reasoning perplexity reward works across any text-to-text task where reasoning is beneficial.
584
- </p>
585
- </div>
586
- </div>
587
- </section>
588
-
589
- <section class="section">
590
- <div class="container is-max-desktop">
591
- <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Results & Analysis</h2>
592
-
593
- <div class="analysis-grid">
594
- <div class="insight-card">
595
- <div class="insight-title">Reasoning Perplexity as Proxy</div>
596
- <div class="insight-description">Using ground truth likelihood conditioned on reasoning as reward signal</div>
597
- </div>
598
- <div class="insight-card">
599
- <div class="insight-title">Policy-Proxy Synchronization</div>
600
- <div class="insight-description">Exponential smoothing prevents proxy-policy divergence and reward hacking</div>
601
  </div>
602
- <div class="insight-card">
603
- <div class="insight-title">Efficiency & Format Rewards</div>
604
- <div class="insight-description">Conditional rewards encourage concise reasoning and valid completions</div>
 
 
 
 
 
 
 
605
  </div>
606
  </div>
607
 
608
- <div class="table-section">
609
- <div class="table-title">Main Results: Overall Performance</div>
610
- <div class="results-table">
611
- <table>
612
- <thead>
613
- <tr>
614
- <th rowspan="2" style="vertical-align: middle;">Method</th>
615
- <th colspan="3">General Reasoning</th>
616
- <th>Writing</th>
617
- <th colspan="2">Social Intelligence</th>
618
- <th>Multilingual</th>
619
- <th rowspan="2" style="vertical-align: middle;">Avg.</th>
620
- </tr>
621
- <tr>
622
- <th>NR</th>
623
- <th>GT</th>
624
- <th>WI</th>
625
- <th>SGN</th>
626
- <th>EB</th>
627
- <th>TB</th>
628
- <th>OPUS</th>
629
- </tr>
630
- </thead>
631
- <tbody>
632
- <tr class="model-group-header">
633
- <td colspan="9"><strong>Qwen2.5-3B</strong></td>
634
- </tr>
635
- <tr>
636
- <td class="method-name">Base Model</td>
637
- <td class="score-cell">21.80%</td>
638
- <td class="score-cell">43.10%</td>
639
- <td class="score-cell">18.40%</td>
640
- <td class="score-cell">18.70%</td>
641
- <td class="score-cell">32.03%</td>
642
- <td class="score-cell">46.79%</td>
643
- <td class="score-cell">16.70%</td>
644
- <td class="score-cell">28.22%</td>
645
- </tr>
646
- <tr>
647
- <td class="method-name">+ CoT</td>
648
- <td class="score-cell">24.40%</td>
649
- <td class="score-cell">48.90%</td>
650
- <td class="score-cell">24.20%</td>
651
- <td class="score-cell">14.76%</td>
652
- <td class="score-cell">28.12%</td>
653
- <td class="score-cell">51.23%</td>
654
- <td class="score-cell">1.40%</td>
655
- <td class="score-cell">27.57%</td>
656
- </tr>
657
- <tr>
658
- <td class="method-name">+ SFT</td>
659
- <td class="score-cell">27.00%</td>
660
- <td class="score-cell">36.20%</td>
661
- <td class="score-cell">27.30%</td>
662
- <td class="score-cell">20.08%</td>
663
- <td class="score-cell">36.72%</td>
664
- <td class="score-cell">48.66%</td>
665
- <td class="score-cell">17.30%</td>
666
- <td class="score-cell">30.47%</td>
667
- </tr>
668
- <tr class="nover-row">
669
- <td class="method-name"><strong>+ NOVER</strong></td>
670
- <td class="score-cell best-score">28.60%</td>
671
- <td class="score-cell best-score">60.30%</td>
672
- <td class="score-cell best-score">28.10%</td>
673
- <td class="score-cell best-score">41.64%</td>
674
- <td class="score-cell best-score">38.28%</td>
675
- <td class="score-cell best-score">57.88%</td>
676
- <td class="score-cell best-score">20.70%</td>
677
- <td class="score-cell best-score">39.36%<span class="improvement-badge">+31.4%</span></td>
678
- </tr>
679
- <tr class="model-group-header">
680
- <td colspan="9"><strong>Qwen2.5-7B</strong></td>
681
- </tr>
682
- <tr>
683
- <td class="method-name">Base Model</td>
684
- <td class="score-cell">31.80%</td>
685
- <td class="score-cell">48.50%</td>
686
- <td class="score-cell">20.70%</td>
687
- <td class="score-cell">24.21%</td>
688
- <td class="score-cell">28.91%</td>
689
- <td class="score-cell">44.22%</td>
690
- <td class="score-cell">19.30%</td>
691
- <td class="score-cell">31.09%</td>
692
- </tr>
693
- <tr>
694
- <td class="method-name">+ CoT</td>
695
- <td class="score-cell">31.20%</td>
696
- <td class="score-cell">57.60%</td>
697
- <td class="score-cell">29.20%</td>
698
- <td class="score-cell">33.46%</td>
699
- <td class="score-cell">38.28%</td>
700
- <td class="score-cell">50.99%</td>
701
- <td class="score-cell">1.60%</td>
702
- <td class="score-cell">34.62%</td>
703
- </tr>
704
- <tr>
705
- <td class="method-name">+ SFT</td>
706
- <td class="score-cell">27.50%</td>
707
- <td class="score-cell">45.20%</td>
708
- <td class="score-cell">33.50%</td>
709
- <td class="score-cell">37.85%</td>
710
- <td class="score-cell">47.66%</td>
711
- <td class="score-cell">57.06%</td>
712
- <td class="score-cell">23.30%</td>
713
- <td class="score-cell">38.87%</td>
714
- </tr>
715
- <tr class="nover-row">
716
- <td class="method-name"><strong>+ NOVER</strong></td>
717
- <td class="score-cell best-score">38.20%</td>
718
- <td class="score-cell best-score">61.80%</td>
719
- <td class="score-cell best-score">36.60%</td>
720
- <td class="score-cell best-score">50.79%</td>
721
- <td class="score-cell best-score">49.22%</td>
722
- <td class="score-cell best-score">67.79%</td>
723
- <td class="score-cell best-score">26.80%</td>
724
- <td class="score-cell best-score">47.31%<span class="improvement-badge">+52.2%</span></td>
725
- </tr>
726
- <tr>
727
- <td class="method-name">Qwen2.5-3B-Instruct</td>
728
- <td class="score-cell">27.10%</td>
729
- <td class="score-cell">50.00%</td>
730
- <td class="score-cell">31.50%</td>
731
- <td class="score-cell">21.25%</td>
732
- <td class="score-cell">40.62%</td>
733
- <td class="score-cell">58.69%</td>
734
- <td class="score-cell">19.90%</td>
735
- <td class="score-cell">35.58%</td>
736
- </tr>
737
- <tr>
738
- <td class="method-name">Qwen2.5-7B-Instruct</td>
739
- <td class="score-cell">29.90%</td>
740
- <td class="score-cell">56.20%</td>
741
- <td class="score-cell">35.60%</td>
742
- <td class="score-cell">67.72%</td>
743
- <td class="score-cell">46.88%</td>
744
- <td class="score-cell">65.23%</td>
745
- <td class="score-cell">23.50%</td>
746
- <td class="score-cell">46.43%</td>
747
- </tr>
748
- <tr>
749
- <td class="method-name">R1-Distill-Qwen-7B</td>
750
- <td class="score-cell">41.00%</td>
751
- <td class="score-cell">60.20%</td>
752
- <td class="score-cell">38.00%</td>
753
- <td class="score-cell">40.16%</td>
754
- <td class="score-cell">35.16%</td>
755
- <td class="score-cell">54.61%</td>
756
- <td class="score-cell">8.20%</td>
757
- <td class="score-cell">39.62%</td>
758
- </tr>
759
- </tbody>
760
- </table>
761
- </div>
762
- <div class="table-caption">
763
- <strong>Table 1:</strong> Overall performance across diverse text-to-text tasks. NOVER achieves significant improvements over base models and competitive methods.
764
- <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
765
- <strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
766
- </div>
767
- </div>
768
  </div>
769
  </section>
770
 
771
  <section class="section">
772
- <div class="container is-max-desktop">
773
- <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Key Technical Insights</h2>
774
 
775
- <div class="figure-container">
776
- <div class="figure-placeholder analysis">
777
- <div style="display: flex; justify-content: space-around; margin-bottom: 1rem;">
778
- <div style="text-align: center;">
779
- <div style="width: 50px; height: 50px; border-radius: 50%; background: #10b981; display: flex; align-items: center; justify-content: center; margin: 0 auto 0.5rem;">
780
- <i class="fas fa-brain" style="color: white; font-size: 1.2rem;"></i>
781
- </div>
782
- <div style="font-size: 0.8rem;">Reasoning Proxy</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  </div>
784
- <div style="text-align: center;">
785
- <div style="width: 50px; height: 50px; border-radius: 50%; background: #3b82f6; display: flex; align-items: center; justify-content: center; margin: 0 auto 0.5rem;">
786
- <i class="fas fa-sync-alt" style="color: white; font-size: 1.2rem;"></i>
787
- </div>
788
- <div style="font-size: 0.8rem;">Policy Sync</div>
789
- </div>
790
- <div style="text-align: center;">
791
- <div style="width: 50px; height: 50px; border-radius: 50%; background: #f59e0b; display: flex; align-items: center; justify-content: center; margin: 0 auto 0.5rem;">
792
- <i class="fas fa-shield-alt" style="color: white; font-size: 1.2rem;"></i>
793
- </div>
794
- <div style="font-size: 0.8rem;">Stability</div>
795
  </div>
796
  </div>
797
- <div>Core Components: Reasoning Perplexity, Synchronization & Stability</div>
798
- <div style="font-size: 0.85rem; margin-top: 0.5rem; opacity: 0.8;">
799
- How NOVER prevents reward hacking while enabling reasoning across diverse tasks
800
- </div>
801
- </div>
802
- <p class="has-text-grey-dark">
803
- <strong>Technical Innovation:</strong> NOVER combines reasoning perplexity as reward proxy with policy-proxy
804
- synchronization to prevent reward hacking, enabling stable training across any text-to-text task.
805
- </p>
806
- </div>
807
-
808
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; margin-top: 3rem;">
809
- <div class="table-section">
810
- <div class="table-title" style="font-size: 1.2rem;">FANToM: False Premise Tasks</div>
811
- <div class="results-table">
812
- <table>
813
- <thead>
814
- <tr>
815
- <th>Method</th>
816
- <th>3B Model</th>
817
- <th>7B Model</th>
818
- </tr>
819
- </thead>
820
- <tbody>
821
- <tr>
822
- <td class="method-name">Base</td>
823
- <td class="score-cell">12.43%</td>
824
- <td class="score-cell">14.59%</td>
825
- </tr>
826
- <tr>
827
- <td class="method-name">+ CoT</td>
828
- <td class="score-cell">14.23%</td>
829
- <td class="score-cell">19.28%</td>
830
- </tr>
831
- <tr>
832
- <td class="method-name">+ SFT</td>
833
- <td class="score-cell">26.49%</td>
834
- <td class="score-cell">29.73%</td>
835
- </tr>
836
- <tr class="nover-row">
837
- <td class="method-name"><strong>+ NOVER</strong></td>
838
- <td class="score-cell best-score">18.74%</td>
839
- <td class="score-cell best-score">23.42%</td>
840
- </tr>
841
- </tbody>
842
- </table>
843
- </div>
844
- <div class="table-caption" style="font-size: 0.8rem;">
845
- <strong>Table 2:</strong> Theory of mind tasks with false premise problems. NOVER shows balanced performance.
846
- </div>
847
  </div>
848
 
849
- <div class="table-section">
850
- <div class="table-title" style="font-size: 1.2rem;">Verifier Comparison</div>
851
- <div class="results-table">
852
- <table>
853
- <thead>
854
- <tr>
855
- <th>Group</th>
856
- <th>Method</th>
857
- <th>3B</th>
858
- <th>7B</th>
859
- </tr>
860
- </thead>
861
- <tbody>
862
- <tr class="model-group-header">
863
- <td rowspan="3">Baselines</td>
864
- <td class="method-name">Base</td>
865
- <td class="score-cell">18.40%</td>
866
- <td class="score-cell">20.70%</td>
867
- </tr>
868
- <tr>
869
- <td class="method-name">+ CoT</td>
870
- <td class="score-cell">24.20%</td>
871
- <td class="score-cell">29.20%</td>
872
- </tr>
873
- <tr>
874
- <td class="method-name">+ SFT</td>
875
- <td class="score-cell">27.30%</td>
876
- <td class="score-cell">33.50%</td>
877
- </tr>
878
- <tr class="model-group-header">
879
- <td rowspan="3">Model as<br>Verifier</td>
880
- <td class="method-name">+ GV</td>
881
- <td class="score-cell">18.30%</td>
882
- <td class="score-cell">30.00%</td>
883
- </tr>
884
- <tr>
885
- <td class="method-name">+ LJ</td>
886
- <td class="score-cell">21.40%</td>
887
- <td class="score-cell">3.80%</td>
888
- </tr>
889
- <tr>
890
- <td class="method-name">+ LJ_S</td>
891
- <td class="score-cell">--</td>
892
- <td class="score-cell">21.60%</td>
893
- </tr>
894
- <tr class="nover-row">
895
- <td>Verifier-Free</td>
896
- <td class="method-name"><strong>+ NOVER</strong></td>
897
- <td class="score-cell best-score">28.10%</td>
898
- <td class="score-cell best-score">36.60%</td>
899
- </tr>
900
- </tbody>
901
- </table>
902
- </div>
903
- <div class="table-caption" style="font-size: 0.8rem;">
904
- <strong>Table 3:</strong> WebInstruct experiments comparing LLM-as-a-Judge (LJ) and general verifier (GV) approaches.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
905
  </div>
906
  </div>
907
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908
  </div>
909
  </section>
910
 
911
  <section class="section">
912
- <div class="container is-max-desktop">
913
  <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>
 
 
 
 
 
 
 
 
 
914
 
915
  <div class="glass-card">
916
  <div style="text-align: center;">
917
- <div class="figure-placeholder analysis" style="margin-bottom: 2rem;">
918
  <div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
919
  <div style="text-align: center;">
920
- <i class="fas fa-fish" style="font-size: 2rem; margin-bottom: 0.5rem;"></i>
921
- <div style="font-size: 0.8rem;">Standard Training</div>
922
  </div>
923
  <div style="font-size: 1.5rem;">→</div>
924
  <div style="text-align: center;">
925
- <i class="fas fa-graduation-cap" style="font-size: 2rem; margin-bottom: 0.5rem;"></i>
926
- <div style="font-size: 0.8rem;">Process Reward</div>
927
  </div>
928
  </div>
929
- <div>Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
930
- </div>
931
- <p style="color: #6b7280; line-height: 1.6;">
932
- <strong>Novel Paradigm:</strong> Inverse incentive training rewards the reasoning process itself,
933
- leading to enhanced creativity and thoughtfulness in open-ended tasks.
934
- </p>
935
  </div>
936
  </div>
937
  </div>
@@ -940,7 +1490,7 @@
940
 
941
 
942
  <section class="section" id="BibTeX">
943
- <div class="container is-max-desktop">
944
  <div class="glass-card">
945
  <h2 class="title is-3">Citation</h2>
946
  <pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
@@ -957,14 +1507,7 @@
957
  <div class="container has-text-centered">
958
  <div class="content">
959
  <div style="margin-bottom: 2rem;">
960
- <a class="icon-link" target="_blank" href="https://arxiv.org/pdf/2505.16022.pdf"
961
- style="margin: 0 1rem; font-size: 2rem; color: #1a1a1a;">
962
- <i class="fas fa-file-pdf"></i>
963
- </a>
964
- <a class="icon-link" href="https://github.com/thinkwee/NOVER" target="_blank"
965
- style="margin: 0 1rem; font-size: 2rem; color: #10b981;">
966
- <i class="fab fa-github"></i>
967
- </a>
968
  </div>
969
  <p style="color: #6b7280;">
970
  Licensed under <a href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank" style="color: #10b981;">CC BY-SA 4.0</a>
@@ -974,4 +1517,4 @@
974
  </footer>
975
 
976
  </body>
977
- </html>
 
55
  z-index: -1;
56
  }
57
 
58
+ /* Hero Section - Brain/Neural Network Pattern */
59
  .hero {
60
+ background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
61
  border-radius: 12px;
62
  margin: 2rem;
63
  box-shadow: 0 4px 24px rgba(0,0,0,0.06);
64
  border: 1px solid #e5e7eb;
65
+ position: relative;
66
+ overflow: hidden;
67
  }
68
 
69
+ .hero::before {
70
+ content: '';
71
+ position: absolute;
72
+ top: 0;
73
+ left: 0;
74
+ width: 100%;
75
+ height: 100%;
76
+ background-image:
77
+ repeating-linear-gradient(
78
+ 45deg,
79
+ transparent,
80
+ transparent 40px,
81
+ rgba(16, 185, 129, 0.04) 40px,
82
+ rgba(16, 185, 129, 0.04) 80px
83
+ ),
84
+ repeating-linear-gradient(
85
+ -45deg,
86
+ transparent,
87
+ transparent 40px,
88
+ rgba(59, 130, 246, 0.03) 40px,
89
+ rgba(59, 130, 246, 0.03) 80px
90
+ );
91
+ background-size: 80px 80px, 80px 80px;
92
+ background-position: 0 0, 40px 40px;
93
+ opacity: 0.3;
94
+ pointer-events: none;
95
+ z-index: 0;
96
+ }
97
+
98
+ /* Abstract Section - Circuit/Technology Pattern */
99
+ .section:nth-of-type(1) {
100
+ background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
101
+ margin: 2rem;
102
+ border-radius: 12px;
103
+ box-shadow: 0 2px 8px rgba(0,0,0,0.04);
104
+ border: 1px solid #e5e7eb;
105
+ position: relative;
106
+ overflow: hidden;
107
+ }
108
+
109
+ .section:nth-of-type(1)::before {
110
+ content: '';
111
+ position: absolute;
112
+ top: 0;
113
+ left: 0;
114
+ width: 100%;
115
+ height: 100%;
116
+ background-image:
117
+ repeating-linear-gradient(
118
+ 90deg,
119
+ transparent,
120
+ transparent 30px,
121
+ rgba(59, 130, 246, 0.06) 30px,
122
+ rgba(59, 130, 246, 0.06) 60px
123
+ ),
124
+ repeating-linear-gradient(
125
+ 0deg,
126
+ transparent,
127
+ transparent 30px,
128
+ rgba(59, 130, 246, 0.06) 30px,
129
+ rgba(59, 130, 246, 0.06) 60px
130
+ );
131
+ background-size: 60px 60px, 60px 60px;
132
+ background-position: 0 0, 30px 30px;
133
+ opacity: 0.3;
134
+ pointer-events: none;
135
+ z-index: 0;
136
+ }
137
+
138
+ /* Incentivize Reasoning Section - Puzzle/Logic Pattern */
139
+ .section:nth-of-type(2) {
140
+ background: linear-gradient(135deg, #ffffff 0%, #f0fdf4 100%);
141
+ margin: 2rem;
142
+ border-radius: 12px;
143
+ box-shadow: 0 2px 8px rgba(0,0,0,0.04);
144
+ border: 1px solid #e5e7eb;
145
+ position: relative;
146
+ overflow: hidden;
147
+ }
148
+
149
+ .section:nth-of-type(2)::before {
150
+ content: '';
151
+ position: absolute;
152
+ top: 0;
153
+ left: 0;
154
+ width: 100%;
155
+ height: 100%;
156
+ background-image:
157
+ repeating-linear-gradient(
158
+ 45deg,
159
+ transparent,
160
+ transparent 50px,
161
+ rgba(16, 185, 129, 0.07) 50px,
162
+ rgba(16, 185, 129, 0.07) 100px
163
+ ),
164
+ repeating-linear-gradient(
165
+ -45deg,
166
+ transparent,
167
+ transparent 50px,
168
+ rgba(16, 185, 129, 0.05) 50px,
169
+ rgba(16, 185, 129, 0.05) 100px
170
+ );
171
+ background-size: 100px 100px, 100px 100px;
172
+ background-position: 0 0, 50px 50px;
173
+ opacity: 0.3;
174
+ pointer-events: none;
175
+ z-index: 0;
176
+ }
177
+
178
+ /* NOVER Methodology Section - Mathematical/Formula Pattern */
179
+ .section:nth-of-type(3) {
180
+ background: linear-gradient(135deg, #ffffff 0%, #fefce8 100%);
181
+ margin: 2rem;
182
+ border-radius: 12px;
183
+ box-shadow: 0 2px 8px rgba(0,0,0,0.04);
184
+ border: 1px solid #e5e7eb;
185
+ position: relative;
186
+ overflow: hidden;
187
+ }
188
+
189
+ .section:nth-of-type(3)::before {
190
+ content: '';
191
+ position: absolute;
192
+ top: 0;
193
+ left: 0;
194
+ width: 100%;
195
+ height: 100%;
196
+ background-image:
197
+ repeating-linear-gradient(
198
+ 30deg,
199
+ transparent,
200
+ transparent 40px,
201
+ rgba(245, 158, 11, 0.06) 40px,
202
+ rgba(245, 158, 11, 0.06) 80px
203
+ ),
204
+ repeating-linear-gradient(
205
+ -30deg,
206
+ transparent,
207
+ transparent 40px,
208
+ rgba(245, 158, 11, 0.05) 40px,
209
+ rgba(245, 158, 11, 0.05) 80px
210
+ );
211
+ background-size: 80px 80px, 80px 80px;
212
+ background-position: 0 0, 40px 40px;
213
+ opacity: 0.3;
214
+ pointer-events: none;
215
+ z-index: 0;
216
+ }
217
+
218
+ /* Experimental Results Section - Data/Chart Pattern */
219
+ .section:nth-of-type(4) {
220
+ background: linear-gradient(135deg, #ffffff 0%, #fef2f2 100%);
221
+ margin: 2rem;
222
+ border-radius: 12px;
223
+ box-shadow: 0 2px 8px rgba(0,0,0,0.04);
224
+ border: 1px solid #e5e7eb;
225
+ position: relative;
226
+ overflow: hidden;
227
+ }
228
+
229
+ .section:nth-of-type(4)::before {
230
+ content: '';
231
+ position: absolute;
232
+ top: 0;
233
+ left: 0;
234
+ width: 100%;
235
+ height: 100%;
236
+ background-image:
237
+ repeating-linear-gradient(
238
+ 0deg,
239
+ transparent,
240
+ transparent 35px,
241
+ rgba(239, 68, 68, 0.06) 35px,
242
+ rgba(239, 68, 68, 0.06) 70px
243
+ ),
244
+ repeating-linear-gradient(
245
+ 90deg,
246
+ transparent,
247
+ transparent 35px,
248
+ rgba(239, 68, 68, 0.05) 35px,
249
+ rgba(239, 68, 68, 0.05) 70px
250
+ );
251
+ background-size: 70px 70px, 70px 70px;
252
+ background-position: 0 0, 35px 35px;
253
+ opacity: 0.3;
254
+ pointer-events: none;
255
+ z-index: 0;
256
+ }
257
+
258
+ /* Inverse Incentive Training Section - Fish/Teaching Pattern */
259
+ .section:nth-of-type(5) {
260
+ background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
261
  margin: 2rem;
262
  border-radius: 12px;
263
  box-shadow: 0 2px 8px rgba(0,0,0,0.04);
264
  border: 1px solid #e5e7eb;
265
+ position: relative;
266
+ overflow: hidden;
267
+ }
268
+
269
+ .section:nth-of-type(5)::before {
270
+ content: '';
271
+ position: absolute;
272
+ top: 0;
273
+ left: 0;
274
+ width: 100%;
275
+ height: 100%;
276
+ background-image:
277
+ repeating-linear-gradient(
278
+ 60deg,
279
+ transparent,
280
+ transparent 60px,
281
+ rgba(14, 165, 233, 0.07) 60px,
282
+ rgba(14, 165, 233, 0.07) 120px
283
+ ),
284
+ repeating-linear-gradient(
285
+ -60deg,
286
+ transparent,
287
+ transparent 60px,
288
+ rgba(14, 165, 233, 0.05) 60px,
289
+ rgba(14, 165, 233, 0.05) 120px
290
+ );
291
+ background-size: 120px 120px, 120px 120px;
292
+ background-position: 0 0, 60px 60px;
293
+ opacity: 0.3;
294
+ pointer-events: none;
295
+ z-index: 0;
296
+ }
297
+
298
+ /* Citation Section - Book/Paper Pattern */
299
+ .section:nth-of-type(6) {
300
+ background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
301
+ margin: 2rem;
302
+ border-radius: 12px;
303
+ box-shadow: 0 2px 8px rgba(0,0,0,0.04);
304
+ border: 1px solid #e5e7eb;
305
+ position: relative;
306
+ overflow: hidden;
307
+ }
308
+
309
+ .section:nth-of-type(6)::before {
310
+ content: '';
311
+ position: absolute;
312
+ top: 0;
313
+ left: 0;
314
+ width: 100%;
315
+ height: 100%;
316
+ background-image:
317
+ repeating-linear-gradient(
318
+ 25deg,
319
+ transparent,
320
+ transparent 45px,
321
+ rgba(107, 114, 128, 0.06) 45px,
322
+ rgba(107, 114, 128, 0.06) 90px
323
+ ),
324
+ repeating-linear-gradient(
325
+ -25deg,
326
+ transparent,
327
+ transparent 45px,
328
+ rgba(107, 114, 128, 0.05) 45px,
329
+ rgba(107, 114, 128, 0.05) 90px
330
+ );
331
+ background-size: 90px 90px, 90px 90px;
332
+ background-position: 0 0, 45px 45px;
333
+ opacity: 0.3;
334
+ pointer-events: none;
335
+ z-index: 0;
336
+ }
337
+
338
+ /* Footer Section - Social/Network Pattern */
339
+ footer.section {
340
+ background: linear-gradient(135deg, #ffffff 0%, #f9fafb 100%);
341
+ border-top: 1px solid #e5e7eb;
342
+ margin-top: 4rem;
343
+ position: relative;
344
+ overflow: hidden;
345
+ }
346
+
347
+ footer.section::before {
348
+ content: '';
349
+ position: absolute;
350
+ top: 0;
351
+ left: 0;
352
+ width: 100%;
353
+ height: 100%;
354
+ background-image:
355
+ repeating-linear-gradient(
356
+ 45deg,
357
+ transparent,
358
+ transparent 80px,
359
+ rgba(16, 185, 129, 0.06) 80px,
360
+ rgba(16, 185, 129, 0.06) 160px
361
+ ),
362
+ repeating-linear-gradient(
363
+ -45deg,
364
+ transparent,
365
+ transparent 80px,
366
+ rgba(59, 130, 246, 0.05) 80px,
367
+ rgba(59, 130, 246, 0.05) 160px
368
+ );
369
+ background-size: 160px 160px, 160px 160px;
370
+ background-position: 0 0, 80px 80px;
371
+ opacity: 0.3;
372
+ pointer-events: none;
373
+ z-index: 0;
374
+ }
375
+
376
+ /* Ensure content is above patterns */
377
+ .hero-body,
378
+ .section .container,
379
+ footer .container {
380
+ position: relative;
381
+ z-index: 1;
382
  }
383
 
384
  .publication-title {
 
400
  border: 1px solid #e5e7eb;
401
  margin: 1rem 0;
402
  transition: all 0.2s ease;
403
+ position: relative;
404
+ z-index: 1;
405
  }
406
 
407
  .glass-card:hover {
 
417
  border: 1px solid #e5e7eb;
418
  margin: 2rem 0;
419
  text-align: center;
420
+ position: relative;
421
+ z-index: 1;
422
  }
423
 
424
  .figure-placeholder {
 
472
  text-align: left;
473
  transition: all 0.2s ease;
474
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
475
+ position: relative;
476
+ z-index: 1;
477
  }
478
 
479
  .insight-card:hover {
 
501
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
502
  border: 1px solid #e5e7eb;
503
  margin: 2rem 0;
504
+ position: relative;
505
+ z-index: 1;
506
  }
507
 
508
  .comparison-table table {
 
547
  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
548
  box-shadow: 0 4px 12px rgba(0,0,0,0.15);
549
  margin: 2rem 0;
550
+ position: relative;
551
+ z-index: 1;
552
  }
553
 
554
  .metrics-header {
 
574
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
575
  border: 1px solid #e5e7eb;
576
  transition: all 0.2s ease;
577
+ position: relative;
578
+ z-index: 1;
579
  }
580
 
581
  .method-card:hover {
 
600
  border-radius: 12px;
601
  margin: 2rem 0;
602
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
603
+ position: relative;
604
+ z-index: 1;
605
  }
606
 
607
  .stats-grid {
 
619
  text-align: center;
620
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
621
  transition: all 0.2s ease;
622
+ position: relative;
623
+ z-index: 1;
624
  }
625
 
626
  .stat-item:hover {
 
647
  overflow: hidden;
648
  box-shadow: 0 2px 12px rgba(0,0,0,0.08);
649
  border: 1px solid #e5e7eb;
650
+ margin: 0;
651
  width: 100%;
652
+ position: relative;
653
+ z-index: 1;
654
  }
655
 
656
  .results-table table {
657
  width: 100%;
658
  border-collapse: collapse;
659
+ font-size: 0.8rem;
660
  }
661
 
662
  .results-table th {
663
  background: #f8fafc;
664
+ color: #374155;
665
  padding: 1rem 0.8rem;
666
  font-weight: 600;
667
  border-bottom: 2px solid #e5e7eb;
668
+ text-align: center !important;
669
  position: sticky;
670
  top: 0;
671
  z-index: 10;
 
674
  .results-table td {
675
  padding: 0.8rem;
676
  border-bottom: 1px solid #f3f4f6;
677
+ text-align: center !important;
 
678
  }
679
 
680
+ /* Fine-tune table height - increase row height for Table 1 */
681
+ .results-table.table-1 td {
682
+ padding: 0.8rem;
683
+ line-height: 0.9;
684
+ }
685
+
686
+ .results-table.table-1 th {
687
+ padding: 1.2rem 0.8rem;
688
+ }
689
+
690
+ /* Fine-tune table height - reduce row height for Table 2 */
691
+ .results-table.table-2 td {
692
+ padding: 0.8rem 0.8rem;
693
+ line-height: 1.3;
694
+ }
695
+
696
+ .results-table.table-2 th {
697
+ padding: 0.8rem 0.8rem;
698
+ }
699
+
700
+ /* Adjust the row height of model-group header rows */
701
+ .results-table.table-2 .model-group-header td {
702
+ padding: 0.8rem 0.6rem;
703
+ line-height: 1.3;
704
+ }
705
+
706
+ /* Further fine-tune table row spacing */
707
+ .results-table.table-1 tbody tr {
708
+ height: 48px;
709
+ }
710
+
711
+ .results-table.table-2 tbody tr {
712
+ height: 42px;
713
+ }
714
+
715
+ /* Adjust table caption spacing */
716
+ .table-1 + .table-caption {
717
+ margin-top: 1.5rem;
718
+ }
719
+
720
+ .table-2 + .table-caption {
721
+ margin-top: 1rem;
722
  }
723
 
724
  .results-table .method-name {
725
+ text-align: center !important;
726
  font-weight: 600;
727
  color: #1f2937;
 
728
  }
729
 
730
+ /* CSS controlling the column widths of Table 2 */
731
+ .results-table .model-type-column {
732
+ width: 100px;
733
+ min-width: 100px;
734
+ max-width: 100px;
735
+ }
736
+
737
+ .results-table .model-name-column {
738
+ width: 100px;
739
+ min-width: 100px;
740
+ max-width: 100px;
741
+ }
742
+
743
+ .results-table .method-column {
744
+ width: 100px;
745
+ min-width: 100px;
746
+ max-width: 100px;
747
+ }
748
+
749
+ .results-table .metric-column {
750
+ width: 80px;
751
+ min-width: 80px;
752
+ max-width: 80px;
753
+ }
754
+
755
+
756
+
757
+
758
+
759
  .results-table .nover-row {
760
  background: #f0fdf4;
761
  border-left: 3px solid #10b981;
762
  }
763
 
764
+
 
 
765
 
766
  .results-table .best-score {
767
  color: #10b981;
 
769
  position: relative;
770
  }
771
 
772
+
 
 
 
 
 
 
 
 
 
773
 
774
  .table-section {
775
+ margin: 0;
776
  }
777
 
778
  .table-title {
 
798
  background: #f1f5f9 !important;
799
  color: #334155;
800
  font-weight: 700;
801
+ text-align: center !important;
 
802
  }
803
 
804
+ .model-group-header td {
805
+ text-align: center !important;
 
806
  }
807
 
808
+ .score-cell {
809
+ position: relative;
 
 
 
 
 
 
 
 
 
 
 
 
810
  }
811
 
812
  @media (max-width: 768px) {
 
831
  display: block !important;
832
  }
833
 
834
+
835
+ }
836
+
837
+ .formula-container {
838
+ background: #f8fafc;
839
+ border: 1px solid #e2e8f0;
840
+ border-radius: 12px;
841
+ padding: 2rem;
842
+ margin: 2rem 0;
843
+ text-align: center;
844
+ position: relative;
845
+ z-index: 1;
846
+ }
847
+
848
+ .formula-container::before {
849
+ content: '🧮';
850
+ position: absolute;
851
+ top: 1rem;
852
+ left: 1rem;
853
+ font-size: 1.2rem;
854
+ }
855
+
856
+ .formula-title {
857
+ font-size: 1.1rem;
858
+ font-weight: 600;
859
+ color: #334155;
860
+ margin-bottom: 1rem;
861
+ }
862
+
863
+ .formula-description {
864
+ font-size: 0.9rem;
865
+ color: #64748b;
866
+ margin-top: 1rem;
867
+ line-height: 1.5;
868
+ }
869
+
870
+ .diagram-grid {
871
+ display: grid;
872
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
873
+ gap: 2rem;
874
+ margin: 3rem 0;
875
+ }
876
+
877
+ .diagram-card {
878
+ background: white;
879
+ border-radius: 12px;
880
+ padding: 2rem;
881
+ box-shadow: 0 2px 8px rgba(0,0,0,0.06);
882
+ border: 1px solid #e5e7eb;
883
+ text-align: center;
884
+ transition: all 0.3s ease;
885
+ position: relative;
886
+ z-index: 1;
887
+ }
888
+
889
+ .diagram-card:hover {
890
+ transform: translateY(-2px);
891
+ box-shadow: 0 8px 24px rgba(0,0,0,0.12);
892
+ }
893
+
894
+ .diagram-placeholder {
895
+ height: 200px;
896
+ background: #f1f5f9;
897
+ border: 2px dashed #94a3b8;
898
+ border-radius: 8px;
899
+ display: flex;
900
+ flex-direction: column;
901
+ align-items: center;
902
+ justify-content: center;
903
+ margin-bottom: 1rem;
904
+ transition: all 0.3s ease;
905
+ }
906
+
907
+ .diagram-placeholder:hover {
908
+ background: #e2e8f0;
909
+ border-color: #64748b;
910
+ }
911
+
912
+ .diagram-icon {
913
+ font-size: 3rem;
914
+ color: #64748b;
915
+ margin-bottom: 0.5rem;
916
+ }
917
+
918
+ .diagram-label {
919
+ font-size: 0.9rem;
920
+ color: #475569;
921
+ font-weight: 500;
922
+ }
923
+
924
+ .diagram-description {
925
+ font-size: 0.85rem;
926
+ color: #64748b;
927
+ line-height: 1.4;
928
+ }
929
+
930
+ /* Apple Design Button Styles */
931
+ .external-link.button {
932
+ position: relative;
933
+ overflow: hidden;
934
+ }
935
+
936
+ .external-link.button::before {
937
+ content: '';
938
+ position: absolute;
939
+ top: 0;
940
+ left: -100%;
941
+ width: 100%;
942
+ height: 100%;
943
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
944
+ transition: left 0.5s;
945
+ }
946
+
947
+ .external-link.button:hover {
948
+ transform: translateY(-2px);
949
+ box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15), 0 4px 8px rgba(0, 0, 0, 0.1);
950
+ }
951
+
952
+ .external-link.button:hover::before {
953
+ left: 100%;
954
+ }
955
+
956
+ .external-link.button:active {
957
+ transform: translateY(0);
958
+ transition: transform 0.1s;
959
  }
960
  </style>
961
 
962
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
963
+ <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
964
+ <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
965
+ <script>
966
+ window.MathJax = {
967
+ tex: {
968
+ inlineMath: [['$', '$'], ['\\(', '\\)']],
969
+ displayMath: [['$$', '$$'], ['\\[', '\\]']]
970
+ }
971
+ };
972
+ </script>
973
  </head>
974
  <body>
975
  <div class="geometric-bg"></div>
976
 
977
  <section class="hero">
978
  <div class="hero-body">
979
+ <div class="container is-widescreen">
980
  <div class="columns is-centered">
981
  <div class="column has-text-centered">
982
  <h1 class="title is-1 publication-title">NOVER</h1>
 
983
  <p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>
984
 
985
  <div class="is-size-5 publication-authors" style="margin: 2rem 0;">
 
991
 
992
  <div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
993
  <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank"
994
+ class="external-link button is-normal" style="background: linear-gradient(135deg, #B31B1B 0%, #D32F2F 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(179, 27, 27, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
995
  <span class="icon"><i class="fas fa-file-pdf"></i></span>
996
  <span>Paper</span>
997
  </a>
998
  <a href="https://github.com/thinkwee/NOVER" target="_blank"
999
+ class="external-link button is-normal" style="background: linear-gradient(135deg, #24292e 0%, #2f363d 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(36, 41, 46, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
1000
  <span class="icon"><i class="fab fa-github"></i></span>
1001
  <span>Code</span>
1002
  </a>
1003
  <a href="#" target="_blank"
1004
+ class="external-link button is-normal" style="background: linear-gradient(135deg, #FFD43B 0%, #FFE066 100%); color: #000; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(255, 212, 59, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
1005
  <span class="icon"><i class="fas fa-database"></i></span>
1006
  <span>Dataset</span>
1007
  </a>
1008
  <a href="#" target="_blank"
1009
+ class="external-link button is-normal" style="background: linear-gradient(135deg, #0EA5E9 0%, #38BDF8 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(14, 165, 233, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
1010
  <span class="icon"><i class="fas fa-cube"></i></span>
1011
  <span>Model</span>
1012
  </a>
 
1018
  </section>
1019
 
1020
  <section class="section">
1021
+ <div class="container is-widescreen">
1022
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; align-items: start;">
1023
+ <!-- Abstract on the left -->
1024
+ <div class="abstract-card">
1025
+ <h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">TL;DR</h2>
1026
+ <p class="is-size-5" style="color: #374151; line-height: 1.6;">
1027
+ <span class="nover">NOVER</span> (NO-Verifier Reinforcement Learning) enables
1028
+ incentive training on any text-to-text task without external verifiers. It utilizes the policy model's reasoning perplexity to estimate the reward.
1029
+ <br>
1030
+ <br>
1031
+ <strong>• Your LLM is secretly a verifier.
1032
+ <br>
1033
+ • Your LLM only reason on <s>Easy-to-Verify</s> tasks.
1034
+ <br>
1035
+ • Your LLM can <s>reason</s> on ANY tasks.
1036
+ <br>
1037
+ • Your LLM can be incentivized to do more than reasoning.</strong>
1038
+ <br>
1039
+ </p>
1040
+ </div>
1041
+
1042
+ <!-- Overall framework image placeholder on the right -->
1043
+ <div class="figure-container">
1044
+ <div class="figure-placeholder" style="height: 280px; display: flex; flex-direction: column; justify-content: center; align-items: center;">
1045
+ <img src="logo.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
1046
+ </div>
1047
+ <div style="font-size: 1.1rem; color: #2e3036; text-align: center; margin-top: 0.5rem;">
1048
+ <div><span class="nover">NOVER</span> extends RLVR on any text-to-text task</div>
1049
+ <div>beyond easy-to-verify math/coding problems.</div>
1050
+ </div>
1051
+ </div>
1052
+ </div>
1053
+ </div>
1054
+ </section>
1055
+
1056
+ <section class="section">
1057
+ <div class="container is-widescreen">
1058
+ <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Incentivize Reasoning on Any Task</h2>
1059
+ <p class="is-size-5" style="color: #6b7280; margin-bottom: 3rem; max-width: 800px; margin-left: auto; margin-right: auto;">
1060
+ NOVER enables training large reasoning models on any text data and any task.<br>
1061
+ NO verifiers/models/rules needed, just the ground-truth answer and the policy model itself.<br>
1062
+ <strong>General Reasoning:</strong> ⚛️ physics • ⚖️ law • 🏥 medical • 💰 finance<br>
1063
+ <strong>Creative Tasks:</strong> 🎨 creative writing<br>
1064
+ <strong>Social Intelligence:</strong> 🧠 theory of mind • 😊 emotion detection • 🤝 social reasoning<br>
1065
+ <strong>Natural Language Generation:</strong> 🌍 translation • 📚 summarization
1066
+ </p>
1067
+
1068
+ <div class="figure-container">
1069
+ <img src="example.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
1070
  </div>
1071
  </div>
1072
  </section>
1073
 
1074
  <section class="section">
1075
+ <div class="container is-widescreen">
1076
+ <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">NOVER Methodology</h2>
1077
 
1078
+ <!-- Image Placeholders Row -->
1079
+ <div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
1080
+ <div style="width: 600px; height: 420px;">
1081
+ <img src="paradigm.png" alt="paradigm" style="width: 100%; height: 100%; object-fit: contain;">
1082
+ </div>
1083
+ <div style="width: 600px; height: 420px;">
1084
+ <img src="overall.png" alt="overall" style="width: 100%; height: 100%; object-fit: contain;">
1085
+ </div>
1086
+ </div>
1087
+
1088
+ <!-- Core Framework Comparison -->
1089
  <div class="method-comparison">
1090
  <div class="method-card">
1091
+ <div class="method-icon"><i class="fas fa-graduation-cap"></i></div>
1092
+ <h3 class="title is-5">SFT</h3>
1093
+ <p>Memorize Input-Output Patterns</p>
1094
  </div>
1095
  <div class="method-card">
1096
  <div class="method-icon"><i class="fas fa-robot"></i></div>
1097
  <h3 class="title is-5">RLHF</h3>
1098
+ <p>Train Reward Model <br>Give Preference Feedback</p>
1099
+ </div>
1100
+ <div class="method-card">
1101
+ <div class="method-icon"><i class="fas fa-balance-scale"></i></div>
1102
+ <h3 class="title is-5">RLVR</h3>
1103
+ <p>Rule-based Reward <br>End2End Outcome RL</p>
1104
  </div>
1105
  <div class="method-card nover">
1106
  <div class="method-icon"><i class="fas fa-brain"></i></div>
1107
  <h3 class="title is-5"><span class="nover">NOVER</span></h3>
1108
+ <p>Reasoning Perplexity as Reward<br>Reason on Any Task</p>
1109
  </div>
1110
  </div>
1111
+
1112
+ <!-- Consolidated Mathematical Formulations -->
1113
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 2rem; margin: 3rem 0;">
1114
+ <!-- Reasoning Perplexity -->
1115
+ <div class="formula-container">
1116
+ <div class="formula-title">Reasoning Perplexity</div>
1117
+ <div style="font-size: 0.9rem; margin: 1rem 0;">
1118
+ $P_r(p, t, g) = \exp\left(-\frac{\sum_{i=1}^{|g|} \log \pi_{p}(g_i \mid p, t, g_{&lt;i})}{|g| \cdot N(|t|)}\right)$
 
 
 
 
 
 
 
 
 
 
 
 
1119
  </div>
1120
+ <div class="formula-description">
1121
+ Use perplexity of policy model on ground truth conditioned on reasoning trajectory as reward proxy
 
1122
  </div>
1123
  </div>
1124
+
1125
+ <!-- Rewards -->
1126
+ <div class="formula-container">
1127
+ <div class="formula-title">Rewards</div>
1128
+ <div style="font-size: 1.1rem; margin: 1rem 0;">
1129
+ $$R_{\mathrm{total}} = w_{\mathrm{f}} R_{\mathrm{f}} + \mathbb{I}(R_{\mathrm{f}} = 1) \cdot (w_{\mathrm{r}} R_{\mathrm{r}} + w_{\mathrm{e}} R_{\mathrm{e}})$$
1130
+ </div>
1131
+ <div class="formula-description">
1132
+ Combined reward function incorporating reasoning, efficiency, and format components
1133
+ </div>
 
 
 
 
 
 
 
 
 
 
1134
  </div>
1135
+
1136
+ <!-- Policy-Proxy Synchronization -->
1137
+ <div class="formula-container">
1138
+ <div class="formula-title">Policy-Proxy Synchronization</div>
1139
+ <div style="font-size: 1.1rem; margin: 1rem 0;">
1140
+ $$\pi_{\mathrm{p}} \leftarrow \alpha \cdot \pi_{\mathrm{p}} + (1-\alpha) \cdot \pi_{\theta}$$
1141
+ </div>
1142
+ <div class="formula-description">
1143
+ Smooth synchronization between policy and proxy ensures stable training with limited resources
1144
+ </div>
1145
  </div>
1146
  </div>
1147
 
1148
+
1149
+
1150
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1151
  </div>
1152
  </section>
1153
 
1154
  <section class="section">
1155
+ <div class="container is-widescreen">
1156
+ <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Experimental Results</h2>
1157
 
1158
+ <!-- Table 1 and Table 2 in two columns -->
1159
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; margin-top: 3rem; align-items: start;">
1160
+ <!-- Table 1 on the left -->
1161
+ <div>
1162
+ <h3 class="table-title">Overall on NOVEReason Dataset</h3>
1163
+ <!-- Main Results Table -->
1164
+ <div class="table-section">
1165
+ <div class="results-table table-1">
1166
+ <table>
1167
+ <thead>
1168
+ <tr>
1169
+ <th>Method</th>
1170
+ <th>NR</th>
1171
+ <th>GT</th>
1172
+ <th>WI</th>
1173
+ <th>SGN</th>
1174
+ <th>EB</th>
1175
+ <th>TB</th>
1176
+ <th>OPUS</th>
1177
+ </tr>
1178
+ </thead>
1179
+ <tbody>
1180
+ <tr class="model-group-header">
1181
+ <td colspan="8"><strong>Qwen2.5-3B</strong></td>
1182
+ </tr>
1183
+ <tr>
1184
+ <td class="method-name">Base</td>
1185
+ <td class="score-cell">21.80%</td>
1186
+ <td class="score-cell">43.10%</td>
1187
+ <td class="score-cell">18.40%</td>
1188
+ <td class="score-cell">18.70%</td>
1189
+ <td class="score-cell">32.03%</td>
1190
+ <td class="score-cell">46.79%</td>
1191
+ <td class="score-cell">16.70%</td>
1192
+ </tr>
1193
+ <tr>
1194
+ <td class="method-name">+ CoT</td>
1195
+ <td class="score-cell">24.40%</td>
1196
+ <td class="score-cell">48.90%</td>
1197
+ <td class="score-cell">24.20%</td>
1198
+ <td class="score-cell">14.76%</td>
1199
+ <td class="score-cell">28.12%</td>
1200
+ <td class="score-cell">51.23%</td>
1201
+ <td class="score-cell">1.40%</td>
1202
+ </tr>
1203
+ <tr>
1204
+ <td class="method-name">+ SFT</td>
1205
+ <td class="score-cell">27.00%</td>
1206
+ <td class="score-cell">36.20%</td>
1207
+ <td class="score-cell">27.30%</td>
1208
+ <td class="score-cell">20.08%</td>
1209
+ <td class="score-cell">36.72%</td>
1210
+ <td class="score-cell">48.66%</td>
1211
+ <td class="score-cell">17.30%</td>
1212
+ </tr>
1213
+ <tr class="nover-row">
1214
+ <td class="method-name"><strong>+ NOVER</strong></td>
1215
+ <td class="score-cell best-score">28.60%</td>
1216
+ <td class="score-cell best-score">60.30%</td>
1217
+ <td class="score-cell best-score">28.10%</td>
1218
+ <td class="score-cell best-score">41.64%</td>
1219
+ <td class="score-cell best-score">38.28%</td>
1220
+ <td class="score-cell best-score">57.88%</td>
1221
+ <td class="score-cell best-score">20.70%</td>
1222
+ </tr>
1223
+ <tr class="model-group-header">
1224
+ <td colspan="8"><strong>Qwen2.5-7B</strong></td>
1225
+ </tr>
1226
+ <tr>
1227
+ <td class="method-name">Base</td>
1228
+ <td class="score-cell">31.80%</td>
1229
+ <td class="score-cell">48.50%</td>
1230
+ <td class="score-cell">20.70%</td>
1231
+ <td class="score-cell">24.21%</td>
1232
+ <td class="score-cell">28.91%</td>
1233
+ <td class="score-cell">44.22%</td>
1234
+ <td class="score-cell">19.30%</td>
1235
+ </tr>
1236
+ <tr>
1237
+ <td class="method-name">+ CoT</td>
1238
+ <td class="score-cell">31.20%</td>
1239
+ <td class="score-cell">57.60%</td>
1240
+ <td class="score-cell">29.20%</td>
1241
+ <td class="score-cell">33.46%</td>
1242
+ <td class="score-cell">38.28%</td>
1243
+ <td class="score-cell">50.99%</td>
1244
+ <td class="score-cell">1.60%</td>
1245
+ </tr>
1246
+ <tr>
1247
+ <td class="method-name">+ SFT</td>
1248
+ <td class="score-cell">27.50%</td>
1249
+ <td class="score-cell">45.20%</td>
1250
+ <td class="score-cell">33.50%</td>
1251
+ <td class="score-cell">37.85%</td>
1252
+ <td class="score-cell">47.66%</td>
1253
+ <td class="score-cell">57.06%</td>
1254
+ <td class="score-cell">23.30%</td>
1255
+ </tr>
1256
+ <tr class="nover-row">
1257
+ <td class="method-name"><strong>+ NOVER</strong></td>
1258
+ <td class="score-cell best-score">38.20%</td>
1259
+ <td class="score-cell best-score">61.80%</td>
1260
+ <td class="score-cell best-score">36.60%</td>
1261
+ <td class="score-cell best-score">50.79%</td>
1262
+ <td class="score-cell best-score">49.22%</td>
1263
+ <td class="score-cell best-score">67.79%</td>
1264
+ <td class="score-cell best-score">26.80%</td>
1265
+ </tr>
1266
+ <tr class="model-group-header">
1267
+ <td colspan="8"><strong>Other Baselines</strong></td>
1268
+ </tr>
1269
+ <tr>
1270
+ <td class="method-name">Qwen2.5-3B-Instruct</td>
1271
+ <td class="score-cell">27.10%</td>
1272
+ <td class="score-cell">50.00%</td>
1273
+ <td class="score-cell">31.50%</td>
1274
+ <td class="score-cell">21.25%</td>
1275
+ <td class="score-cell">40.62%</td>
1276
+ <td class="score-cell">58.69%</td>
1277
+ <td class="score-cell">19.90%</td>
1278
+ </tr>
1279
+ <tr>
1280
+ <td class="method-name">Qwen2.5-7B-Instruct</td>
1281
+ <td class="score-cell">29.90%</td>
1282
+ <td class="score-cell">56.20%</td>
1283
+ <td class="score-cell">35.60%</td>
1284
+ <td class="score-cell">67.72%</td>
1285
+ <td class="score-cell">46.88%</td>
1286
+ <td class="score-cell">65.23%</td>
1287
+ <td class="score-cell">23.50%</td>
1288
+ </tr>
1289
+ <tr>
1290
+ <td class="method-name">R1-Distill-Qwen-7B</td>
1291
+ <td class="score-cell">41.00%</td>
1292
+ <td class="score-cell">60.20%</td>
1293
+ <td class="score-cell">38.00%</td>
1294
+ <td class="score-cell">40.16%</td>
1295
+ <td class="score-cell">35.16%</td>
1296
+ <td class="score-cell">54.61%</td>
1297
+ <td class="score-cell">8.20%</td>
1298
+ </tr>
1299
+ </tbody>
1300
+ </table>
1301
  </div>
1302
+ <div class="table-caption">
1303
+ <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
1304
+ <strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
 
 
 
 
 
 
 
 
1305
  </div>
1306
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1307
  </div>
1308
 
1309
+ <!-- Table 2 on the right -->
1310
+ <div>
1311
+ <h3 class="table-title">General Reasoning with Different Backends</h3>
1312
+ <div class="table-section">
1313
+ <div class="results-table table-2">
1314
+ <table>
1315
+ <thead>
1316
+ <tr>
1317
+ <th class="model-type-column">Model Type</th>
1318
+ <th class="model-name-column">Model</th>
1319
+ <th class="method-column">Method</th>
1320
+ <th class="metric-column">NR</th>
1321
+ <th class="metric-column">GT</th>
1322
+ <th class="metric-column">WI</th>
1323
+ </tr>
1324
+ </thead>
1325
+ <tbody>
1326
+ <tr class="model-group-header">
1327
+ <td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Base</td>
1328
+ <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 3B</td>
1329
+ <td class="method-name">Base</td>
1330
+ <td class="score-cell">21.80%</td>
1331
+ <td class="score-cell">43.10%</td>
1332
+ <td class="score-cell">18.40%</td>
1333
+ </tr>
1334
+ <tr>
1335
+ <td class="method-name">+ CoT</td>
1336
+ <td class="score-cell">24.40%</td>
1337
+ <td class="score-cell">48.90%</td>
1338
+ <td class="score-cell">24.20%</td>
1339
+ </tr>
1340
+ <tr>
1341
+ <td class="method-name">+ SFT</td>
1342
+ <td class="score-cell">27.00%</td>
1343
+ <td class="score-cell">36.20%</td>
1344
+ <td class="score-cell">27.30%</td>
1345
+ </tr>
1346
+ <tr class="nover-row">
1347
+ <td class="method-name"><strong>+ NOVER</strong></td>
1348
+ <td class="score-cell best-score"><strong>28.60%</strong></td>
1349
+ <td class="score-cell best-score"><strong>60.30%</strong></td>
1350
+ <td class="score-cell best-score"><strong>28.10%</strong></td>
1351
+ </tr>
1352
+ <tr class="model-group-header">
1353
+ <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 7B</td>
1354
+ <td class="method-name">Base</td>
1355
+ <td class="score-cell">31.80%</td>
1356
+ <td class="score-cell">48.50%</td>
1357
+ <td class="score-cell">20.70%</td>
1358
+ </tr>
1359
+ <tr>
1360
+ <td class="method-name">+ CoT</td>
1361
+ <td class="score-cell">31.20%</td>
1362
+ <td class="score-cell">57.60%</td>
1363
+ <td class="score-cell">29.20%</td>
1364
+ </tr>
1365
+ <tr>
1366
+ <td class="method-name">+ SFT</td>
1367
+ <td class="score-cell">27.50%</td>
1368
+ <td class="score-cell">45.20%</td>
1369
+ <td class="score-cell">33.50%</td>
1370
+ </tr>
1371
+ <tr class="nover-row">
1372
+ <td class="method-name"><strong>+ NOVER</strong></td>
1373
+ <td class="score-cell best-score"><strong>38.20%</strong></td>
1374
+ <td class="score-cell best-score"><strong>61.80%</strong></td>
1375
+ <td class="score-cell best-score"><strong>36.60%</strong></td>
1376
+ </tr>
1377
+ <tr class="model-group-header">
1378
+ <td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Instruct</td>
1379
+ <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Llama-3.1-8B</td>
1380
+ <td class="method-name">Base</td>
1381
+ <td class="score-cell">34.20%</td>
1382
+ <td class="score-cell">36.70%</td>
1383
+ <td class="score-cell">29.90%</td>
1384
+ </tr>
1385
+ <tr>
1386
+ <td class="method-name">+ CoT</td>
1387
+ <td class="score-cell">28.10%</td>
1388
+ <td class="score-cell">35.10%</td>
1389
+ <td class="score-cell">30.00%</td>
1390
+ </tr>
1391
+ <tr>
1392
+ <td class="method-name">+ SFT</td>
1393
+ <td class="score-cell">23.60%</td>
1394
+ <td class="score-cell">23.40%</td>
1395
+ <td class="score-cell best-score"><strong>34.50%</strong></td>
1396
+ </tr>
1397
+ <tr class="nover-row">
1398
+ <td class="method-name"><strong>+ NOVER</strong></td>
1399
+ <td class="score-cell best-score"><strong>40.70%</strong></td>
1400
+ <td class="score-cell best-score"><strong>41.50%</strong></td>
1401
+ <td class="score-cell">34.00%</td>
1402
+ </tr>
1403
+ <tr class="model-group-header">
1404
+ <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Mistral-7B</td>
1405
+ <td class="method-name">Base</td>
1406
+ <td class="score-cell best-score"><strong>33.00%</strong></td>
1407
+ <td class="score-cell">17.80%</td>
1408
+ <td class="score-cell">27.00%</td>
1409
+ </tr>
1410
+ <tr>
1411
+ <td class="method-name">+ CoT</td>
1412
+ <td class="score-cell">29.20%</td>
1413
+ <td class="score-cell">18.60%</td>
1414
+ <td class="score-cell">27.10%</td>
1415
+ </tr>
1416
+ <tr>
1417
+ <td class="method-name">+ SFT</td>
1418
+ <td class="score-cell">22.50%</td>
1419
+ <td class="score-cell">20.70%</td>
1420
+ <td class="score-cell">27.80%</td>
1421
+ </tr>
1422
+ <tr class="nover-row">
1423
+ <td class="method-name"><strong>+ NOVER</strong></td>
1424
+ <td class="score-cell">32.20%</td>
1425
+ <td class="score-cell best-score"><strong>21.90%</strong></td>
1426
+ <td class="score-cell best-score"><strong>29.30%</strong></td>
1427
+ </tr>
1428
+ </tbody>
1429
+ </table>
1430
+ </div>
1431
+ <div class="table-caption">
1432
+ <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct.
1433
  </div>
1434
  </div>
1435
  </div>
1436
+ </div>
1437
+
1438
+ <!-- Key Takeaways below the tables - full width -->
1439
+ <div style="margin-top: 3rem;">
1440
+ <div class="glass-card">
1441
+ <h3 class="title is-4" style="color: #1a1a1a; margin-bottom: 1.5rem;">Key Takeaways</h3>
1442
+ <ul style="color: #374151; line-height: 1.8; font-size: 0.9rem;">
1443
+ <li>• NOVER trains successfully on both pretrained and instruct models, with larger gains on stronger base models</li>
1444
+ <li>• Despite the free-form nature of answers, NOVER still prefers objective solutions over subjective ones</li>
1445
+ <li>• On general reasoning, NOVER inherits base model boundaries, which have been observed in math reasoning. It struggles on false-premise tasks like FANToM</li>
1446
+ <li>• NOVER's design prevents reward hacking, avoiding issues such as reasoning explosion and collapse</li>
1447
+ <li>• Unlike closed-source or verifier-based rewards that suffer from cold start and hacking risks, NOVER remains stable</li>
1448
+ <li>• Its dense reward signals allow greater error tolerance and encourage diverse reasoning patterns</li>
1449
+ </ul>
1450
+ </div>
1451
+ </div>
1452
+
1453
+
1454
+
1455
  </div>
1456
  </section>
1457
 
1458
  <section class="section">
1459
+ <div class="container is-widescreen">
1460
  <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>
1461
+
1462
+ <div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
1463
+ <div style="width: 600px; height: 420px;">
1464
+ <img src="iit.png" alt="iit" style="width: 100%; height: 100%; object-fit: contain;">
1465
+ </div>
1466
+ <div style="width: 600px; height: 420px;">
1467
+ <img src="iit_result.png" alt="iit_result" style="width: 100%; height: 100%; object-fit: contain;">
1468
+ </div>
1469
+ </div>
1470
 
1471
  <div class="glass-card">
1472
  <div style="text-align: center;">
 
1473
  <div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
1474
  <div style="text-align: center;">
1475
+ <i class="fas fa-fish" style="font-size: 3rem; margin-bottom: 0.5rem; color: #0e41a8;"></i>
1476
+ <div style="font-size: 1.0rem; color: #0e41a8;">Reward the Outcome, Incentivize Process</div>
1477
  </div>
1478
  <div style="font-size: 1.5rem;">→</div>
1479
  <div style="text-align: center;">
1480
+ <i class="fas fa-graduation-cap" style="font-size: 3rem; margin-bottom: 0.5rem; color: #d736d2;"></i>
1481
+ <div style="font-size: 1.0rem; color: #d736d2;">Write Rubrics in the Outcome, Process as Result</div>
1482
  </div>
1483
  </div>
1484
+ <div style="font-size: 1.2rem; color: #000000;">Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
 
 
 
 
 
1485
  </div>
1486
  </div>
1487
  </div>
 
1490
 
1491
 
1492
  <section class="section" id="BibTeX">
1493
+ <div class="container is-widescreen">
1494
  <div class="glass-card">
1495
  <h2 class="title is-3">Citation</h2>
1496
  <pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
 
1507
  <div class="container has-text-centered">
1508
  <div class="content">
1509
  <div style="margin-bottom: 2rem;">
1510
+ <p>Find me at <a href="https://thinkwee.top/about" target="_blank" style="color: #10b981;">thinkwee.top/about</a>, along with more of my work on LLM agents🤖, NLP, and more~</p>
 
 
 
 
 
 
 
1511
  </div>
1512
  <p style="color: #6b7280;">
1513
  Licensed under <a href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank" style="color: #10b981;">CC BY-SA 4.0</a>
 
1517
  </footer>
1518
 
1519
  </body>
1520
+ </html>
logo.png ADDED

Git LFS Details

  • SHA256: 664d2a9d8119f4331c61bfdf0a0edb6572a1a954b37b10780ea57899299b6327
  • Pointer size: 132 Bytes
  • Size of remote file: 1.29 MB
overall.png ADDED

Git LFS Details

  • SHA256: 4281cf1760e20e5a434eaad9723481d28c81be24efca3cfbbb1a730f1666e0b2
  • Pointer size: 131 Bytes
  • Size of remote file: 173 kB
paradigm.png ADDED

Git LFS Details

  • SHA256: 7994994301154cbdd0233f66561db1582399202e372c186911bcfae516010bc3
  • Pointer size: 131 Bytes
  • Size of remote file: 188 kB