Spaces:

thinkwee
/

NOVER

Running

App Files Files Community

thinkwee committed on Aug 20, 2025

Commit

8c1709f

verified ·

1 Parent(s): 47b210f

init

Browse files

Files changed (8) hide show

.gitattributes +6 -0
example.png +3 -0
iit.png +3 -0
iit_result.png +3 -0
index.html +970 -427
logo.png +3 -0
overall.png +3 -0
paradigm.png +3 -0

.gitattributes CHANGED Viewed

@@ -46,3 +46,9 @@ static/videos/shiba.mp4 filter=lfs diff=lfs merge=lfs -text
 static/videos/steve.mp4 filter=lfs diff=lfs merge=lfs -text
 static/videos/teaser.mp4 filter=lfs diff=lfs merge=lfs -text
 static/videos/toby.mp4 filter=lfs diff=lfs merge=lfs -text

 static/videos/steve.mp4 filter=lfs diff=lfs merge=lfs -text
 static/videos/teaser.mp4 filter=lfs diff=lfs merge=lfs -text
 static/videos/toby.mp4 filter=lfs diff=lfs merge=lfs -text
+example.png filter=lfs diff=lfs merge=lfs -text
+iit_result.png filter=lfs diff=lfs merge=lfs -text
+iit.png filter=lfs diff=lfs merge=lfs -text
+logo.png filter=lfs diff=lfs merge=lfs -text
+overall.png filter=lfs diff=lfs merge=lfs -text
+paradigm.png filter=lfs diff=lfs merge=lfs -text

example.png ADDED Viewed

Git LFS Details

SHA256: 7648228d3e87ddad984626a599b514ed91d107676bdbbb52bf4fed1576a97d6a
Pointer size: 131 Bytes
Size of remote file: 866 kB

iit.png ADDED Viewed

Git LFS Details

SHA256: 44aa4e943994ae50b088cc6cf5b9b069d5dfc54ec449e2937c6b100b1df16035
Pointer size: 131 Bytes
Size of remote file: 162 kB

iit_result.png ADDED Viewed

Git LFS Details

SHA256: 116fc9d9dd1318831e77ab231b256f04d1b294562599dfc8ddaa7ca273541d2b
Pointer size: 131 Bytes
Size of remote file: 183 kB

index.html CHANGED Viewed

@@ -55,20 +55,330 @@
       z-index: -1;
     }
     .hero {
-      background: white;
       border-radius: 12px;
       margin: 2rem;
       box-shadow: 0 4px 24px rgba(0,0,0,0.06);
       border: 1px solid #e5e7eb;
     }
-    .section {
-      background: white;
       margin: 2rem;
       border-radius: 12px;
       box-shadow: 0 2px 8px rgba(0,0,0,0.04);
       border: 1px solid #e5e7eb;
     }
     .publication-title {
@@ -90,6 +400,8 @@
       border: 1px solid #e5e7eb;
       margin: 1rem 0;
       transition: all 0.2s ease;
     }
     .glass-card:hover {
@@ -105,6 +417,8 @@
       border: 1px solid #e5e7eb;
       margin: 2rem 0;
       text-align: center;
     }
     .figure-placeholder {
@@ -158,6 +472,8 @@
       text-align: left;
       transition: all 0.2s ease;
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
     }
     .insight-card:hover {
@@ -185,6 +501,8 @@
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
       border: 1px solid #e5e7eb;
       margin: 2rem 0;
     }
     .comparison-table table {
@@ -229,6 +547,8 @@
       font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
       box-shadow: 0 4px 12px rgba(0,0,0,0.15);
       margin: 2rem 0;
     }
     .metrics-header {
@@ -254,6 +574,8 @@
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
       border: 1px solid #e5e7eb;
       transition: all 0.2s ease;
     }
     .method-card:hover {
@@ -278,6 +600,8 @@
       border-radius: 12px;
       margin: 2rem 0;
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
     }
     .stats-grid {
@@ -295,6 +619,8 @@
       text-align: center;
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
       transition: all 0.2s ease;
     }
     .stat-item:hover {
@@ -321,23 +647,25 @@
       overflow: hidden;
       box-shadow: 0 2px 12px rgba(0,0,0,0.08);
       border: 1px solid #e5e7eb;
-      margin: 2rem 0;
       width: 100%;
     }
     .results-table table {
       width: 100%;
       border-collapse: collapse;
-      font-size: 0.9rem;
     }
     .results-table th {
       background: #f8fafc;
-      color: #374151;
       padding: 1rem 0.8rem;
       font-weight: 600;
       border-bottom: 2px solid #e5e7eb;
-      text-align: center;
       position: sticky;
       top: 0;
       z-index: 10;
@@ -346,29 +674,94 @@
     .results-table td {
       padding: 0.8rem;
       border-bottom: 1px solid #f3f4f6;
-      text-align: center;
-      transition: all 0.2s ease;
     }
-    .results-table tr:hover {
-      background: #f9fafb;
     }
     .results-table .method-name {
-      text-align: left;
       font-weight: 600;
       color: #1f2937;
-      padding-left: 1rem;
     }
     .results-table .nover-row {
       background: #f0fdf4;
       border-left: 3px solid #10b981;
     }
-    .results-table .nover-row:hover {
-      background: #ecfdf5;
-    }
     .results-table .best-score {
       color: #10b981;
@@ -376,19 +769,10 @@
       position: relative;
     }
-    .results-table .improvement-badge {
-      display: inline-block;
-      background: #10b981;
-      color: white;
-      font-size: 0.75rem;
-      padding: 0.2rem 0.5rem;
-      border-radius: 12px;
-      margin-left: 0.5rem;
-      font-weight: 600;
-    }
     .table-section {
-      margin: 3rem 0;
     }
     .table-title {
@@ -414,29 +798,15 @@
       background: #f1f5f9 !important;
       color: #334155;
       font-weight: 700;
-      text-align: left !important;
-      padding-left: 1rem !important;
     }
-    .score-cell {
-      position: relative;
-      cursor: pointer;
     }
-    .score-cell:hover::after {
-      content: attr(data-improvement);
-      position: absolute;
-      bottom: 100%;
-      left: 50%;
-      transform: translateX(-50%);
-      background: #1f2937;
-      color: white;
-      padding: 0.5rem;
-      border-radius: 6px;
-      font-size: 0.8rem;
-      white-space: nowrap;
-      z-index: 100;
-      opacity: 0.9;
     }
     @media (max-width: 768px) {
@@ -461,25 +831,155 @@
         display: block !important;
       }
-      .improvement-badge {
-        display: block;
-        margin: 0.2rem 0;
-      }
     }
   </style>
   <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 </head>
 <body>
   <div class="geometric-bg"></div>
 <section class="hero">
   <div class="hero-body">
-    <div class="container is-max-desktop">
       <div class="columns is-centered">
         <div class="column has-text-centered">
           <h1 class="title is-1 publication-title">NOVER</h1>
-          <h2 class="title is-2" style="color: #666; margin-top: -20px;">NO-VERifier Reinforcement Learning</h2>
           <p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>
           <div class="is-size-5 publication-authors" style="margin: 2rem 0;">
@@ -491,22 +991,22 @@
           <div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
             <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank"
-               class="external-link button is-normal" style="background: #1a1a1a; color: white; border: none; border-radius: 6px;">
               <span class="icon"><i class="fas fa-file-pdf"></i></span>
               <span>Paper</span>
             </a>
             <a href="https://github.com/thinkwee/NOVER" target="_blank"
-               class="external-link button is-normal" style="background: #10b981; color: white; border: none; border-radius: 6px;">
               <span class="icon"><i class="fab fa-github"></i></span>
               <span>Code</span>
             </a>
             <a href="#" target="_blank"
-               class="external-link button is-normal" style="background: #ff6b35; color: white; border: none; border-radius: 6px;">
               <span class="icon"><i class="fas fa-database"></i></span>
               <span>Dataset</span>
             </a>
             <a href="#" target="_blank"
-               class="external-link button is-normal" style="background: #3b82f6; color: white; border: none; border-radius: 6px;">
               <span class="icon"><i class="fas fa-cube"></i></span>
               <span>Model</span>
             </a>
@@ -518,420 +1018,470 @@
 </section>
 <section class="section">
-  <div class="container is-max-desktop">
-    <div class="abstract-card">
-      <h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">Abstract</h2>
-      <p class="is-size-5" style="color: #374151; line-height: 1.6;">
-        <span class="nover">NOVER</span> introduces a verifier-free reinforcement learning framework that enables
-        incentive training on any text-to-text task without external verifiers. Using reasoning perplexity as a reward proxy,
-        it allows training reasoning models across diverse tasks where traditional rule-based verification is infeasible,
-        from creative writing to social intelligence.
-      </p>
     </div>
   </div>
 </section>
 <section class="section">
-  <div class="container is-max-desktop">
-    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Method & Applications</h2>
     <div class="method-comparison">
       <div class="method-card">
-        <div class="method-icon"><i class="fas fa-balance-scale"></i></div>
-        <h3 class="title is-5">RLVR</h3>
-        <p>External verifiers required<br>Limited to verifiable tasks</p>
       </div>
       <div class="method-card">
         <div class="method-icon"><i class="fas fa-robot"></i></div>
         <h3 class="title is-5">RLHF</h3>
-        <p>Expensive reward models<br>Human preference data</p>
       </div>
       <div class="method-card nover">
         <div class="method-icon"><i class="fas fa-brain"></i></div>
         <h3 class="title is-5"><span class="nover">NOVER</span></h3>
-        <p>Reasoning perplexity proxy<br>Any text-to-text task</p>
       </div>
     </div>
-    <div class="figure-container" style="margin-top: 3rem;">
-      <div class="figure-placeholder experiment">
-        <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin-bottom: 1rem;">
-          <div style="text-align: center;">
-            <i class="fas fa-calculator" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
-            <div style="font-size: 0.7rem;">Math Reasoning</div>
-          </div>
-          <div style="text-align: center;">
-            <i class="fas fa-pen-fancy" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
-            <div style="font-size: 0.7rem;">Creative Writing</div>
-          </div>
-          <div style="text-align: center;">
-            <i class="fas fa-users" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
-            <div style="font-size: 0.7rem;">Social Intelligence</div>
-          </div>
-          <div style="text-align: center;">
-            <i class="fas fa-globe" style="font-size: 1.5rem; margin-bottom: 0.3rem;"></i>
-            <div style="font-size: 0.7rem;">Multilingual</div>
-          </div>
         </div>
-        <div>Text-to-Text Task Versatility</div>
-        <div style="font-size: 0.85rem; margin-top: 0.5rem; opacity: 0.8;">
-          NOVER's reward design enables reasoning training across diverse text generation tasks
         </div>
       </div>
-      <p class="has-text-grey-dark">
-        <strong>Universal Framework:</strong> Unlike traditional approaches requiring task-specific verifiers,
-        NOVER's reasoning perplexity reward works across any text-to-text task where reasoning is beneficial.
-      </p>
-    </div>
-  </div>
-</section>
-<section class="section">
-  <div class="container is-max-desktop">
-    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Results & Analysis</h2>
-    <div class="analysis-grid">
-      <div class="insight-card">
-        <div class="insight-title">Reasoning Perplexity as Proxy</div>
-        <div class="insight-description">Using ground truth likelihood conditioned on reasoning as reward signal</div>
-      </div>
-      <div class="insight-card">
-        <div class="insight-title">Policy-Proxy Synchronization</div>
-        <div class="insight-description">Exponential smoothing prevents proxy-policy divergence and reward hacking</div>
       </div>
-      <div class="insight-card">
-        <div class="insight-title">Efficiency & Format Rewards</div>
-        <div class="insight-description">Conditional rewards encourage concise reasoning and valid completions</div>
       </div>
     </div>
-    <div class="table-section">
-      <div class="table-title">Main Results: Overall Performance</div>
-      <div class="results-table">
-        <table>
-          <thead>
-            <tr>
-              <th rowspan="2" style="vertical-align: middle;">Method</th>
-              <th colspan="3">General Reasoning</th>
-              <th>Writing</th>
-              <th colspan="2">Social Intelligence</th>
-              <th>Multilingual</th>
-              <th rowspan="2" style="vertical-align: middle;">Avg.</th>
-            </tr>
-            <tr>
-              <th>NR</th>
-              <th>GT</th>
-              <th>WI</th>
-              <th>SGN</th>
-              <th>EB</th>
-              <th>TB</th>
-              <th>OPUS</th>
-            </tr>
-          </thead>
-          <tbody>
-            <tr class="model-group-header">
-              <td colspan="9"><strong>Qwen2.5-3B</strong></td>
-            </tr>
-            <tr>
-              <td class="method-name">Base Model</td>
-              <td class="score-cell">21.80%</td>
-              <td class="score-cell">43.10%</td>
-              <td class="score-cell">18.40%</td>
-              <td class="score-cell">18.70%</td>
-              <td class="score-cell">32.03%</td>
-              <td class="score-cell">46.79%</td>
-              <td class="score-cell">16.70%</td>
-              <td class="score-cell">28.22%</td>
-            </tr>
-            <tr>
-              <td class="method-name">+ CoT</td>
-              <td class="score-cell">24.40%</td>
-              <td class="score-cell">48.90%</td>
-              <td class="score-cell">24.20%</td>
-              <td class="score-cell">14.76%</td>
-              <td class="score-cell">28.12%</td>
-              <td class="score-cell">51.23%</td>
-              <td class="score-cell">1.40%</td>
-              <td class="score-cell">27.57%</td>
-            </tr>
-            <tr>
-              <td class="method-name">+ SFT</td>
-              <td class="score-cell">27.00%</td>
-              <td class="score-cell">36.20%</td>
-              <td class="score-cell">27.30%</td>
-              <td class="score-cell">20.08%</td>
-              <td class="score-cell">36.72%</td>
-              <td class="score-cell">48.66%</td>
-              <td class="score-cell">17.30%</td>
-              <td class="score-cell">30.47%</td>
-            </tr>
-            <tr class="nover-row">
-              <td class="method-name"><strong>+ NOVER</strong></td>
-              <td class="score-cell best-score">28.60%</td>
-              <td class="score-cell best-score">60.30%</td>
-              <td class="score-cell best-score">28.10%</td>
-              <td class="score-cell best-score">41.64%</td>
-              <td class="score-cell best-score">38.28%</td>
-              <td class="score-cell best-score">57.88%</td>
-              <td class="score-cell best-score">20.70%</td>
-              <td class="score-cell best-score">39.36%<span class="improvement-badge">+31.4%</span></td>
-            </tr>
-            <tr class="model-group-header">
-              <td colspan="9"><strong>Qwen2.5-7B</strong></td>
-            </tr>
-            <tr>
-              <td class="method-name">Base Model</td>
-              <td class="score-cell">31.80%</td>
-              <td class="score-cell">48.50%</td>
-              <td class="score-cell">20.70%</td>
-              <td class="score-cell">24.21%</td>
-              <td class="score-cell">28.91%</td>
-              <td class="score-cell">44.22%</td>
-              <td class="score-cell">19.30%</td>
-              <td class="score-cell">31.09%</td>
-            </tr>
-            <tr>
-              <td class="method-name">+ CoT</td>
-              <td class="score-cell">31.20%</td>
-              <td class="score-cell">57.60%</td>
-              <td class="score-cell">29.20%</td>
-              <td class="score-cell">33.46%</td>
-              <td class="score-cell">38.28%</td>
-              <td class="score-cell">50.99%</td>
-              <td class="score-cell">1.60%</td>
-              <td class="score-cell">34.62%</td>
-            </tr>
-            <tr>
-              <td class="method-name">+ SFT</td>
-              <td class="score-cell">27.50%</td>
-              <td class="score-cell">45.20%</td>
-              <td class="score-cell">33.50%</td>
-              <td class="score-cell">37.85%</td>
-              <td class="score-cell">47.66%</td>
-              <td class="score-cell">57.06%</td>
-              <td class="score-cell">23.30%</td>
-              <td class="score-cell">38.87%</td>
-            </tr>
-            <tr class="nover-row">
-              <td class="method-name"><strong>+ NOVER</strong></td>
-              <td class="score-cell best-score">38.20%</td>
-              <td class="score-cell best-score">61.80%</td>
-              <td class="score-cell best-score">36.60%</td>
-              <td class="score-cell best-score">50.79%</td>
-              <td class="score-cell best-score">49.22%</td>
-              <td class="score-cell best-score">67.79%</td>
-              <td class="score-cell best-score">26.80%</td>
-              <td class="score-cell best-score">47.31%<span class="improvement-badge">+52.2%</span></td>
-            </tr>
-            <tr>
-              <td class="method-name">Qwen2.5-3B-Instruct</td>
-              <td class="score-cell">27.10%</td>
-              <td class="score-cell">50.00%</td>
-              <td class="score-cell">31.50%</td>
-              <td class="score-cell">21.25%</td>
-              <td class="score-cell">40.62%</td>
-              <td class="score-cell">58.69%</td>
-              <td class="score-cell">19.90%</td>
-              <td class="score-cell">35.58%</td>
-            </tr>
-            <tr>
-              <td class="method-name">Qwen2.5-7B-Instruct</td>
-              <td class="score-cell">29.90%</td>
-              <td class="score-cell">56.20%</td>
-              <td class="score-cell">35.60%</td>
-              <td class="score-cell">67.72%</td>
-              <td class="score-cell">46.88%</td>
-              <td class="score-cell">65.23%</td>
-              <td class="score-cell">23.50%</td>
-              <td class="score-cell">46.43%</td>
-            </tr>
-            <tr>
-              <td class="method-name">R1-Distill-Qwen-7B</td>
-              <td class="score-cell">41.00%</td>
-              <td class="score-cell">60.20%</td>
-              <td class="score-cell">38.00%</td>
-              <td class="score-cell">40.16%</td>
-              <td class="score-cell">35.16%</td>
-              <td class="score-cell">54.61%</td>
-              <td class="score-cell">8.20%</td>
-              <td class="score-cell">39.62%</td>
-            </tr>
-          </tbody>
-        </table>
-      </div>
-      <div class="table-caption">
-        <strong>Table 1:</strong> Overall performance across diverse text-to-text tasks. NOVER achieves significant improvements over base models and competitive methods.
-        <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
-        <strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
-      </div>
-    </div>
   </div>
 </section>
 <section class="section">
-  <div class="container is-max-desktop">
-    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Key Technical Insights</h2>
-    <div class="figure-container">
-      <div class="figure-placeholder analysis">
-        <div style="display: flex; justify-content: space-around; margin-bottom: 1rem;">
-          <div style="text-align: center;">
-            <div style="width: 50px; height: 50px; border-radius: 50%; background: #10b981; display: flex; align-items: center; justify-content: center; margin: 0 auto 0.5rem;">
-              <i class="fas fa-brain" style="color: white; font-size: 1.2rem;"></i>
-            </div>
-            <div style="font-size: 0.8rem;">Reasoning Proxy</div>
           </div>
-          <div style="text-align: center;">
-            <div style="width: 50px; height: 50px; border-radius: 50%; background: #3b82f6; display: flex; align-items: center; justify-content: center; margin: 0 auto 0.5rem;">
-              <i class="fas fa-sync-alt" style="color: white; font-size: 1.2rem;"></i>
-            </div>
-            <div style="font-size: 0.8rem;">Policy Sync</div>
-          </div>
-          <div style="text-align: center;">
-            <div style="width: 50px; height: 50px; border-radius: 50%; background: #f59e0b; display: flex; align-items: center; justify-content: center; margin: 0 auto 0.5rem;">
-              <i class="fas fa-shield-alt" style="color: white; font-size: 1.2rem;"></i>
-            </div>
-            <div style="font-size: 0.8rem;">Stability</div>
           </div>
         </div>
-        <div>Core Components: Reasoning Perplexity, Synchronization & Stability</div>
-        <div style="font-size: 0.85rem; margin-top: 0.5rem; opacity: 0.8;">
-          How NOVER prevents reward hacking while enabling reasoning across diverse tasks
-        </div>
-      </div>
-      <p class="has-text-grey-dark">
-        <strong>Technical Innovation:</strong> NOVER combines reasoning perplexity as reward proxy with policy-proxy
-        synchronization to prevent reward hacking, enabling stable training across any text-to-text task.
-      </p>
-    </div>
-    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; margin-top: 3rem;">
-      <div class="table-section">
-        <div class="table-title" style="font-size: 1.2rem;">FANToM: False Premise Tasks</div>
-        <div class="results-table">
-          <table>
-            <thead>
-              <tr>
-                <th>Method</th>
-                <th>3B Model</th>
-                <th>7B Model</th>
-              </tr>
-            </thead>
-            <tbody>
-              <tr>
-                <td class="method-name">Base</td>
-                <td class="score-cell">12.43%</td>
-                <td class="score-cell">14.59%</td>
-              </tr>
-              <tr>
-                <td class="method-name">+ CoT</td>
-                <td class="score-cell">14.23%</td>
-                <td class="score-cell">19.28%</td>
-              </tr>
-              <tr>
-                <td class="method-name">+ SFT</td>
-                <td class="score-cell">26.49%</td>
-                <td class="score-cell">29.73%</td>
-              </tr>
-              <tr class="nover-row">
-                <td class="method-name"><strong>+ NOVER</strong></td>
-                <td class="score-cell best-score">18.74%</td>
-                <td class="score-cell best-score">23.42%</td>
-              </tr>
-            </tbody>
-          </table>
-        </div>
-        <div class="table-caption" style="font-size: 0.8rem;">
-          <strong>Table 2:</strong> Theory of mind tasks with false premise problems. NOVER shows balanced performance.
-        </div>
       </div>
-      <div class="table-section">
-        <div class="table-title" style="font-size: 1.2rem;">Verifier Comparison</div>
-        <div class="results-table">
-          <table>
-            <thead>
-              <tr>
-                <th>Group</th>
-                <th>Method</th>
-                <th>3B</th>
-                <th>7B</th>
-              </tr>
-            </thead>
-            <tbody>
-              <tr class="model-group-header">
-                <td rowspan="3">Baselines</td>
-                <td class="method-name">Base</td>
-                <td class="score-cell">18.40%</td>
-                <td class="score-cell">20.70%</td>
-              </tr>
-              <tr>
-                <td class="method-name">+ CoT</td>
-                <td class="score-cell">24.20%</td>
-                <td class="score-cell">29.20%</td>
-              </tr>
-              <tr>
-                <td class="method-name">+ SFT</td>
-                <td class="score-cell">27.30%</td>
-                <td class="score-cell">33.50%</td>
-              </tr>
-              <tr class="model-group-header">
-                <td rowspan="3">Model as<br>Verifier</td>
-                <td class="method-name">+ GV</td>
-                <td class="score-cell">18.30%</td>
-                <td class="score-cell">30.00%</td>
-              </tr>
-              <tr>
-                <td class="method-name">+ LJ</td>
-                <td class="score-cell">21.40%</td>
-                <td class="score-cell">3.80%</td>
-              </tr>
-              <tr>
-                <td class="method-name">+ LJ_S</td>
-                <td class="score-cell">--</td>
-                <td class="score-cell">21.60%</td>
-              </tr>
-              <tr class="nover-row">
-                <td>Verifier-Free</td>
-                <td class="method-name"><strong>+ NOVER</strong></td>
-                <td class="score-cell best-score">28.10%</td>
-                <td class="score-cell best-score">36.60%</td>
-              </tr>
-            </tbody>
-          </table>
-        </div>
-        <div class="table-caption" style="font-size: 0.8rem;">
-          <strong>Table 3:</strong> WebInstruct experiments comparing LLM-as-a-Judge (LJ) and general verifier (GV) approaches.
         </div>
       </div>
     </div>
   </div>
 </section>
 <section class="section">
-  <div class="container is-max-desktop">
     <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>
     <div class="glass-card">
       <div style="text-align: center;">
-        <div class="figure-placeholder analysis" style="margin-bottom: 2rem;">
           <div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
             <div style="text-align: center;">
-              <i class="fas fa-fish" style="font-size: 2rem; margin-bottom: 0.5rem;"></i>
-              <div style="font-size: 0.8rem;">Standard Training</div>
             </div>
             <div style="font-size: 1.5rem;">→</div>
             <div style="text-align: center;">
-              <i class="fas fa-graduation-cap" style="font-size: 2rem; margin-bottom: 0.5rem;"></i>
-              <div style="font-size: 0.8rem;">Process Reward</div>
             </div>
           </div>
-          <div>Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
-        </div>
-        <p style="color: #6b7280; line-height: 1.6;">
-          <strong>Novel Paradigm:</strong> Inverse incentive training rewards the reasoning process itself,
-          leading to enhanced creativity and thoughtfulness in open-ended tasks.
-        </p>
       </div>
     </div>
   </div>
@@ -940,7 +1490,7 @@
 <section class="section" id="BibTeX">
-  <div class="container is-max-desktop">
     <div class="glass-card">
       <h2 class="title is-3">Citation</h2>
       <pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
@@ -957,14 +1507,7 @@
   <div class="container has-text-centered">
     <div class="content">
       <div style="margin-bottom: 2rem;">
-        <a class="icon-link" target="_blank" href="https://arxiv.org/pdf/2505.16022.pdf"
-           style="margin: 0 1rem; font-size: 2rem; color: #1a1a1a;">
-          <i class="fas fa-file-pdf"></i>
-        </a>
-        <a class="icon-link" href="https://github.com/thinkwee/NOVER" target="_blank"
-           style="margin: 0 1rem; font-size: 2rem; color: #10b981;">
-          <i class="fab fa-github"></i>
-        </a>
       </div>
       <p style="color: #6b7280;">
         Licensed under <a href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank" style="color: #10b981;">CC BY-SA 4.0</a>
@@ -974,4 +1517,4 @@
 </footer>
 </body>
-</html>

       z-index: -1;
     }
+    /* Hero Section - Brain/Neural Network Pattern */
     .hero {
+      background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
       border-radius: 12px;
       margin: 2rem;
       box-shadow: 0 4px 24px rgba(0,0,0,0.06);
       border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
     }
+    .hero::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          45deg,
+          transparent,
+          transparent 40px,
+          rgba(16, 185, 129, 0.04) 40px,
+          rgba(16, 185, 129, 0.04) 80px
+        ),
+        repeating-linear-gradient(
+          -45deg,
+          transparent,
+          transparent 40px,
+          rgba(59, 130, 246, 0.03) 40px,
+          rgba(59, 130, 246, 0.03) 80px
+        );
+      background-size: 80px 80px, 80px 80px;
+      background-position: 0 0, 40px 40px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Abstract Section - Circuit/Technology Pattern */
+    .section:nth-of-type(1) {
+      background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
+      margin: 2rem;
+      border-radius: 12px;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+      border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
+    }
+    .section:nth-of-type(1)::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          90deg,
+          transparent,
+          transparent 30px,
+          rgba(59, 130, 246, 0.06) 30px,
+          rgba(59, 130, 246, 0.06) 60px
+        ),
+        repeating-linear-gradient(
+          0deg,
+          transparent,
+          transparent 30px,
+          rgba(59, 130, 246, 0.06) 30px,
+          rgba(59, 130, 246, 0.06) 60px
+        );
+      background-size: 60px 60px, 60px 60px;
+      background-position: 0 0, 30px 30px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Incentivize Reasoning Section - Puzzle/Logic Pattern */
+    .section:nth-of-type(2) {
+      background: linear-gradient(135deg, #ffffff 0%, #f0fdf4 100%);
+      margin: 2rem;
+      border-radius: 12px;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+      border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
+    }
+    .section:nth-of-type(2)::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          45deg,
+          transparent,
+          transparent 50px,
+          rgba(16, 185, 129, 0.07) 50px,
+          rgba(16, 185, 129, 0.07) 100px
+        ),
+        repeating-linear-gradient(
+          -45deg,
+          transparent,
+          transparent 50px,
+          rgba(16, 185, 129, 0.05) 50px,
+          rgba(16, 185, 129, 0.05) 100px
+        );
+      background-size: 100px 100px, 100px 100px;
+      background-position: 0 0, 50px 50px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* NOVER Methodology Section - Mathematical/Formula Pattern */
+    .section:nth-of-type(3) {
+      background: linear-gradient(135deg, #ffffff 0%, #fefce8 100%);
+      margin: 2rem;
+      border-radius: 12px;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+      border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
+    }
+    .section:nth-of-type(3)::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          30deg,
+          transparent,
+          transparent 40px,
+          rgba(245, 158, 11, 0.06) 40px,
+          rgba(245, 158, 11, 0.06) 80px
+        ),
+        repeating-linear-gradient(
+          -30deg,
+          transparent,
+          transparent 40px,
+          rgba(245, 158, 11, 0.05) 40px,
+          rgba(245, 158, 11, 0.05) 80px
+        );
+      background-size: 80px 80px, 80px 80px;
+      background-position: 0 0, 40px 40px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Experimental Results Section - Data/Chart Pattern */
+    .section:nth-of-type(4) {
+      background: linear-gradient(135deg, #ffffff 0%, #fef2f2 100%);
+      margin: 2rem;
+      border-radius: 12px;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+      border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
+    }
+    .section:nth-of-type(4)::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          0deg,
+          transparent,
+          transparent 35px,
+          rgba(239, 68, 68, 0.06) 35px,
+          rgba(239, 68, 68, 0.06) 70px
+        ),
+        repeating-linear-gradient(
+          90deg,
+          transparent,
+          transparent 35px,
+          rgba(239, 68, 68, 0.05) 35px,
+          rgba(239, 68, 68, 0.05) 70px
+        );
+      background-size: 70px 70px, 70px 70px;
+      background-position: 0 0, 35px 35px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Inverse Incentive Training Section - Fish/Teaching Pattern */
+    .section:nth-of-type(5) {
+      background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
       margin: 2rem;
       border-radius: 12px;
       box-shadow: 0 2px 8px rgba(0,0,0,0.04);
       border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
+    }
+    .section:nth-of-type(5)::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          60deg,
+          transparent,
+          transparent 60px,
+          rgba(14, 165, 233, 0.07) 60px,
+          rgba(14, 165, 233, 0.07) 120px
+        ),
+        repeating-linear-gradient(
+          -60deg,
+          transparent,
+          transparent 60px,
+          rgba(14, 165, 233, 0.05) 60px,
+          rgba(14, 165, 233, 0.05) 120px
+        );
+      background-size: 120px 120px, 120px 120px;
+      background-position: 0 0, 60px 60px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Citation Section - Book/Paper Pattern */
+    .section:nth-of-type(6) {
+      background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
+      margin: 2rem;
+      border-radius: 12px;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+      border: 1px solid #e5e7eb;
+      position: relative;
+      overflow: hidden;
+    }
+    .section:nth-of-type(6)::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          25deg,
+          transparent,
+          transparent 45px,
+          rgba(107, 114, 128, 0.06) 45px,
+          rgba(107, 114, 128, 0.06) 90px
+        ),
+        repeating-linear-gradient(
+          -25deg,
+          transparent,
+          transparent 45px,
+          rgba(107, 114, 128, 0.05) 45px,
+          rgba(107, 114, 128, 0.05) 90px
+        );
+      background-size: 90px 90px, 90px 90px;
+      background-position: 0 0, 45px 45px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Footer Section - Social/Network Pattern */
+    footer.section {
+      background: linear-gradient(135deg, #ffffff 0%, #f9fafb 100%);
+      border-top: 1px solid #e5e7eb;
+      margin-top: 4rem;
+      position: relative;
+      overflow: hidden;
+    }
+    footer.section::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: 0;
+      width: 100%;
+      height: 100%;
+      background-image:
+        repeating-linear-gradient(
+          45deg,
+          transparent,
+          transparent 80px,
+          rgba(16, 185, 129, 0.06) 80px,
+          rgba(16, 185, 129, 0.06) 160px
+        ),
+        repeating-linear-gradient(
+          -45deg,
+          transparent,
+          transparent 80px,
+          rgba(59, 130, 246, 0.05) 80px,
+          rgba(59, 130, 246, 0.05) 160px
+        );
+      background-size: 160px 160px, 160px 160px;
+      background-position: 0 0, 80px 80px;
+      opacity: 0.3;
+      pointer-events: none;
+      z-index: 0;
+    }
+    /* Ensure content is above patterns */
+    .hero-body,
+    .section .container,
+    footer .container {
+      position: relative;
+      z-index: 1;
     }
     .publication-title {
       border: 1px solid #e5e7eb;
       margin: 1rem 0;
       transition: all 0.2s ease;
+      position: relative;
+      z-index: 1;
     }
     .glass-card:hover {
       border: 1px solid #e5e7eb;
       margin: 2rem 0;
       text-align: center;
+      position: relative;
+      z-index: 1;
     }
     .figure-placeholder {
       text-align: left;
       transition: all 0.2s ease;
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+      position: relative;
+      z-index: 1;
     }
     .insight-card:hover {
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
       border: 1px solid #e5e7eb;
       margin: 2rem 0;
+      position: relative;
+      z-index: 1;
     }
     .comparison-table table {
       font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
       box-shadow: 0 4px 12px rgba(0,0,0,0.15);
       margin: 2rem 0;
+      position: relative;
+      z-index: 1;
     }
     .metrics-header {
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
       border: 1px solid #e5e7eb;
       transition: all 0.2s ease;
+      position: relative;
+      z-index: 1;
     }
     .method-card:hover {
       border-radius: 12px;
       margin: 2rem 0;
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+      position: relative;
+      z-index: 1;
     }
     .stats-grid {
       text-align: center;
       box-shadow: 0 1px 3px rgba(0,0,0,0.1);
       transition: all 0.2s ease;
+      position: relative;
+      z-index: 1;
     }
     .stat-item:hover {
       overflow: hidden;
       box-shadow: 0 2px 12px rgba(0,0,0,0.08);
       border: 1px solid #e5e7eb;
+      margin: 0;
       width: 100%;
+      position: relative;
+      z-index: 1;
     }
     .results-table table {
       width: 100%;
       border-collapse: collapse;
+      font-size: 0.8rem;
     }
     .results-table th {
       background: #f8fafc;
+      color: #374155;
       padding: 1rem 0.8rem;
       font-weight: 600;
       border-bottom: 2px solid #e5e7eb;
+      text-align: center !important;
       position: sticky;
       top: 0;
       z-index: 10;
     .results-table td {
       padding: 0.8rem;
       border-bottom: 1px solid #f3f4f6;
+      text-align: center !important;
     }
+    /* Fine-tune table height - Table 1: body cells use a tight line-height,
+       header cells get extra vertical padding (1.2rem) */
+    .results-table.table-1 td {
+      padding: 0.8rem;
+      line-height: 0.9;
+    }
+    .results-table.table-1 th {
+      padding: 1.2rem 0.8rem;
+    }
+    /* Fine-tune table height - Table 2: more compact header padding (0.8rem) */
+    .results-table.table-2 td {
+      padding: 0.8rem 0.8rem;
+      line-height: 1.3;
+    }
+    .results-table.table-2 th {
+      padding: 0.8rem 0.8rem;
+    }
+    /* Adjust the row height of the model-group header cells in Table 2 */
+    .results-table.table-2 .model-group-header td {
+      padding: 0.8rem 0.6rem;
+      line-height: 1.3;
+    }
+    /* Further fine-tune table spacing: explicit body-row heights per table */
+    .results-table.table-1 tbody tr {
+      height: 48px;
+    }
+    .results-table.table-2 tbody tr {
+      height: 42px;
+    }
+    /* Adjust the gap above the caption that immediately follows each table wrapper */
+    .table-1 + .table-caption {
+      margin-top: 1.5rem;
+    }
+    .table-2 + .table-caption {
+      margin-top: 1rem;
     }
     .results-table .method-name {
+      text-align: center !important;
       font-weight: 600;
       color: #1f2937;
     }
+    /* Column widths for Table 2: width, min-width and max-width are all set to
+       the same value so each column stays at a fixed size */
+    .results-table .model-type-column {
+      width: 100px;
+      min-width: 100px;
+      max-width: 100px;
+    }
+    .results-table .model-name-column {
+      width: 100px;
+      min-width: 100px;
+      max-width: 100px;
+    }
+    .results-table .method-column {
+      width: 100px;
+      min-width: 100px;
+      max-width: 100px;
+    }
+    .results-table .metric-column {
+      width: 80px;
+      min-width: 80px;
+      max-width: 80px;
+    }
     .results-table .nover-row {
       background: #f0fdf4;
       border-left: 3px solid #10b981;
     }
     .results-table .best-score {
       color: #10b981;
       position: relative;
     }
     .table-section {
+      margin: 0;
     }
     .table-title {
       background: #f1f5f9 !important;
       color: #334155;
       font-weight: 700;
+      text-align: center !important;
     }
+    .model-group-header td {
+      text-align: center !important;
     }
+    .score-cell {
+      position: relative;
     }
     @media (max-width: 768px) {
         display: block !important;
       }
+    }
+    .formula-container {
+      background: #f8fafc;
+      border: 1px solid #e2e8f0;
+      border-radius: 12px;
+      padding: 2rem;
+      margin: 2rem 0;
+      text-align: center;
+      position: relative;
+      z-index: 1;
+    }
+    .formula-container::before {
+      content: '🧮';
+      position: absolute;
+      top: 1rem;
+      left: 1rem;
+      font-size: 1.2rem;
+    }
+    .formula-title {
+      font-size: 1.1rem;
+      font-weight: 600;
+      color: #334155;
+      margin-bottom: 1rem;
+    }
+    .formula-description {
+      font-size: 0.9rem;
+      color: #64748b;
+      margin-top: 1rem;
+      line-height: 1.5;
+    }
+    .diagram-grid {
+      display: grid;
+      grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+      gap: 2rem;
+      margin: 3rem 0;
+    }
+    .diagram-card {
+      background: white;
+      border-radius: 12px;
+      padding: 2rem;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.06);
+      border: 1px solid #e5e7eb;
+      text-align: center;
+      transition: all 0.3s ease;
+      position: relative;
+      z-index: 1;
+    }
+    .diagram-card:hover {
+      transform: translateY(-2px);
+      box-shadow: 0 8px 24px rgba(0,0,0,0.12);
+    }
+    .diagram-placeholder {
+      height: 200px;
+      background: #f1f5f9;
+      border: 2px dashed #94a3b8;
+      border-radius: 8px;
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      justify-content: center;
+      margin-bottom: 1rem;
+      transition: all 0.3s ease;
+    }
+    .diagram-placeholder:hover {
+      background: #e2e8f0;
+      border-color: #64748b;
+    }
+    .diagram-icon {
+      font-size: 3rem;
+      color: #64748b;
+      margin-bottom: 0.5rem;
+    }
+    .diagram-label {
+      font-size: 0.9rem;
+      color: #475569;
+      font-weight: 500;
+    }
+    .diagram-description {
+      font-size: 0.85rem;
+      color: #64748b;
+      line-height: 1.4;
+    }
+    /* Apple Design Button Styles */
+    .external-link.button {
+      position: relative;
+      overflow: hidden;
+    }
+    .external-link.button::before {
+      content: '';
+      position: absolute;
+      top: 0;
+      left: -100%;
+      width: 100%;
+      height: 100%;
+      background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
+      transition: left 0.5s;
+    }
+    .external-link.button:hover {
+      transform: translateY(-2px);
+      box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15), 0 4px 8px rgba(0, 0, 0, 0.1);
+    }
+    .external-link.button:hover::before {
+      left: 100%;
+    }
+    .external-link.button:active {
+      transform: translateY(0);
+      transition: transform 0.1s;
     }
   </style>
   <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
+  <!-- MathJax setup.
+       1. window.MathJax must be defined BEFORE the MathJax loader script runs;
+          the loader reads this object at start-up, and because it is loaded
+          with `async` it may otherwise execute before the configuration exists
+          (e.g. when served from cache), silently ignoring the `$...$` delimiters.
+       2. The former polyfill.io include was removed: that domain served
+          malicious code after a 2024 ownership change, and the ES6 features it
+          polyfilled are natively available in all current browsers. -->
+  <script>
+    window.MathJax = {
+      tex: {
+        inlineMath: [['$', '$'], ['\\(', '\\)']],
+        displayMath: [['$$', '$$'], ['\\[', '\\]']]
+      }
+    };
+  </script>
+  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
 </head>
 <body>
   <div class="geometric-bg"></div>
 <section class="hero">
   <div class="hero-body">
+    <div class="container is-widescreen">
       <div class="columns is-centered">
         <div class="column has-text-centered">
           <h1 class="title is-1 publication-title">NOVER</h1>
           <p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>
           <div class="is-size-5 publication-authors" style="margin: 2rem 0;">
           <div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
             <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank"
+               class="external-link button is-normal" style="background: linear-gradient(135deg, #B31B1B 0%, #D32F2F 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(179, 27, 27, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
               <span class="icon"><i class="fas fa-file-pdf"></i></span>
               <span>Paper</span>
             </a>
             <a href="https://github.com/thinkwee/NOVER" target="_blank"
+               class="external-link button is-normal" style="background: linear-gradient(135deg, #24292e 0%, #2f363d 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(36, 41, 46, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
               <span class="icon"><i class="fab fa-github"></i></span>
               <span>Code</span>
             </a>
             <a href="#" target="_blank"
+               class="external-link button is-normal" style="background: linear-gradient(135deg, #FFD43B 0%, #FFE066 100%); color: #000; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(255, 212, 59, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
               <span class="icon"><i class="fas fa-database"></i></span>
               <span>Dataset</span>
             </a>
             <a href="#" target="_blank"
+               class="external-link button is-normal" style="background: linear-gradient(135deg, #0EA5E9 0%, #38BDF8 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(14, 165, 233, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
               <span class="icon"><i class="fas fa-cube"></i></span>
               <span>Model</span>
             </a>
 </section>
 <section class="section">
+  <div class="container is-widescreen">
+    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; align-items: start;">
+      <!-- Abstract on the left -->
+      <div class="abstract-card">
+        <h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">TL;DR</h2>
+        <p class="is-size-5" style="color: #374151; line-height: 1.6;">
+          <span class="nover">NOVER</span> (NO-Verifier Reinforcement Learning) enables
+          incentive training on any text-to-text task without external verifiers. It uses the policy model's reasoning perplexity to estimate the reward.
+          <br>
+          <br>
+          <strong>• Your LLM is secretly a verifier.
+          <br>
+          • Your LLM only reasons on <s>Easy-to-Verify</s> tasks.
+          <br>
+          • Your LLM can <s>reason</s> on ANY tasks.
+          <br>
+          • Your LLM can be incentivized to do more than reasoning.</strong>
+          <br>
+        </p>
+      </div>
+      <!-- Overall framework image placeholder on the right -->
+      <div class="figure-container">
+        <div class="figure-placeholder" style="height: 280px; display: flex; flex-direction: column; justify-content: center; align-items: center;">
+          <img src="logo.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
+        </div>
+        <div style="font-size: 1.1rem; color: #2e3036; text-align: center; margin-top: 0.5rem;">
+          <div><span class="nover">NOVER</span> extends RLVR on any text-to-text task</div>
+          <div>beyond easy-to-verify math/coding problems.</div>
+        </div>
+      </div>
+    </div>
+  </div>
+</section>
+<section class="section">
+  <div class="container is-widescreen">
+    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Incentivize Reasoning on Any Task</h2>
+    <p class="is-size-5" style="color: #6b7280; margin-bottom: 3rem; max-width: 800px; margin-left: auto; margin-right: auto;">
+      NOVER enables training large reasoning models on any text data and any task.<br>
+      NO verifiers/models/rules needed, just the ground-truth answer and the policy model itself.<br>
+      <strong>General Reasoning:</strong> ⚛️ physics • ⚖️ law • 🏥 medical • 💰 finance<br>
+      <strong>Creative Tasks:</strong> 🎨 creative writing<br>
+      <strong>Social Intelligence:</strong> 🧠 theory of mind • 😊 emotion detection • 🤝 social reasoning<br>
+      <strong>Natural Language Generation:</strong> 🌍 translation • 📚 summarization
+    </p>
+    <div class="figure-container">
+        <img src="example.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
     </div>
   </div>
 </section>
 <section class="section">
+  <div class="container is-widescreen">
+    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">NOVER Methodology</h2>
+    <!-- Image Placeholders Row -->
+    <div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
+      <div style="width: 600px; height: 420px;">
+        <img src="paradigm.png" alt="paradigm" style="width: 100%; height: 100%; object-fit: contain;">
+      </div>
+      <div style="width: 600px; height: 420px;">
+        <img src="overall.png" alt="overall" style="width: 100%; height: 100%; object-fit: contain;">
+      </div>
+    </div>
+    <!-- Core Framework Comparison -->
     <div class="method-comparison">
       <div class="method-card">
+        <div class="method-icon"><i class="fas fa-graduation-cap"></i></div>
+        <h3 class="title is-5">SFT</h3>
+        <p>Memorize Input-Output Patterns</p>
       </div>
       <div class="method-card">
         <div class="method-icon"><i class="fas fa-robot"></i></div>
         <h3 class="title is-5">RLHF</h3>
+        <p>Train Reward Model <br>Give Preference Feedback</p>
+      </div>
+      <div class="method-card">
+        <div class="method-icon"><i class="fas fa-balance-scale"></i></div>
+        <h3 class="title is-5">RLVR</h3>
+        <p>Rule-based Reward <br>End2End Outcome RL</p>
       </div>
       <div class="method-card nover">
         <div class="method-icon"><i class="fas fa-brain"></i></div>
         <h3 class="title is-5"><span class="nover">NOVER</span></h3>
+        <p>Reasoning Perplexity as Reward<br>Reason on Any Task</p>
       </div>
     </div>
+    <!-- Consolidated Mathematical Formulations -->
+    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 2rem; margin: 3rem 0;">
+      <!-- Reasoning Perplexity -->
+      <div class="formula-container">
+        <div class="formula-title">Reasoning Perplexity</div>
+        <div style="font-size: 0.9rem; margin: 1rem 0;">
+          $P_r(p, t, g) = \exp\left(-\frac{\sum_{i=1}^{|g|} \log \pi_{p}(g_i \mid p, t, g_{&lt;i})}{|g| \cdot N(|t|)}\right)$
         </div>
+        <div class="formula-description">
+          Uses the policy model's perplexity on the ground truth, conditioned on the reasoning trajectory, as a reward proxy
         </div>
       </div>
+      <!-- Rewards -->
+      <div class="formula-container">
+        <div class="formula-title">Rewards</div>
+        <div style="font-size: 1.1rem; margin: 1rem 0;">
+          $$R_{\mathrm{total}} = w_{\mathrm{f}} R_{\mathrm{f}} + \mathbb{I}(R_{\mathrm{f}} = 1) \cdot (w_{\mathrm{r}} R_{\mathrm{r}} + w_{\mathrm{e}} R_{\mathrm{e}})$$
+        </div>
+        <div class="formula-description">
+          Combined reward function incorporating reasoning, efficiency, and format components
+        </div>
       </div>
+      <!-- Policy-Proxy Synchronization -->
+      <div class="formula-container">
+        <div class="formula-title">Policy-Proxy Synchronization</div>
+        <div style="font-size: 1.1rem; margin: 1rem 0;">
+          $$\pi_{\mathrm{p}} \leftarrow \alpha \cdot \pi_{\mathrm{p}} + (1-\alpha) \cdot \pi_{\theta}$$
+        </div>
+        <div class="formula-description">
+          Smooth synchronization between policy and proxy ensures stable training with limited resources
+        </div>
       </div>
     </div>
   </div>
 </section>
 <section class="section">
+  <div class="container is-widescreen">
+    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Experimental Results</h2>
+    <!-- Table 1 and Table 2 in two columns -->
+    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; margin-top: 3rem; align-items: start;">
+      <!-- Table 1 on the left -->
+      <div>
+        <h3 class="table-title">Overall on NOVEReason Dataset</h3>
+        <!-- Main Results Table -->
+        <div class="table-section">
+          <div class="results-table table-1">
+            <table>
+              <thead>
+                <tr>
+                  <th>Method</th>
+                  <th>NR</th>
+                  <th>GT</th>
+                  <th>WI</th>
+                  <th>SGN</th>
+                  <th>EB</th>
+                  <th>TB</th>
+                  <th>OPUS</th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr class="model-group-header">
+                  <td colspan="8"><strong>Qwen2.5-3B</strong></td>
+                </tr>
+                <tr>
+                  <td class="method-name">Base</td>
+                  <td class="score-cell">21.80%</td>
+                  <td class="score-cell">43.10%</td>
+                  <td class="score-cell">18.40%</td>
+                  <td class="score-cell">18.70%</td>
+                  <td class="score-cell">32.03%</td>
+                  <td class="score-cell">46.79%</td>
+                  <td class="score-cell">16.70%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ CoT</td>
+                  <td class="score-cell">24.40%</td>
+                  <td class="score-cell">48.90%</td>
+                  <td class="score-cell">24.20%</td>
+                  <td class="score-cell">14.76%</td>
+                  <td class="score-cell">28.12%</td>
+                  <td class="score-cell">51.23%</td>
+                  <td class="score-cell">1.40%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ SFT</td>
+                  <td class="score-cell">27.00%</td>
+                  <td class="score-cell">36.20%</td>
+                  <td class="score-cell">27.30%</td>
+                  <td class="score-cell">20.08%</td>
+                  <td class="score-cell">36.72%</td>
+                  <td class="score-cell">48.66%</td>
+                  <td class="score-cell">17.30%</td>
+                </tr>
+                <tr class="nover-row">
+                  <td class="method-name"><strong>+ NOVER</strong></td>
+                  <td class="score-cell best-score">28.60%</td>
+                  <td class="score-cell best-score">60.30%</td>
+                  <td class="score-cell best-score">28.10%</td>
+                  <td class="score-cell best-score">41.64%</td>
+                  <td class="score-cell best-score">38.28%</td>
+                  <td class="score-cell best-score">57.88%</td>
+                  <td class="score-cell best-score">20.70%</td>
+                </tr>
+                <tr class="model-group-header">
+                  <td colspan="8"><strong>Qwen2.5-7B</strong></td>
+                </tr>
+                <tr>
+                  <td class="method-name">Base</td>
+                  <td class="score-cell">31.80%</td>
+                  <td class="score-cell">48.50%</td>
+                  <td class="score-cell">20.70%</td>
+                  <td class="score-cell">24.21%</td>
+                  <td class="score-cell">28.91%</td>
+                  <td class="score-cell">44.22%</td>
+                  <td class="score-cell">19.30%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ CoT</td>
+                  <td class="score-cell">31.20%</td>
+                  <td class="score-cell">57.60%</td>
+                  <td class="score-cell">29.20%</td>
+                  <td class="score-cell">33.46%</td>
+                  <td class="score-cell">38.28%</td>
+                  <td class="score-cell">50.99%</td>
+                  <td class="score-cell">1.60%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ SFT</td>
+                  <td class="score-cell">27.50%</td>
+                  <td class="score-cell">45.20%</td>
+                  <td class="score-cell">33.50%</td>
+                  <td class="score-cell">37.85%</td>
+                  <td class="score-cell">47.66%</td>
+                  <td class="score-cell">57.06%</td>
+                  <td class="score-cell">23.30%</td>
+                </tr>
+                <tr class="nover-row">
+                  <td class="method-name"><strong>+ NOVER</strong></td>
+                  <td class="score-cell best-score">38.20%</td>
+                  <td class="score-cell best-score">61.80%</td>
+                  <td class="score-cell best-score">36.60%</td>
+                  <td class="score-cell best-score">50.79%</td>
+                  <td class="score-cell best-score">49.22%</td>
+                  <td class="score-cell best-score">67.79%</td>
+                  <td class="score-cell best-score">26.80%</td>
+                </tr>
+                <tr class="model-group-header">
+                  <td colspan="8"><strong>Other Baselines</strong></td>
+                </tr>
+                <tr>
+                  <td class="method-name">Qwen2.5-3B-Instruct</td>
+                  <td class="score-cell">27.10%</td>
+                  <td class="score-cell">50.00%</td>
+                  <td class="score-cell">31.50%</td>
+                  <td class="score-cell">21.25%</td>
+                  <td class="score-cell">40.62%</td>
+                  <td class="score-cell">58.69%</td>
+                  <td class="score-cell">19.90%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">Qwen2.5-7B-Instruct</td>
+                  <td class="score-cell">29.90%</td>
+                  <td class="score-cell">56.20%</td>
+                  <td class="score-cell">35.60%</td>
+                  <td class="score-cell">67.72%</td>
+                  <td class="score-cell">46.88%</td>
+                  <td class="score-cell">65.23%</td>
+                  <td class="score-cell">23.50%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">R1-Distill-Qwen-7B</td>
+                  <td class="score-cell">41.00%</td>
+                  <td class="score-cell">60.20%</td>
+                  <td class="score-cell">38.00%</td>
+                  <td class="score-cell">40.16%</td>
+                  <td class="score-cell">35.16%</td>
+                  <td class="score-cell">54.61%</td>
+                  <td class="score-cell">8.20%</td>
+                </tr>
+              </tbody>
+            </table>
           </div>
+          <div class="table-caption">
+            <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
+            <strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
           </div>
         </div>
       </div>
+      <!-- Table 2 on the right -->
+      <div>
+        <h3 class="table-title">General Reasoning with Different Backends</h3>
+        <div class="table-section">
+          <div class="results-table table-2">
+            <table>
+              <thead>
+                <tr>
+                  <th class="model-type-column">Model Type</th>
+                  <th class="model-name-column">Model</th>
+                  <th class="method-column">Method</th>
+                  <th class="metric-column">NR</th>
+                  <th class="metric-column">GT</th>
+                  <th class="metric-column">WI</th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr class="model-group-header">
+                  <td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Base</td>
+                  <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 3B</td>
+                  <td class="method-name">Base</td>
+                  <td class="score-cell">21.80%</td>
+                  <td class="score-cell">43.10%</td>
+                  <td class="score-cell">18.40%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ CoT</td>
+                  <td class="score-cell">24.40%</td>
+                  <td class="score-cell">48.90%</td>
+                  <td class="score-cell">24.20%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ SFT</td>
+                  <td class="score-cell">27.00%</td>
+                  <td class="score-cell">36.20%</td>
+                  <td class="score-cell">27.30%</td>
+                </tr>
+                <tr class="nover-row">
+                  <td class="method-name"><strong>+ NOVER</strong></td>
+                  <td class="score-cell best-score"><strong>28.60%</strong></td>
+                  <td class="score-cell best-score"><strong>60.30%</strong></td>
+                  <td class="score-cell best-score"><strong>28.10%</strong></td>
+                </tr>
+                <tr class="model-group-header">
+                  <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 7B</td>
+                  <td class="method-name">Base</td>
+                  <td class="score-cell">31.80%</td>
+                  <td class="score-cell">48.50%</td>
+                  <td class="score-cell">20.70%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ CoT</td>
+                  <td class="score-cell">31.20%</td>
+                  <td class="score-cell">57.60%</td>
+                  <td class="score-cell">29.20%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ SFT</td>
+                  <td class="score-cell">27.50%</td>
+                  <td class="score-cell">45.20%</td>
+                  <td class="score-cell">33.50%</td>
+                </tr>
+                <tr class="nover-row">
+                  <td class="method-name"><strong>+ NOVER</strong></td>
+                  <td class="score-cell best-score"><strong>38.20%</strong></td>
+                  <td class="score-cell best-score"><strong>61.80%</strong></td>
+                  <td class="score-cell best-score"><strong>36.60%</strong></td>
+                </tr>
+                <tr class="model-group-header">
+                  <td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Instruct</td>
+                  <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Llama-3.1-8B</td>
+                  <td class="method-name">Base</td>
+                  <td class="score-cell">34.20%</td>
+                  <td class="score-cell">36.70%</td>
+                  <td class="score-cell">29.90%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ CoT</td>
+                  <td class="score-cell">28.10%</td>
+                  <td class="score-cell">35.10%</td>
+                  <td class="score-cell">30.00%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ SFT</td>
+                  <td class="score-cell">23.60%</td>
+                  <td class="score-cell">23.40%</td>
+                  <td class="score-cell best-score"><strong>34.50%</strong></td>
+                </tr>
+                <tr class="nover-row">
+                  <td class="method-name"><strong>+ NOVER</strong></td>
+                  <td class="score-cell best-score"><strong>40.70%</strong></td>
+                  <td class="score-cell best-score"><strong>41.50%</strong></td>
+                  <td class="score-cell">34.00%</td>
+                </tr>
+                <tr class="model-group-header">
+                  <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Mistral-7B</td>
+                  <td class="method-name">Base</td>
+                  <td class="score-cell best-score"><strong>33.00%</strong></td>
+                  <td class="score-cell">17.80%</td>
+                  <td class="score-cell">27.00%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ CoT</td>
+                  <td class="score-cell">29.20%</td>
+                  <td class="score-cell">18.60%</td>
+                  <td class="score-cell">27.10%</td>
+                </tr>
+                <tr>
+                  <td class="method-name">+ SFT</td>
+                  <td class="score-cell">22.50%</td>
+                  <td class="score-cell">20.70%</td>
+                  <td class="score-cell">27.80%</td>
+                </tr>
+                <tr class="nover-row">
+                  <td class="method-name"><strong>+ NOVER</strong></td>
+                  <td class="score-cell">32.20%</td>
+                  <td class="score-cell best-score"><strong>21.90%</strong></td>
+                  <td class="score-cell best-score"><strong>29.30%</strong></td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+          <div class="table-caption">
+            <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct.
         </div>
       </div>
     </div>
+  </div>
+  <!-- Key Takeaways below the tables - full width -->
+  <div style="margin-top: 3rem;">
+    <div class="glass-card">
+      <h3 class="title is-4" style="color: #1a1a1a; margin-bottom: 1.5rem;">Key Takeaways</h3>
+      <ul style="color: #374151; line-height: 1.8; font-size: 0.9rem;">
+        <li>• NOVER trains successfully on both pretrained and instruct models, with larger gains on stronger base models</li>
+        <li>• Despite the free-form nature of answers, NOVER still prefers objective solutions over subjective ones</li>
+        <li>• On general reasoning, NOVER inherits the base model's capability boundaries, a limitation also observed in math reasoning. It struggles on false-premise tasks like FANToM</li>
+        <li>• NOVER's design prevents reward hacking, avoiding issues such as reasoning explosion and collapse</li>
+        <li>• Unlike closed-source or verifier-based rewards that suffer from cold start and hacking risks, NOVER remains stable</li>
+        <li>• Its dense reward signals allow greater error tolerance and encourage diverse reasoning patterns</li>
+      </ul>
+    </div>
+  </div>
   </div>
 </section>
 <section class="section">
+  <div class="container is-widescreen">
     <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>
+    <div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
+      <div style="width: 600px; height: 420px;">
+        <img src="iit.png" alt="iit" style="width: 100%; height: 100%; object-fit: contain;">
+      </div>
+      <div style="width: 600px; height: 420px;">
+        <img src="iit_result.png" alt="iit_result" style="width: 100%; height: 100%; object-fit: contain;">
+      </div>
+    </div>
     <div class="glass-card">
       <div style="text-align: center;">
           <div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
             <div style="text-align: center;">
+              <i class="fas fa-fish" style="font-size: 3rem; margin-bottom: 0.5rem; color: #0e41a8;"></i>
+              <div style="font-size: 1.0rem; color: #0e41a8;">Reward the Outcome, Incentivize Process</div>
             </div>
             <div style="font-size: 1.5rem;">→</div>
             <div style="text-align: center;">
+              <i class="fas fa-graduation-cap" style="font-size: 3rem; margin-bottom: 0.5rem; color: #d736d2;"></i>
+              <div style="font-size: 1.0rem; color: #d736d2;">Write Rubrics in the Outcome, Process as Result</div>
             </div>
           </div>
+          <div style="font-size: 1.2rem; color: #000000;">Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
       </div>
     </div>
   </div>
 <section class="section" id="BibTeX">
+  <div class="container is-widescreen">
     <div class="glass-card">
       <h2 class="title is-3">Citation</h2>
       <pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
   <div class="container has-text-centered">
     <div class="content">
       <div style="margin-bottom: 2rem;">
+        <p>Find me on <a href="https://thinkwee.top/about" target="_blank" style="color: #10b981;">thinkwee.top/about</a>, with other interesting works on LLM Agent🤖, NLP and more~</p>
       </div>
       <p style="color: #6b7280;">
         Licensed under <a href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank" style="color: #10b981;">CC BY-SA 4.0</a>
 </footer>
 </body>
+</html>

logo.png ADDED Viewed

Git LFS Details

SHA256: 664d2a9d8119f4331c61bfdf0a0edb6572a1a954b37b10780ea57899299b6327
Pointer size: 132 Bytes
Size of remote file: 1.29 MB

overall.png ADDED Viewed

Git LFS Details

SHA256: 4281cf1760e20e5a434eaad9723481d28c81be24efca3cfbbb1a730f1666e0b2
Pointer size: 131 Bytes
Size of remote file: 173 kB

paradigm.png ADDED Viewed

Git LFS Details

SHA256: 7994994301154cbdd0233f66561db1582399202e372c186911bcfae516010bc3
Pointer size: 131 Bytes
Size of remote file: 188 kB