| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>UGTC — Uncertainty-Gated Temporal Credit</title> |
| <meta name="description" content="UGTC: A backbone-agnostic advantage estimator for actor-critic reinforcement learning, published at UYES Journal."> |
| <!-- Google Fonts: the stylesheet comes from fonts.googleapis.com, the font files from fonts.gstatic.com (a CORS fetch, hence crossorigin). --> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet"> |
| <!-- MathJax typesets the inline and display TeX on this page; defer keeps the download from blocking HTML parsing. --> |
| <script defer src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-mml-chtml.min.js"></script> |
| <style> |
| /* Design tokens: the dark palette every rule below draws from. */ |
| :root { |
|   /* Surfaces */ |
|   --bg: #0a0e17; |
|   --bg2: #111827; |
|   --bg3: #1f2937; |
|   --border: #374151; |
|   /* Accents */ |
|   --accent: #6366f1; |
|   --accent2: #8b5cf6; |
|   --accent3: #06b6d4; |
|   /* Text */ |
|   --text: #f1f5f9; |
|   --muted: #94a3b8; |
|   /* Status colours */ |
|   --green: #10b981; |
|   --orange: #f59e0b; |
|   --red: #ef4444; |
| } |
| /* Global reset: no default spacing, border-box sizing everywhere. */ |
| * { |
|   box-sizing: border-box; |
|   margin: 0; |
|   padding: 0; |
| } |
| /* Page defaults */ |
| body { |
|   background: var(--bg); |
|   color: var(--text); |
|   font-family: 'Inter', sans-serif; |
|   line-height: 1.7; |
| } |
| /* Monospace stack for code samples and diagrams */ |
| code, |
| pre, |
| .mono { |
|   font-family: 'JetBrains Mono', monospace; |
| } |
| |
| |
| /* Sticky, translucent top navigation bar. */ |
| nav { |
|   position: sticky; top: 0; z-index: 100; |
|   background: rgba(10,14,23,0.95); |
|   /* Prefixed form first: Safari releases before 18 ignore the unprefixed property. */ |
|   -webkit-backdrop-filter: blur(12px); |
|   backdrop-filter: blur(12px); |
|   border-bottom: 1px solid var(--border); |
|   padding: 0 2rem; |
|   display: flex; align-items: center; justify-content: space-between; |
|   height: 60px; |
| } |
| .nav-logo { font-weight: 700; font-size: 1.1rem; color: var(--accent); letter-spacing: -0.02em; } |
| .nav-links { display: flex; gap: 1.5rem; list-style: none; } |
| .nav-links a { color: var(--muted); text-decoration: none; font-size: 0.9rem; transition: color 0.2s; } |
| .nav-links a:hover { color: var(--text); } |
| /* Keyboard focus gets the same highlight as pointer hover. Kept as a separate rule so an |
|    engine that does not know :focus-visible still applies the :hover rule above. */ |
| .nav-links a:focus-visible { color: var(--text); } |
| |
| |
| /* Hero: centred intro column at the top of the page. */ |
| .hero { |
|   text-align: center; |
|   padding: 6rem 2rem 4rem; |
|   max-width: 900px; |
|   margin: 0 auto; |
| } |
| /* Pill-shaped publication badge above the title. */ |
| .hero-badge { |
|   display: inline-flex; align-items: center; gap: 0.5rem; |
|   background: rgba(99,102,241,0.12); |
|   border: 1px solid rgba(99,102,241,0.3); |
|   color: var(--accent); |
|   padding: 0.4rem 1rem; border-radius: 9999px; |
|   font-size: 0.8rem; font-weight: 600; letter-spacing: 0.05em; |
|   text-transform: uppercase; margin-bottom: 1.5rem; |
| } |
| /* Page title rendered as gradient-filled text. */ |
| h1 { |
|   font-size: clamp(2.2rem, 5vw, 3.5rem); |
|   font-weight: 700; |
|   letter-spacing: -0.03em; |
|   background: linear-gradient(135deg, #fff 0%, #a5b4fc 100%); |
|   /* Clip the gradient to the glyphs: prefixed form for older engines, then the standard property. */ |
|   -webkit-background-clip: text; |
|   background-clip: text; |
|   -webkit-text-fill-color: transparent; |
|   margin-bottom: 0.5rem; |
| } |
| .hero-subtitle { |
|   font-size: 1.2rem; color: var(--muted); margin-bottom: 1.5rem; max-width: 600px; margin-left: auto; margin-right: auto; |
| } |
| .hero-tagline { |
|   font-size: 0.95rem; color: var(--muted); margin-bottom: 2rem; |
|   max-width: 700px; margin-left: auto; margin-right: auto; |
| } |
| /* Row of shields.io badges; wraps on narrow screens. */ |
| .badges { display: flex; flex-wrap: wrap; gap: 0.5rem; justify-content: center; margin-bottom: 2.5rem; } |
| .badge { display: inline-block; } |
| .badge img { height: 24px; } |
| |
| /* Call-to-action buttons in the hero. */ |
| .btn-group { |
|   display: flex; |
|   flex-wrap: wrap; |
|   justify-content: center; |
|   gap: 1rem; |
| } |
| /* Shared button shape */ |
| .btn { |
|   display: inline-flex; |
|   align-items: center; |
|   gap: 0.5rem; |
|   padding: 0.75rem 1.75rem; |
|   border-radius: 8px; |
|   font-size: 0.95rem; |
|   font-weight: 600; |
|   text-decoration: none; |
|   transition: all 0.2s; |
| } |
| /* Primary: gradient fill that lifts on hover */ |
| .btn-primary { |
|   color: white; |
|   background: linear-gradient(135deg, var(--accent), var(--accent2)); |
| } |
| .btn-primary:hover { |
|   box-shadow: 0 8px 25px rgba(99,102,241,0.4); |
|   transform: translateY(-2px); |
| } |
| /* Secondary: outlined, accent-coloured on hover */ |
| .btn-secondary { |
|   color: var(--text); |
|   background: var(--bg3); |
|   border: 1px solid var(--border); |
| } |
| .btn-secondary:hover { |
|   color: var(--accent); |
|   border-color: var(--accent); |
| } |
| |
| |
| /* Main content column */ |
| main { |
|   max-width: 1100px; |
|   margin: 0 auto; |
|   padding: 0 2rem 6rem; |
| } |
| |
| /* Sections and their headings */ |
| section { |
|   margin-bottom: 4rem; |
| } |
| h2 { |
|   color: var(--text); |
|   font-size: 1.7rem; |
|   font-weight: 700; |
|   letter-spacing: -0.02em; |
|   margin-bottom: 1.5rem; |
|   padding-bottom: 0.75rem; |
|   border-bottom: 1px solid var(--border); |
| } |
| h3 { |
|   color: var(--text); |
|   font-size: 1.2rem; |
|   font-weight: 600; |
|   margin: 1.5rem 0 0.75rem; |
| } |
| |
| /* Responsive card grid */ |
| .cards { |
|   display: grid; |
|   grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); |
|   gap: 1.25rem; |
| } |
| .card { |
|   padding: 1.5rem; |
|   background: var(--bg2); |
|   border: 1px solid var(--border); |
|   border-radius: 12px; |
|   transition: border-color 0.2s, transform 0.2s; |
| } |
| .card:hover { |
|   border-color: var(--accent); |
|   transform: translateY(-2px); |
| } |
| .card-icon { |
|   font-size: 1.75rem; |
|   margin-bottom: 0.75rem; |
| } |
| /* Card headings and text are tighter than the section-level defaults. */ |
| .card h3 { |
|   margin: 0 0 0.5rem; |
|   font-size: 1rem; |
| } |
| .card p { |
|   margin: 0; |
|   color: var(--muted); |
|   font-size: 0.9rem; |
| } |
| |
| |
| /* Architecture diagram container; scrolls sideways rather than wrapping the drawing. */ |
| .arch-box { |
|   padding: 2rem; |
|   overflow-x: auto; |
|   background: var(--bg2); |
|   border: 1px solid var(--border); |
|   border-radius: 12px; |
| } |
| .arch-box pre { |
|   color: var(--text); |
|   font-size: 0.82rem; |
|   line-height: 1.6; |
| } |
| |
| /* Panel around display equations */ |
| .math-block { |
|   margin: 1rem 0; |
|   padding: 1.5rem; |
|   overflow-x: auto; |
|   background: var(--bg2); |
|   border: 1px solid var(--border); |
|   border-radius: 8px; |
| } |
| |
| /* Data tables */ |
| table { |
|   width: 100%; |
|   border-collapse: collapse; |
| } |
| th { |
|   padding: 0.75rem 1rem; |
|   background: var(--bg3); |
|   color: var(--muted); |
|   font-size: 0.85rem; |
|   font-weight: 600; |
|   text-align: left; |
| } |
| td { |
|   padding: 0.75rem 1rem; |
|   border-top: 1px solid var(--border); |
|   font-size: 0.9rem; |
| } |
| tr:hover td { |
|   background: var(--bg2); |
| } |
| |
| /* Small monospace value/label chips and their colour variants */ |
| .tag { |
|   display: inline-block; |
|   padding: 0.2rem 0.6rem; |
|   border-radius: 4px; |
|   font-family: 'JetBrains Mono', monospace; |
|   font-size: 0.75rem; |
|   font-weight: 600; |
| } |
| .tag-green { |
|   color: var(--green); |
|   background: rgba(16,185,129,0.12); |
| } |
| .tag-blue { |
|   color: var(--accent3); |
|   background: rgba(6,182,212,0.12); |
| } |
| .tag-purple { |
|   color: var(--accent); |
|   background: rgba(99,102,241,0.12); |
| } |
| |
| /* Code samples; position: relative anchors the language label. */ |
| .code-block { |
|   position: relative; |
|   padding: 1.25rem 1.5rem; |
|   overflow-x: auto; |
|   background: #0d1117; |
|   border: 1px solid var(--border); |
|   border-radius: 8px; |
| } |
| .code-block pre { |
|   color: #e6edf3; |
|   font-size: 0.85rem; |
|   line-height: 1.7; |
| } |
| /* Language label pinned to the top-right corner of the code block */ |
| .code-label { |
|   position: absolute; |
|   top: 0.75rem; |
|   right: 1rem; |
|   color: var(--muted); |
|   font-family: 'JetBrains Mono', monospace; |
|   font-size: 0.7rem; |
| } |
| |
| |
| /* Gate-behaviour visualisation: one row per uncertainty regime. */ |
| .gate-viz { |
|   display: flex; flex-direction: column; gap: 0.5rem; padding: 1.5rem; |
|   background: var(--bg2); border: 1px solid var(--border); border-radius: 12px; |
| } |
| /* Rows may wrap so the trailing explanation drops below the bar on narrow screens |
|    instead of squeezing the bar to zero width. */ |
| .gate-row { display: flex; flex-wrap: wrap; align-items: center; gap: 0.5rem 1rem; } |
| /* Fixed column wide enough for the longest label ("Medium uncertainty") on one line, |
|    so all bars start at the same x position. */ |
| .gate-label { font-size: 0.85rem; color: var(--muted); width: 9.5rem; flex-shrink: 0; white-space: nowrap; } |
| /* The bar takes the remaining space but never collapses below 80px. */ |
| .gate-bar-bg { flex: 1 1 80px; height: 8px; background: var(--bg3); border-radius: 4px; overflow: hidden; } |
| .gate-bar { height: 100%; border-radius: 4px; transition: width 0.5s ease; } |
| /* Wide enough for "u = 0.5" without wrapping; same monospace stack as code, pre and .mono. */ |
| .gate-value { |
|   font-size: 0.8rem; font-family: 'JetBrains Mono', monospace; color: var(--muted); |
|   width: 4.5rem; flex-shrink: 0; white-space: nowrap; text-align: right; |
| } |
| |
| |
| /* Page footer */ |
| footer { |
|   padding: 2rem; |
|   border-top: 1px solid var(--border); |
|   color: var(--muted); |
|   font-size: 0.85rem; |
|   text-align: center; |
| } |
| footer a { |
|   color: var(--accent); |
|   text-decoration: none; |
| } |
| footer a:hover { |
|   text-decoration: underline; |
| } |
| |
| /* Small screens: tighter gutters, and the nav link list is hidden. */ |
| @media (max-width: 640px) { |
|   nav { |
|     padding: 0 1rem; |
|   } |
|   .nav-links { |
|     display: none; |
|   } |
|   .hero { |
|     padding: 4rem 1rem 3rem; |
|   } |
|   main { |
|     padding: 0 1rem 4rem; |
|   } |
| } |
| </style> |
| </head> |
| <body> |
|
|
| <!-- Top navigation: in-page anchors first, external project links last. --> |
| <nav> |
|   <div class="nav-logo">UGTC</div> |
|   <ul class="nav-links"> |
|     <li> |
|       <a href="#architecture">Architecture</a> |
|     </li> |
|     <li> |
|       <a href="#math">Mathematics</a> |
|     </li> |
|     <li> |
|       <a href="#algorithms">Algorithms</a> |
|     </li> |
|     <li> |
|       <a href="#quickstart">Quick Start</a> |
|     </li> |
|     <li> |
|       <a href="https://github.com/ethosoftai/ugtc">GitHub</a> |
|     </li> |
|     <li> |
|       <a href="https://doi.org/10.5281/zenodo.19715116">Paper</a> |
|     </li> |
|   </ul> |
| </nav> |
|
|
| <!-- Hero: publication badge, title, one-paragraph summary, status badges and primary links. --> |
| <div class="hero"> |
|   <div class="hero-badge">📄 Published · UYES Journal · 2026</div> |
|   <h1>Uncertainty-Gated Temporal Credit</h1> |
|   <p class="hero-subtitle">A plug-in advantage estimator for actor-critic reinforcement learning</p> |
|   <p class="hero-tagline"> |
|     UGTC dynamically blends short-horizon (low-variance) and long-horizon (low-bias) advantage |
|     estimates using a sigmoid gate driven by critic ensemble disagreement — resolving the |
|     bias–variance trade-off in temporal credit assignment. |
|   </p> |
|   <!-- Each alt text repeats the label and value shown on the badge, so the information survives when images are unavailable. --> |
|   <div class="badges"> |
|     <span class="badge"><img src="https://img.shields.io/badge/Paper-Zenodo%2019715116-blue?style=flat-square&logo=zenodo" alt="Paper: Zenodo 19715116"></span> |
|     <span class="badge"><img src="https://img.shields.io/badge/Published-UYES%20Journal-green?style=flat-square" alt="Published: UYES Journal"></span> |
|     <span class="badge"><img src="https://img.shields.io/badge/License-MIT-yellow?style=flat-square" alt="License: MIT"></span> |
|     <span class="badge"><img src="https://img.shields.io/badge/Python-3.10%2B-blue?style=flat-square&logo=python" alt="Python: 3.10+"></span> |
|     <span class="badge"><img src="https://img.shields.io/badge/PyTorch-2.2%2B-ee4c2c?style=flat-square&logo=pytorch" alt="PyTorch: 2.2+"></span> |
|   </div> |
|   <div class="btn-group"> |
|     <a href="https://github.com/ethosoftai/ugtc" class="btn btn-primary">⭐ View on GitHub</a> |
|     <a href="https://doi.org/10.5281/zenodo.19715116" class="btn btn-secondary">📄 Read Paper</a> |
|     <a href="https://huggingface.co/spaces/Ethosoft/ugtc" class="btn btn-secondary">🤗 Live Demo</a> |
|   </div> |
| </div> |
|
|
| <main> |
|
|
| |
| <!-- Key features. The emoji icons are decorative, so they are hidden from assistive technology; each card's heading carries the meaning. --> |
| <section> |
|   <h2>Key Features</h2> |
|   <div class="cards"> |
|     <div class="card"> |
|       <div class="card-icon" aria-hidden="true">🔌</div> |
|       <h3>Backbone-Agnostic</h3> |
|       <p>Drop UGTC into any actor-critic algorithm by replacing the advantage computation. Tested with PPO, TD3, SAC.</p> |
|     </div> |
|     <div class="card"> |
|       <div class="card-icon" aria-hidden="true">🎯</div> |
|       <h3>Adaptive Credit Assignment</h3> |
|       <p>Automatically selects between short-horizon and long-horizon GAE estimates based on per-state uncertainty.</p> |
|     </div> |
|     <div class="card"> |
|       <div class="card-icon" aria-hidden="true">📐</div> |
|       <h3>Fixed Hyperparameters</h3> |
|       <p>λ_fast=0.80, λ_slow=0.99, M=3, β=5.0. Same across all benchmarks — no per-task tuning required.</p> |
|     </div> |
|     <div class="card"> |
|       <div class="card-icon" aria-hidden="true">🔬</div> |
|       <h3>Ensemble Uncertainty</h3> |
|       <p>Slow critic ensemble disagreement provides calibrated uncertainty estimates without Bayesian inference.</p> |
|     </div> |
|     <div class="card"> |
|       <div class="card-icon" aria-hidden="true">⚡</div> |
|       <h3>Lightweight Overhead</h3> |
|       <p>Three small MLP value heads. Minimal parameter and compute overhead relative to actor network.</p> |
|     </div> |
|     <div class="card"> |
|       <div class="card-icon" aria-hidden="true">🌐</div> |
|       <h3>Multi-Language</h3> |
|       <p>Reference implementations in Python, C++ (header-only), and Java for portability.</p> |
|     </div> |
|   </div> |
| </section> |
|
|
| |
| <section id="architecture"> |
| <h2>Architecture</h2> |
| <!-- Box-drawing diagram of the UGTC data flow. Column alignment inside the pre is significant. |
|      role="img" plus aria-label gives screen readers a one-paragraph summary instead of the raw box characters. --> |
| <div class="arch-box"> |
| <pre role="img" aria-label="UGTC module data flow: an observation s feeds a fast critic (V_fast, lambda_fast = 0.80) and a slow ensemble of three critics with independent parameters (lambda = 0.99). The ensemble's standard deviation is normalized by its exponential moving average and passed through a sigmoid gate u(s), which blends the two advantage estimates: A^UGTC = u(s) · A^slow + (1 - u(s)) · A^fast."> |
| ┌─────────────────────────────────────────────────────────────────────────────┐ |
| │                                 UGTC MODULE                                 │ |
| │                                                                             │ |
| │  Input: s (observation)                                                     │ |
| │                                                                             │ |
| │  ┌──────────────────┐    ┌────────────────────────────────────────────┐     │ |
| │  │  Fast Critic     │    │            Slow Ensemble (M=3)             │     │ |
| │  │  V_fast(s)       │    │       V¹(s)        V²(s)        V³(s)      │     │ |
| │  │  λ_fast = 0.80   │    │     (independent parameters, λ = 0.99)     │     │ |
| │  └────────┬─────────┘    └──────────────────┬─────────────────────────┘     │ |
| │           │                                 │                               │ |
| │           │                   ┌─────────────┴───────────────┐               │ |
| │           │                   │   σ(s) = std(V¹,V²,V³)(s)   │               │ |
| │           │                   │    Ensemble Disagreement    │               │ |
| │           │                   └─────────────┬───────────────┘               │ |
| │           │                                 │                               │ |
| │           │                   ┌─────────────▼───────────────┐               │ |
| │           │                   │      EMA Normalization      │               │ |
| │           │                   │  σ_EMA ← α·σ_EMA + (1-α)·σ  │               │ |
| │           │                   │  σ̂(s) = σ(s) / (σ_EMA + ε)  │               │ |
| │           │                   └─────────────┬───────────────┘               │ |
| │           │                                 │                               │ |
| │           │                   ┌─────────────▼───────────────┐               │ |
| │           │                   │        Sigmoid Gate         │               │ |
| │           │                   │   u(s) = σ(-β·(σ̂(s) - 1))   │               │ |
| │           │                   └─────────────┬───────────────┘               │ |
| │           │                                 │                               │ |
| │  ┌────────▼─────────────────────────────────▼────────────────────────────┐  │ |
| │  │             A^UGTC = u(s) · A^slow + (1 - u(s)) · A^fast              │  │ |
| │  │                      Blended Advantage Estimate                       │  │ |
| │  └───────────────────────────────────────────────────────────────────────┘  │ |
| └─────────────────────────────────────────────────────────────────────────────┘ |
| </pre> |
| </div> |
|
|
| <!-- Gate behaviour: each row pairs an uncertainty regime with the gate value u and the estimate it favours. --> |
| <h3>Gate Behavior</h3> |
| <div class="gate-viz"> |
|   <!-- Low uncertainty: gate near 1 --> |
|   <div class="gate-row"> |
|     <span class="gate-label">Low uncertainty</span> |
|     <div class="gate-bar-bg"> |
|       <div class="gate-bar" style="width:92%;background:linear-gradient(90deg,#6366f1,#8b5cf6)"></div> |
|     </div> |
|     <span class="gate-value">u → 1</span> |
|     <span style="font-size:0.8rem;color:#10b981;">→ use A^slow (accurate)</span> |
|   </div> |
|   <!-- Medium uncertainty: gate at 0.5 --> |
|   <div class="gate-row"> |
|     <span class="gate-label">Medium uncertainty</span> |
|     <div class="gate-bar-bg"> |
|       <div class="gate-bar" style="width:50%;background:linear-gradient(90deg,#6366f1,#06b6d4)"></div> |
|     </div> |
|     <span class="gate-value">u = 0.5</span> |
|     <span style="font-size:0.8rem;color:#94a3b8;">→ equal blend</span> |
|   </div> |
|   <!-- High uncertainty: gate near 0 --> |
|   <div class="gate-row"> |
|     <span class="gate-label">High uncertainty</span> |
|     <div class="gate-bar-bg"> |
|       <div class="gate-bar" style="width:8%;background:linear-gradient(90deg,#f59e0b,#ef4444)"></div> |
|     </div> |
|     <span class="gate-value">u → 0</span> |
|     <span style="font-size:0.8rem;color:#f59e0b;">→ use A^fast (stable)</span> |
|   </div> |
| </div> |
| </section> |
|
|
| |
| <section id="math"> |
| <h2>Mathematical Foundation</h2> |
|
|
| <!-- TD residual and the GAE sum that both UGTC streams are built from. --> |
| <h3>Generalized Advantage Estimation</h3> |
| <div class="math-block"> |
| \[ |
| \delta_t = r_t + \gamma V(s_{t+1})(1 - d_t) - V(s_t) |
| \] |
| \[ |
| A_t^{\text{GAE}} = \sum_{k=0}^{\infty} (\gamma\lambda)^k \delta_{t+k} |
| \] |
| </div> |
| |
| <!-- Two GAE streams: fast critic with short-horizon lambda, slow ensemble mean with long-horizon lambda. --> |
| <h3>UGTC Dual-Stream Computation</h3> |
| <div class="math-block"> |
| \[ |
| A_t^{\text{fast}} = \text{GAE}\!\left(\tau,\, V_{\text{fast}},\, \lambda_{\text{fast}} = 0.80\right) |
| \] |
| \[ |
| A_t^{\text{slow}} = \text{GAE}\!\left(\tau,\, \bar{V}_{\text{slow}},\, \lambda_{\text{slow}} = 0.99\right) |
| \] |
| <p style="color:var(--muted);font-size:0.85rem;margin-top:0.75rem;"> |
| where \(\bar{V}_{\text{slow}} = \frac{1}{M}\sum_{m=1}^{M} V^m_{\text{slow}}\) (ensemble mean, M = 3) |
| </p> |
| </div> |
| |
| <!-- Gate: ensemble disagreement, EMA-normalized, squashed by a sigmoid. --> |
| <h3>Uncertainty Gate</h3> |
| <div class="math-block"> |
| \[ |
| \sigma(s) = \text{std}\!\left(V^1_{\text{slow}}(s),\, \ldots,\, V^M_{\text{slow}}(s)\right) |
| \] |
| \[ |
| \hat{\sigma}(s) = \frac{\sigma(s)}{\sigma_{\text{EMA}} + \varepsilon}, \qquad |
| \sigma_{\text{EMA}} \leftarrow \alpha \cdot \sigma_{\text{EMA}} + (1-\alpha)\cdot\mathbb{E}[\sigma(s)] |
| \] |
| \[ |
| u(s) = \sigma\!\left(-\beta \cdot (\hat{\sigma}(s) - 1)\right) |
| \] |
| <p style="color:var(--muted);font-size:0.85rem;margin-top:0.75rem;"> |
| Notation: \(\sigma(s)\) and \(\hat{\sigma}(s)\) are the raw and EMA-normalized ensemble standard deviation, |
| while the outer \(\sigma(\cdot)\) in \(u(s)\) is the logistic sigmoid \(x \mapsto 1/(1 + e^{-x})\). |
| Hence \(u(s) = 0.5\) when \(\hat{\sigma}(s) = 1\), \(u(s) \to 1\) as disagreement falls below its running average, and \(u(s) \to 0\) as it rises above. |
| </p> |
| </div> |
| |
| <!-- Final estimator: convex combination of the two streams, weighted by the gate. --> |
| <h3>Blended Advantage</h3> |
| <div class="math-block"> |
| \[ |
| \boxed{A_t^{\text{UGTC}} = u(s_t) \cdot A_t^{\text{slow}} + (1 - u(s_t)) \cdot A_t^{\text{fast}}} |
| \] |
| </div> |
|
|
| <!-- Hyperparameter reference. scope="col" ties each data cell to its column header for assistive technology; |
|      every symbol is typeset through MathJax so the Symbol column renders uniformly. --> |
| <h3>Fixed Hyperparameters</h3> |
| <table> |
| <thead> |
| <tr><th scope="col">Parameter</th><th scope="col">Symbol</th><th scope="col">Value</th><th scope="col">Description</th></tr> |
| </thead> |
| <tbody> |
| <tr><td>Fast λ</td><td>\(\lambda_{\text{fast}}\)</td><td><span class="tag tag-green">0.80</span></td><td>GAE lambda for fast critic (low variance)</td></tr> |
| <tr><td>Slow λ</td><td>\(\lambda_{\text{slow}}\)</td><td><span class="tag tag-green">0.99</span></td><td>GAE lambda for slow ensemble (low bias)</td></tr> |
| <tr><td>Ensemble size</td><td>\(M\)</td><td><span class="tag tag-blue">3</span></td><td>Number of slow critic heads</td></tr> |
| <tr><td>Gate temperature</td><td>\(\beta\)</td><td><span class="tag tag-purple">5.0</span></td><td>Sigmoid sharpness</td></tr> |
| <tr><td>EMA momentum</td><td>\(\alpha\)</td><td><span class="tag tag-green">0.99</span></td><td>Running uncertainty normalization</td></tr> |
| </tbody> |
| </table> |
| </section> |
|
|
| |
| <!-- One card per backbone algorithm; the tag marks each as on-policy, off-policy or an extension. --> |
| <section id="algorithms"> |
|   <h2>RL Algorithm Integrations</h2> |
|   <div class="cards"> |
|     <!-- PPO --> |
|     <div class="card"> |
|       <h3>UGTC-PPO</h3> |
|       <p style="color:var(--muted);font-size:0.85rem;margin-bottom:0.75rem;"> |
|         <span class="tag tag-green">On-policy</span> |
|       </p> |
|       <p>A^UGTC replaces standard GAE in the clipped surrogate objective. All UGTC critics trained via same regression pipeline.</p> |
|     </div> |
|     <!-- TD3 --> |
|     <div class="card"> |
|       <h3>UGTC-TD3</h3> |
|       <p style="color:var(--muted);font-size:0.85rem;margin-bottom:0.75rem;"> |
|         <span class="tag tag-blue">Off-policy</span> |
|       </p> |
|       <p>UGTC provides baseline correction for the actor: L = -(Q_min + η·A^UGTC). Twin-Q and delayed update preserved.</p> |
|     </div> |
|     <!-- SAC --> |
|     <div class="card"> |
|       <h3>UGTC-SAC</h3> |
|       <p style="color:var(--muted);font-size:0.85rem;margin-bottom:0.75rem;"> |
|         <span class="tag tag-blue">Off-policy</span> |
|       </p> |
|       <p>V^UGTC replaces implicit value baseline in the entropy-regularized actor loss. Auto-α entropy tuning unchanged.</p> |
|     </div> |
|     <!-- DDPG --> |
|     <div class="card"> |
|       <h3>UGTC-DDPG</h3> |
|       <p style="color:var(--muted);font-size:0.85rem;margin-bottom:0.75rem;"> |
|         <span class="tag tag-purple">Extension</span> |
|       </p> |
|       <p>Proposed extension following TD3 integration logic. Not benchmarked in the paper — labeled as implementation assumption.</p> |
|     </div> |
|   </div> |
| </section> |
|
|
| |
| <!-- Quick Start: three code samples (install, minimal usage, example scripts). |
|      Text inside each pre is rendered verbatim, so its whitespace must be left untouched. --> |
| <section id="quickstart"> |
| <h2>Quick Start</h2> |
|
|
| <!-- Clone the repository and install it in editable mode. --> |
| <h3>Installation</h3> |
| <div class="code-block"> |
| <span class="code-label">bash</span> |
| <pre>git clone https://github.com/ethosoftai/ugtc.git |
| cd ugtc |
| pip install -e .</pre> |
| </div> |
|
|
| <!-- Shows UGTCModule.compute_advantages standing in for the GAE step of a PPO update. --> |
| <h3>Minimal Usage</h3> |
| <div class="code-block"> |
| <span class="code-label">python</span> |
| <pre>from ugtc import UGTCModule |
|
|
| # Create UGTC module (obs_dim=17 for Hopper-v4) |
| ugtc = UGTCModule(obs_dim=17) |
|
|
| # Replace standard GAE in your PPO update: |
| advantages = ugtc.compute_advantages( |
| obs=obs, # (T, obs_dim) |
| next_obs=next_obs, # (T, obs_dim) |
| rewards=rewards, # (T,) |
| dones=dones, # (T,) |
| gamma=0.99, |
| ) |
|
|
| # Same as before: normalize and use in clipped surrogate |
| advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)</pre> |
| </div> |
|
|
| <!-- Commands for the example scripts under examples/. --> |
| <h3>Run an Example</h3> |
| <div class="code-block"> |
| <span class="code-label">bash</span> |
| <pre># UGTC-PPO on CartPole-v1 (no MuJoCo needed) |
| python examples/ugtc_ppo_cartpole.py |
|
|
| # UGTC-PPO on Hopper-v4 (requires MuJoCo) |
| python examples/ugtc_ppo_mujoco.py --env Hopper-v4 |
|
|
| # UGTC-TD3 on Pendulum-v1 |
| python examples/ugtc_td3_pendulum.py</pre> |
| </div> |
| </section> |
|
|
| |
| <!-- Citation: BibTeX entry for the Zenodo record. The pre content is rendered verbatim for copy-and-paste. --> |
| <section> |
| <h2>Citation</h2> |
| <div class="code-block"> |
| <pre>@misc{dalar2026ugtc, |
| author = {Dalar, Yağız Ekrem}, |
| title = {{UGTC}: Uncertainty-Gated Temporal Credit}, |
| year = {2026}, |
| publisher = {Zenodo}, |
| doi = {10.5281/zenodo.19715116}, |
| url = {https://doi.org/10.5281/zenodo.19715116}, |
| note = {Accepted — Ulysseus Young Explorers in Science (UYES) Journal. |
| Journal DOI forthcoming.} |
| }</pre> |
| </div> |
| </section> |
|
|
| </main> |
|
|
| <!-- Footer: project links on the first line, licence and publication status on the second. --> |
| <footer> |
|   <p> |
|     UGTC · <a href="https://github.com/ethosoftai">Ethosoft AI</a> · |
|     <a href="https://doi.org/10.5281/zenodo.19715116">Paper</a> · |
|     <a href="https://github.com/ethosoftai/ugtc">GitHub</a> · |
|     <a href="https://huggingface.co/spaces/Ethosoft/ugtc">HuggingFace</a> |
|   </p> |
|   <p style="margin-top:0.5rem;">MIT License · Accepted at Ulysseus Young Explorers in Science (UYES) Journal</p> |
| </footer> |
|
|
| </body> |
| </html> |
|
|