Spaces:

sairaj2
/

AutoClean-Ai

Sleeping

App Files Files Community

sairaj2 committed 7 days ago

Commit

d137754

verified ·

1 Parent(s): 8d340f1

Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

Dockerfile +2 -2
openenv.yaml +1 -1
server/Dockerfile +1 -1
server/__init__.py +1 -1
server/app.py +39 -39
server/metrics.py +71 -71
server/tasks.py +22 -22

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-# HallucinationGuard-Env Dockerfile - HF Spaces optimized
 # Single-stage build: avoids broken --target copy with compiled packages (torch, etc.)
 FROM python:3.10-slim
@@ -28,7 +28,7 @@ COPY . .
 RUN pip install --no-cache-dir -e .
 # Cache directory for datasets
-RUN mkdir -p /tmp/halluguard_cache /tmp/transformers_cache /tmp/hf_cache
 # HF Spaces default port
 EXPOSE 7860

+# DataQualityGuard-Env Dockerfile - HF Spaces optimized
 # Single-stage build: avoids broken --target copy with compiled packages (torch, etc.)
 FROM python:3.10-slim
 RUN pip install --no-cache-dir -e .
 # Cache directory for datasets
+RUN mkdir -p /tmp/cleanguard_cache /tmp/transformers_cache /tmp/hf_cache
 # HF Spaces default port
 EXPOSE 7860

openenv.yaml CHANGED Viewed

@@ -78,7 +78,7 @@ datasets:
   - squad
   - squad_v2
   - trivia_qa
-  - halueval
   - truthful_qa
   - hotpotqa
   - boolq

   - squad
   - squad_v2
   - trivia_qa
+  - data_quality_eval
   - truthful_qa
   - hotpotqa
   - boolq

server/Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-# HallucinationGuard-Env Dockerfile
 FROM python:3.10-slim
 WORKDIR /app

+# DataQualityGuard-Env Dockerfile
 FROM python:3.10-slim
 WORKDIR /app

server/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Server module for HallucinationGuard-Env."""
 import sys
 import os

+"""Server module for DataQualityGuard-Env."""
 import sys
 import os

server/app.py CHANGED Viewed

@@ -38,7 +38,7 @@ STUNNING_DOCS_HTML = """<!DOCTYPE html>
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>HallucinationGuard-Env · OpenEnv</title>
 <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><rect width='100' height='100' rx='20' fill='%23080c14'/><text x='50' y='68' font-size='55' text-anchor='middle' fill='%23f59e0b' font-family='sans-serif' font-weight='bold'>H</text></svg>">
 <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
 <style>
@@ -409,14 +409,14 @@ input[type=range] {
 }
 .ep-meta { display: flex; justify-content: space-between; align-items: center; }
 .ep-step { font-size: 11px; color: var(--muted); font-family: var(--mono); }
-.halluc-badge {
   display: none;
   font-size: 11px; font-weight: 700; letter-spacing: 0.5px;
   padding: 3px 10px; border-radius: 100px;
 }
-.halluc-badge.show { display: inline-block; }
-.halluc-badge.yes { background: var(--red-dim); color: var(--red); border: 1px solid rgba(248,113,113,0.3); }
-.halluc-badge.no { background: var(--green-dim); color: var(--green); border: 1px solid rgba(74,222,128,0.3); }
 /* ── REWARD BREAKDOWN ── */
 .reward-section { margin-top: 16px; }
@@ -506,7 +506,7 @@ input[type=range] {
     <div class="hero-badge">OpenEnv · RL Environment</div>
     <div class="ver-chip">v4.2.0</div>
     <h1>
-      <span class="accent">Hallucination</span><span class="accent2">Guard</span>‑Env
     </h1>
     <p class="hero-sub">
       Train AI models to answer <strong>only from verified context</strong> — with a 9-component reward system that penalizes fabrication and rewards factual grounding, citation accuracy, and calibrated confidence.
@@ -549,7 +549,7 @@ input[type=range] {
 <div id="overview" class="panel active">
   <div class="section-head">
     <h2>How it works</h2>
-    <p>Three primitives. Nine reward signals. One goal: no hallucinations.</p>
   </div>
   <div class="steps">
     <div class="step">
@@ -568,13 +568,13 @@ input[type=range] {
       <span class="step-num">03</span>
       <div class="step-icon">📊</div>
       <h4>grade()</h4>
-      <p>Aggregate episode rewards into a task score. Track accuracy, hallucination rate, and skill rating over time.</p>
     </div>
   </div>
   <div class="card">
     <h3>9-Component Reward System</h3>
-    <p>Every answer is graded on <strong>factual correctness</strong>, <strong>source grounding</strong>, <strong>citation accuracy</strong>, <strong>confidence calibration</strong>, <strong>semantic consistency</strong>, <strong>hallucination detection</strong>, <strong>ROUGE-L</strong>, <strong>BERTScore</strong>, and <strong>AlignScore</strong>. Each component is weighted and combined into a single scalar reward in <strong>[0, 1]</strong>. Confident wrong answers are penalized harder than uncertain ones.</p>
   </div>
   <div class="card">
     <h3>Curriculum Progression</h3>
@@ -634,9 +634,9 @@ input[type=range] {
         <span class="diff-badge advanced">Advanced</span>
         <span class="data-count">~210K examples</span>
       </div>
-      <p>Resist adversarial prompts designed to elicit hallucinations. Many questions are deliberately unanswerable — confident refusals with low confidence score better than fabricated plausible-sounding answers.</p>
       <div class="dataset-chips">
-        <span class="ds-chip">HaluEval</span>
         <span class="ds-chip">TruthfulQA</span>
         <span class="ds-chip">FEVER</span>
         <span class="ds-chip">Climate-FEVER</span>
@@ -665,7 +665,7 @@ input[type=range] {
       <tbody>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/reset</td><td class="td-desc">Start episode — returns question, context, difficulty, episode_id</td></tr>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/step</td><td class="td-desc">Submit answer with confidence + source_quote, receive reward breakdown</td></tr>
-        <tr><td><span class="method get">GET</span></td><td class="endpoint">/state</td><td class="td-desc">Current episode metadata — accuracy, hallucination_rate, skill_rating</td></tr>
         <tr><td><span class="method get">GET</span></td><td class="endpoint">/tasks</td><td class="td-desc">List all 3 tasks with action schema</td></tr>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/grader</td><td class="td-desc">Score a completed episode (0.0 – 1.0) from rewards + infos</td></tr>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/baseline</td><td class="td-desc">Run heuristic baseline across all 3 tasks</td></tr>
@@ -716,7 +716,7 @@ result = requests.<span class="fn">post</span>(<span class="st">f"{BASE}/step"</
 }).json()
 <span class="fn">print</span>(result[<span class="st">"reward"</span>])            <span class="cm"># scalar in [0, 1]</span>
-<span class="fn">print</span>(result[<span class="st">"is_hallucination"</span>])   <span class="cm"># bool</span></div>
     </div>
   </div>
 </div>
@@ -735,7 +735,7 @@ result = requests.<span class="fn">post</span>(<span class="st">f"{BASE}/step"</
         <div class="ep-progress">
           <div class="ep-meta">
             <span class="ep-step" id="ep-step-label">No episode active</span>
-            <span class="halluc-badge" id="halluc-badge"></span>
           </div>
           <div class="ep-bar-bg"><div class="ep-bar-fill" id="ep-bar" style="width:0%"></div></div>
         </div>
@@ -822,7 +822,7 @@ result = requests.<span class="fn">post</span>(<span class="st">f"{BASE}/step"</
 <!-- ══ FOOTER ══ -->
 <footer style="text-align:center;padding:32px 40px 24px;border-top:1px solid var(--border);color:var(--muted);font-size:12px;">
-  HallucinationGuard-Env v4.2.0 &middot; OpenEnv &middot; <a href="/swagger" style="color:var(--amber);text-decoration:none">Swagger Docs</a> &middot; <a href="/redoc" style="color:var(--amber);text-decoration:none">ReDoc</a>
 </footer>
 <script>
@@ -836,7 +836,7 @@ const REWARD_KEYS = [
   {key:'citation',              label:'Citation Accuracy',    css:'rc-2'},
   {key:'calibration',          label:'Confidence Calibr.',   css:'rc-3'},
   {key:'consistency',           label:'Semantic Consistency', css:'rc-4'},
-  {key:'halluc_detect',        label:'Hallucination Detect.', css:'rc-5'},
   {key:'rouge_l',               label:'ROUGE-L',             css:'rc-6'},
   {key:'bert_score',            label:'BERTScore',            css:'rc-7'},
   {key:'align_score',           label:'AlignScore',           css:'rc-8'},
@@ -846,7 +846,7 @@ const REWARD_KEYS = [
   {key:'citation_accuracy',     label:'Citation Accuracy',    css:'rc-2'},
   {key:'confidence_calibration', label:'Confidence Calibr.',   css:'rc-3'},
   {key:'semantic_consistency',  label:'Semantic Consistency', css:'rc-4'},
-  {key:'hallucination_penalty', label:'Hallucination Detect.', css:'rc-5'},
   {key:'rouge_score',           label:'ROUGE-L',              css:'rc-6'},
   {key:'bertscore',             label:'BERTScore',            css:'rc-7'},
   {key:'alignscore',            label:'AlignScore',           css:'rc-8'},
@@ -926,11 +926,11 @@ function renderRewards(data) {
   container.innerHTML = html || '<div style="color:var(--border2);font-size:12px;text-align:center;padding:12px">No breakdown data in response</div>';
-  // hallucination badge
-  const badge = document.getElementById('halluc-badge');
-  if (data.is_hallucination != null) {
-    badge.className = 'halluc-badge show ' + (data.is_hallucination ? 'yes' : 'no');
-    badge.textContent = data.is_hallucination ? '⚠ Hallucination' : '✓ Grounded';
   }
 }
@@ -958,7 +958,7 @@ async function doReset() {
     document.getElementById('reward-bars').innerHTML = '<div style="text-align:center;padding:20px 0;color:var(--border2);font-size:13px;">Submit an answer to see the 9-component reward breakdown</div>';
     document.getElementById('total-reward').textContent = '—';
     document.getElementById('total-reward').style.color = 'var(--amber)';
-    document.getElementById('halluc-badge').className = 'halluc-badge';
     setStatus('ready');
   } catch(e) {
     document.getElementById('ctx-box').innerHTML = '<span style="color:var(--red)">Error: ' + escHtml(e.message) + '</span>';
@@ -1068,7 +1068,7 @@ def _get_default_env() -> DataCleaningEnvironment:
                 def reset(self, **kwargs):
                     return type('Obs', (), {'question': 'Placeholder', 'context': 'Context', 'reward': 0.0, 'done': False, 'info': {}})()
                 def step(self, action):
-                    return type('Obs', (), {'reward': 0.0, 'done': False, 'is_hallucination': False, 'info': {}})()
                 def state(self): return {}
                 def close(self): pass
             _default_env = MinimalEnv()
@@ -1083,7 +1083,7 @@ def _create_session_env(session_id: str) -> DataCleaningEnvironment:
     loader_env = _get_default_env()
     # Pass the shared loader directly into __init__ so we skip the expensive
     # DatasetLoader() construction and dataset loading that would otherwise
-    # happen inside HallucinationEnvironment.__init__
     env = DataCleaningEnvironment(session_id=session_id, dataset_loader=loader_env.dataset_loader)
     return env
@@ -1147,7 +1147,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     lifespan=lifespan,
-    title="HallucinationGuard-Env",
     version="4.2.0",
     docs_url="/swagger",
     redoc_url="/redoc",
@@ -1156,7 +1156,7 @@ app = FastAPI(
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 import json as _json
-_LEADERBOARD_FILE = "/tmp/hallucination_guard_leaderboard.json"
 def _load_leaderboard():
     if os.path.exists(_LEADERBOARD_FILE):
@@ -1281,7 +1281,7 @@ async def run_baseline(body: Dict[str, Any] = {}):
         for _ in range(steps):
             if obs_dict.get("done"): break
             ctx = obs_dict.get("context", "")
-            action = HallucinationAction(answer=ctx[:100], confidence=0.6, source_quote=ctx[:80])
             obs_dict = _safe_dict(env.step(action))
             rewards.append(float(obs_dict.get("reward") or 0))
             obs_meta = obs_dict.get("metadata", {})
@@ -1293,8 +1293,8 @@ async def run_baseline(body: Dict[str, Any] = {}):
                     "correctness": obs_correctness,
                     "grounding": obs_dict.get("grounding_score", 0),
                     "calibration": obs_calibration,
-                    "hallucination_score": 1.0 if obs_dict.get("is_hallucination") else 0.0,
-                    "is_hallucination": bool(obs_dict.get("is_hallucination", False)),
                     "semantic_consistency": rb.get("semantic_consistency", 0.0),
                     "rouge_l": rb.get("rouge_l", 0.0),
                     "bert_score": rb.get("bert_score", 0.0),
@@ -1305,8 +1305,8 @@ async def run_baseline(body: Dict[str, Any] = {}):
                     "correctness": 0.0,
                     "grounding": obs_dict.get("grounding_score", 0),
                     "calibration": 0.6,
-                    "hallucination_score": 1.0 if obs_dict.get("is_hallucination") else 0.0,
-                    "is_hallucination": bool(obs_dict.get("is_hallucination", False)),
                 })
         results.append(compute_task_score(task, rewards, infos))
         try: env.close()
@@ -1321,7 +1321,7 @@ async def batch_evaluate(body: Dict[str, Any]):
     results = []
     for i, item in enumerate(items):
         r, info = calculate_reward(item.get("answer",""), item.get("confidence",0.5), item.get("source_quote",""), item.get("context",""), item.get("ground_truth",""))
-        results.append({"index": i, "reward": round(r,4), "is_hallucination": info.get("is_hallucination", False)})
     return {"total_items": len(results), "results": results}
 @app.get("/leaderboard", tags=["Leaderboard"])
@@ -1333,7 +1333,7 @@ async def leaderboard():
 @app.post("/leaderboard/submit", tags=["Leaderboard"])
 async def submit_leaderboard(data: Dict[str, Any]):
-    required = ["model_name", "avg_reward", "avg_accuracy", "hallucination_rate", "total_episodes", "total_steps"]
     if missing := [f for f in required if f not in data]: raise HTTPException(422, f"Missing: {missing}")
     _leaderboard[data["model_name"]] = {**data, "submitted_at": time.time()}
     _save_leaderboard(_leaderboard)
@@ -1345,12 +1345,12 @@ async def health(): return {"status": "healthy", "version": "4.2.0"}
 @app.get("/metadata", tags=["OpenEnv"])
 async def metadata():
     return {
-        "name": "hallucination-guard-env",
         "version": "4.2.0",
         "license": "MIT",
         "description": (
             "An OpenEnv RL environment that trains AI models to answer questions "
-            "ONLY from verified context documents — penalizing hallucination and "
             "rewarding factual grounding."
         ),
     }
@@ -1377,7 +1377,7 @@ async def schema():
                 "done":               {"type": "boolean"},
                 "reward":             {"type": "number"},
                 "feedback":           {"type": "string"},
-                "is_hallucination":   {"type": "boolean"},
                 "grounding_score":    {"type": "number"},
                 "difficulty_level":   {"type": "string"},
                 "attempts_remaining": {"type": "integer"},
@@ -1389,7 +1389,7 @@ async def schema():
                 "episode_id":            {"type": "string"},
                 "step_count":            {"type": "integer"},
                 "accuracy":              {"type": "number"},
-                "hallucination_rate":    {"type": "number"},
                 "average_reward":        {"type": "number"},
                 "current_difficulty":    {"type": "string"},
                 "skill_rating":          {"type": "number"},
@@ -1408,7 +1408,7 @@ async def datasets():
 async def mcp(body: Dict[str, Any]):
     if body.get("method") == "tools/list":
         return {"jsonrpc": "2.0", "id": body.get("id",1), "result": {"tools": [{"name": "reset", "inputSchema": {"type": "object"}}, {"name": "step", "inputSchema": {"type": "object"}}]}}
-    return {"jsonrpc": "2.0", "id": body.get("id",1), "result": {"name": "hallucination-guard-env", "version": "4.2.0"}}
 @app.middleware("http")
 async def log_req(request, call_next):

 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>DataQualityGuard-Env · OpenEnv</title>
 <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><rect width='100' height='100' rx='20' fill='%23080c14'/><text x='50' y='68' font-size='55' text-anchor='middle' fill='%23f59e0b' font-family='sans-serif' font-weight='bold'>H</text></svg>">
 <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
 <style>
 }
 .ep-meta { display: flex; justify-content: space-between; align-items: center; }
 .ep-step { font-size: 11px; color: var(--muted); font-family: var(--mono); }
+.cleanc-badge {
   display: none;
   font-size: 11px; font-weight: 700; letter-spacing: 0.5px;
   padding: 3px 10px; border-radius: 100px;
 }
+.cleanc-badge.show { display: inline-block; }
+.cleanc-badge.yes { background: var(--red-dim); color: var(--red); border: 1px solid rgba(248,113,113,0.3); }
+.cleanc-badge.no { background: var(--green-dim); color: var(--green); border: 1px solid rgba(74,222,128,0.3); }
 /* ── REWARD BREAKDOWN ── */
 .reward-section { margin-top: 16px; }
     <div class="hero-badge">OpenEnv · RL Environment</div>
     <div class="ver-chip">v4.2.0</div>
     <h1>
+      <span class="accent">DataQuality</span><span class="accent2">Guard</span>‑Env
     </h1>
     <p class="hero-sub">
       Train AI models to answer <strong>only from verified context</strong> — with a 9-component reward system that penalizes fabrication and rewards factual grounding, citation accuracy, and calibrated confidence.
 <div id="overview" class="panel active">
   <div class="section-head">
     <h2>How it works</h2>
+    <p>Three primitives. Nine reward signals. One goal: no data_qualitys.</p>
   </div>
   <div class="steps">
     <div class="step">
       <span class="step-num">03</span>
       <div class="step-icon">📊</div>
       <h4>grade()</h4>
+      <p>Aggregate episode rewards into a task score. Track accuracy, data_quality rate, and skill rating over time.</p>
     </div>
   </div>
   <div class="card">
     <h3>9-Component Reward System</h3>
+    <p>Every answer is graded on <strong>factual correctness</strong>, <strong>source grounding</strong>, <strong>citation accuracy</strong>, <strong>confidence calibration</strong>, <strong>semantic consistency</strong>, <strong>data_quality detection</strong>, <strong>ROUGE-L</strong>, <strong>BERTScore</strong>, and <strong>AlignScore</strong>. Each component is weighted and combined into a single scalar reward in <strong>[0, 1]</strong>. Confident wrong answers are penalized harder than uncertain ones.</p>
   </div>
   <div class="card">
     <h3>Curriculum Progression</h3>
         <span class="diff-badge advanced">Advanced</span>
         <span class="data-count">~210K examples</span>
       </div>
+      <p>Resist adversarial prompts designed to elicit data_qualitys. Many questions are deliberately unanswerable — confident refusals with low confidence score better than fabricated plausible-sounding answers.</p>
       <div class="dataset-chips">
+        <span class="ds-chip">DataQualityEval</span>
         <span class="ds-chip">TruthfulQA</span>
         <span class="ds-chip">FEVER</span>
         <span class="ds-chip">Climate-FEVER</span>
       <tbody>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/reset</td><td class="td-desc">Start episode — returns question, context, difficulty, episode_id</td></tr>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/step</td><td class="td-desc">Submit answer with confidence + source_quote, receive reward breakdown</td></tr>
+        <tr><td><span class="method get">GET</span></td><td class="endpoint">/state</td><td class="td-desc">Current episode metadata — accuracy, data_quality_rate, skill_rating</td></tr>
         <tr><td><span class="method get">GET</span></td><td class="endpoint">/tasks</td><td class="td-desc">List all 3 tasks with action schema</td></tr>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/grader</td><td class="td-desc">Score a completed episode (0.0 – 1.0) from rewards + infos</td></tr>
         <tr><td><span class="method post">POST</span></td><td class="endpoint">/baseline</td><td class="td-desc">Run heuristic baseline across all 3 tasks</td></tr>
 }).json()
 <span class="fn">print</span>(result[<span class="st">"reward"</span>])            <span class="cm"># scalar in [0, 1]</span>
+<span class="fn">print</span>(result[<span class="st">"is_data_quality"</span>])   <span class="cm"># bool</span></div>
     </div>
   </div>
 </div>
         <div class="ep-progress">
           <div class="ep-meta">
             <span class="ep-step" id="ep-step-label">No episode active</span>
+            <span class="cleanc-badge" id="cleanc-badge"></span>
           </div>
           <div class="ep-bar-bg"><div class="ep-bar-fill" id="ep-bar" style="width:0%"></div></div>
         </div>
 <!-- ══ FOOTER ══ -->
 <footer style="text-align:center;padding:32px 40px 24px;border-top:1px solid var(--border);color:var(--muted);font-size:12px;">
+  DataQualityGuard-Env v4.2.0 &middot; OpenEnv &middot; <a href="/swagger" style="color:var(--amber);text-decoration:none">Swagger Docs</a> &middot; <a href="/redoc" style="color:var(--amber);text-decoration:none">ReDoc</a>
 </footer>
 <script>
   {key:'citation',              label:'Citation Accuracy',    css:'rc-2'},
   {key:'calibration',          label:'Confidence Calibr.',   css:'rc-3'},
   {key:'consistency',           label:'Semantic Consistency', css:'rc-4'},
+  {key:'cleanc_detect',        label:'DataQuality Detect.', css:'rc-5'},
   {key:'rouge_l',               label:'ROUGE-L',             css:'rc-6'},
   {key:'bert_score',            label:'BERTScore',            css:'rc-7'},
   {key:'align_score',           label:'AlignScore',           css:'rc-8'},
   {key:'citation_accuracy',     label:'Citation Accuracy',    css:'rc-2'},
   {key:'confidence_calibration', label:'Confidence Calibr.',   css:'rc-3'},
   {key:'semantic_consistency',  label:'Semantic Consistency', css:'rc-4'},
+  {key:'data_quality_penalty', label:'DataQuality Detect.', css:'rc-5'},
   {key:'rouge_score',           label:'ROUGE-L',              css:'rc-6'},
   {key:'bertscore',             label:'BERTScore',            css:'rc-7'},
   {key:'alignscore',            label:'AlignScore',           css:'rc-8'},
   container.innerHTML = html || '<div style="color:var(--border2);font-size:12px;text-align:center;padding:12px">No breakdown data in response</div>';
+  // data_quality badge
+  const badge = document.getElementById('cleanc-badge');
+  if (data.is_data_quality != null) {
+    badge.className = 'cleanc-badge show ' + (data.is_data_quality ? 'yes' : 'no');
+    badge.textContent = data.is_data_quality ? '⚠ DataQuality' : '✓ Grounded';
   }
 }
     document.getElementById('reward-bars').innerHTML = '<div style="text-align:center;padding:20px 0;color:var(--border2);font-size:13px;">Submit an answer to see the 9-component reward breakdown</div>';
     document.getElementById('total-reward').textContent = '—';
     document.getElementById('total-reward').style.color = 'var(--amber)';
+    document.getElementById('cleanc-badge').className = 'cleanc-badge';
     setStatus('ready');
   } catch(e) {
     document.getElementById('ctx-box').innerHTML = '<span style="color:var(--red)">Error: ' + escHtml(e.message) + '</span>';
                 def reset(self, **kwargs):
                     return type('Obs', (), {'question': 'Placeholder', 'context': 'Context', 'reward': 0.0, 'done': False, 'info': {}})()
                 def step(self, action):
+                    return type('Obs', (), {'reward': 0.0, 'done': False, 'is_data_quality': False, 'info': {}})()
                 def state(self): return {}
                 def close(self): pass
             _default_env = MinimalEnv()
     loader_env = _get_default_env()
     # Pass the shared loader directly into __init__ so we skip the expensive
     # DatasetLoader() construction and dataset loading that would otherwise
+    # happen inside DataQualityEnvironment.__init__
     env = DataCleaningEnvironment(session_id=session_id, dataset_loader=loader_env.dataset_loader)
     return env
 app = FastAPI(
     lifespan=lifespan,
+    title="DataQualityGuard-Env",
     version="4.2.0",
     docs_url="/swagger",
     redoc_url="/redoc",
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 import json as _json
+_LEADERBOARD_FILE = "/tmp/data_quality_guard_leaderboard.json"
 def _load_leaderboard():
     if os.path.exists(_LEADERBOARD_FILE):
         for _ in range(steps):
             if obs_dict.get("done"): break
             ctx = obs_dict.get("context", "")
+            action = DataQualityAction(answer=ctx[:100], confidence=0.6, source_quote=ctx[:80])
             obs_dict = _safe_dict(env.step(action))
             rewards.append(float(obs_dict.get("reward") or 0))
             obs_meta = obs_dict.get("metadata", {})
                     "correctness": obs_correctness,
                     "grounding": obs_dict.get("grounding_score", 0),
                     "calibration": obs_calibration,
+                    "data_quality_score": 1.0 if obs_dict.get("is_data_quality") else 0.0,
+                    "is_data_quality": bool(obs_dict.get("is_data_quality", False)),
                     "semantic_consistency": rb.get("semantic_consistency", 0.0),
                     "rouge_l": rb.get("rouge_l", 0.0),
                     "bert_score": rb.get("bert_score", 0.0),
                     "correctness": 0.0,
                     "grounding": obs_dict.get("grounding_score", 0),
                     "calibration": 0.6,
+                    "data_quality_score": 1.0 if obs_dict.get("is_data_quality") else 0.0,
+                    "is_data_quality": bool(obs_dict.get("is_data_quality", False)),
                 })
         results.append(compute_task_score(task, rewards, infos))
         try: env.close()
     results = []
     for i, item in enumerate(items):
         r, info = calculate_reward(item.get("answer",""), item.get("confidence",0.5), item.get("source_quote",""), item.get("context",""), item.get("ground_truth",""))
+        results.append({"index": i, "reward": round(r,4), "is_data_quality": info.get("is_data_quality", False)})
     return {"total_items": len(results), "results": results}
 @app.get("/leaderboard", tags=["Leaderboard"])
 @app.post("/leaderboard/submit", tags=["Leaderboard"])
 async def submit_leaderboard(data: Dict[str, Any]):
+    required = ["model_name", "avg_reward", "avg_accuracy", "data_quality_rate", "total_episodes", "total_steps"]
     if missing := [f for f in required if f not in data]: raise HTTPException(422, f"Missing: {missing}")
     _leaderboard[data["model_name"]] = {**data, "submitted_at": time.time()}
     _save_leaderboard(_leaderboard)
 @app.get("/metadata", tags=["OpenEnv"])
 async def metadata():
     return {
+        "name": "data_quality-guard-env",
         "version": "4.2.0",
         "license": "MIT",
         "description": (
             "An OpenEnv RL environment that trains AI models to answer questions "
+            "ONLY from verified context documents — penalizing data_quality and "
             "rewarding factual grounding."
         ),
     }
                 "done":               {"type": "boolean"},
                 "reward":             {"type": "number"},
                 "feedback":           {"type": "string"},
+                "is_data_quality":   {"type": "boolean"},
                 "grounding_score":    {"type": "number"},
                 "difficulty_level":   {"type": "string"},
                 "attempts_remaining": {"type": "integer"},
                 "episode_id":            {"type": "string"},
                 "step_count":            {"type": "integer"},
                 "accuracy":              {"type": "number"},
+                "data_quality_rate":    {"type": "number"},
                 "average_reward":        {"type": "number"},
                 "current_difficulty":    {"type": "string"},
                 "skill_rating":          {"type": "number"},
 async def mcp(body: Dict[str, Any]):
     if body.get("method") == "tools/list":
         return {"jsonrpc": "2.0", "id": body.get("id",1), "result": {"tools": [{"name": "reset", "inputSchema": {"type": "object"}}, {"name": "step", "inputSchema": {"type": "object"}}]}}
+    return {"jsonrpc": "2.0", "id": body.get("id",1), "result": {"name": "data_quality-guard-env", "version": "4.2.0"}}
 @app.middleware("http")
 async def log_req(request, call_next):

server/metrics.py CHANGED Viewed

@@ -1,9 +1,9 @@
-"""Professional-grade metrics and visualization for HallucinationGuard-Env.
 This module provides:
 - Real-time metrics tracking
 - Training curve visualization
-- Hallucination heatmaps
 - Comprehensive logging
 - Export capabilities for analysis
 """
@@ -29,8 +29,8 @@ class StepMetrics:
     correctness: float
     grounding: float
     calibration: float
-    hallucination_score: float
-    is_hallucination: bool
     confidence: float
     difficulty: str
     timestamp: float = field(default_factory=time.time)
@@ -42,8 +42,8 @@ class EpisodeMetrics:
     episode_id: str
     total_steps: int
     average_reward: float
-    total_hallucinations: int
-    hallucination_rate: float
     accuracy: float
     average_confidence: float
     calibration_error: float
@@ -69,13 +69,13 @@ class TrainingSession:
     # Aggregated metrics
     overall_accuracy: float = 0.0
-    overall_hallucination_rate: float = 0.0
     average_reward: float = 0.0
     skill_rating_progress: List[float] = field(default_factory=list)
     # Trend analysis
     reward_trend: str = "stable"  # improving, stable, declining
-    hallucination_trend: str = "stable"
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for serialization."""
@@ -86,11 +86,11 @@ class TrainingSession:
             "total_episodes": self.total_episodes,
             "total_steps": self.total_steps,
             "overall_accuracy": self.overall_accuracy,
-            "overall_hallucination_rate": self.overall_hallucination_rate,
             "average_reward": self.average_reward,
             "skill_rating_progress": self.skill_rating_progress,
             "reward_trend": self.reward_trend,
-            "hallucination_trend": self.hallucination_trend,
         }
@@ -116,13 +116,13 @@ class MetricsTracker:
         # Rolling windows for trend analysis
         self.reward_window: List[float] = []
-        self.hallucination_window: List[bool] = []
         self.window_size = 10
         # Real-time aggregates
         self.running_reward_sum = 0.0
         self.running_reward_count = 0
-        self.running_hallucination_count = 0
         self.running_step_count = 0
         logger.info(f"Initialized MetricsTracker (session={self.session_id})")
@@ -136,8 +136,8 @@ class MetricsTracker:
             correctness=step_data.get("correctness", 0.0),
             grounding=step_data.get("grounding", 0.0),
             calibration=step_data.get("calibration", 0.0),
-            hallucination_score=step_data.get("hallucination_score", 0.0),
-            is_hallucination=step_data.get("is_hallucination", False),
             confidence=step_data.get("confidence", 0.5),
             difficulty=step_data.get("difficulty", "intermediate"),
         )
@@ -150,16 +150,16 @@ class MetricsTracker:
         self.running_reward_count += 1
         self.running_step_count += 1
-        if step_metrics.is_hallucination:
-            self.running_hallucination_count += 1
         # Update rolling windows
         self.reward_window.append(step_metrics.reward)
-        self.hallucination_window.append(step_metrics.is_hallucination)
         if len(self.reward_window) > self.window_size:
             self.reward_window.pop(0)
-            self.hallucination_window.pop(0)
         return step_metrics
@@ -169,8 +169,8 @@ class MetricsTracker:
             episode_id=episode_data.get("episode_id", ""),
             total_steps=episode_data.get("total_steps", len(self.current_episode_data)),
             average_reward=episode_data.get("average_reward", 0.0),
-            total_hallucinations=episode_data.get("total_hallucinations", 0),
-            hallucination_rate=episode_data.get("hallucination_rate", 0.0),
             accuracy=episode_data.get("accuracy", 0.0),
             average_confidence=episode_data.get("average_confidence", 0.5),
             calibration_error=episode_data.get("calibration_error", 0.0),
@@ -196,7 +196,7 @@ class MetricsTracker:
         self.current_episode_data = []
         logger.info(f"Episode {episode_metrics.episode_id} completed: reward={episode_metrics.average_reward:.3f}, "
-                    f"hallucination_rate={episode_metrics.hallucination_rate:.3f}")
         return episode_metrics
@@ -209,9 +209,9 @@ class MetricsTracker:
         total_correct = sum(ep.accuracy * ep.total_steps for ep in self.current_session.episode_metrics)
         self.current_session.overall_accuracy = total_correct / max(1, self.current_session.total_steps)
-        # Overall hallucination rate
-        total_hallucinations = sum(ep.total_hallucinations for ep in self.current_session.episode_metrics)
-        self.current_session.overall_hallucination_rate = total_hallucinations / max(1, self.current_session.total_steps)
         # Average reward
         total_reward = sum(ep.average_reward * ep.total_steps for ep in self.current_session.episode_metrics)
@@ -238,17 +238,17 @@ class MetricsTracker:
         else:
             self.current_session.reward_trend = "stable"
-        # Hallucination trend
-        if len(self.hallucination_window) >= 5:
-            recent_hallucination_rate = sum(self.hallucination_window[-5:]) / 5
-            older_hallucination_rate = sum(self.hallucination_window[:-5]) / max(1, len(self.hallucination_window) - 5)
-            if recent_hallucination_rate < older_hallucination_rate - 0.1:
-                self.current_session.hallucination_trend = "improving"
-            elif recent_hallucination_rate > older_hallucination_rate + 0.1:
-                self.current_session.hallucination_trend = "worsening"
             else:
-                self.current_session.hallucination_trend = "stable"
     def get_real_time_metrics(self) -> Dict[str, Any]:
         """Get current real-time metrics."""
@@ -257,18 +257,18 @@ class MetricsTracker:
             "episodes_completed": self.current_session.total_episodes,
             "total_steps": self.current_session.total_steps,
             "overall_accuracy": self.current_session.overall_accuracy,
-            "overall_hallucination_rate": self.current_session.overall_hallucination_rate,
             "average_reward": self.current_session.average_reward,
             "reward_trend": self.current_session.reward_trend,
-            "hallucination_trend": self.current_session.hallucination_trend,
             "recent_reward_avg": sum(self.reward_window) / max(1, len(self.reward_window)),
-            "recent_hallucination_rate": sum(self.hallucination_window) / max(1, len(self.hallucination_window)),
         }
     def get_training_curve_data(self) -> Dict[str, List[Any]]:
         """Get data for plotting training curves."""
         episode_rewards = [ep.average_reward for ep in self.current_session.episode_metrics]
-        hallucination_rates = [ep.hallucination_rate for ep in self.current_session.episode_metrics]
         accuracies = [ep.accuracy for ep in self.current_session.episode_metrics]
         skill_ratings = self.current_session.skill_rating_progress
@@ -282,15 +282,15 @@ class MetricsTracker:
             "episodes": list(range(1, len(episode_rewards) + 1)),
             "rewards": episode_rewards,
             "rewards_smooth": moving_average(episode_rewards),
-            "hallucination_rates": hallucination_rates,
-            "hallucination_rates_smooth": moving_average(hallucination_rates),
             "accuracies": accuracies,
             "skill_ratings": skill_ratings,
         }
-    def get_hallucination_heatmap_data(self) -> Dict[str, Any]:
-        """Get data for hallucination heatmap visualization."""
-        # Group by difficulty and hallucination type
         heatmap_data = {}
         for step in self.current_session.step_metrics:
@@ -298,19 +298,19 @@ class MetricsTracker:
             if difficulty not in heatmap_data:
                 heatmap_data[difficulty] = {
                     "total": 0,
-                    "hallucinations": 0,
                     "by_type": {}
                 }
             heatmap_data[difficulty]["total"] += 1
-            if step.is_hallucination:
-                heatmap_data[difficulty]["hallucinations"] += 1
         # Calculate rates
         for difficulty in heatmap_data:
             total = heatmap_data[difficulty]["total"]
-            hallucs = heatmap_data[difficulty]["hallucinations"]
-            heatmap_data[difficulty]["rate"] = hallucs / max(1, total)
         return heatmap_data
@@ -324,14 +324,14 @@ class MetricsTracker:
             "correctness": [],
             "grounding": [],
             "calibration": [],
-            "hallucination_score": [],
         }
         for step in self.current_session.step_metrics:
             components["correctness"].append(step.correctness)
             components["grounding"].append(step.grounding)
             components["calibration"].append(step.calibration)
-            components["hallucination_score"].append(step.hallucination_score)
         # Calculate statistics
         analysis = {}
@@ -366,14 +366,14 @@ class MetricsTracker:
                     "episode_id": ep.episode_id,
                     "total_steps": ep.total_steps,
                     "average_reward": ep.average_reward,
-                    "hallucination_rate": ep.hallucination_rate,
                     "accuracy": ep.accuracy,
                     "duration": ep.duration,
                 }
                 for ep in self.current_session.episode_metrics
             ],
             "training_curves": self.get_training_curve_data(),
-            "heatmap_data": self.get_hallucination_heatmap_data(),
             "reward_analysis": self.get_reward_breakdown_analysis(),
         }
@@ -390,12 +390,12 @@ class MetricsTracker:
         with open(filepath, 'w', encoding='utf-8') as f:
             # Header
-            f.write("step,episode_id,reward,correctness,grounding,calibration,hallucination_score,is_hallucination,confidence,difficulty,timestamp\n")
             # Data
             for step in self.current_session.step_metrics:
                 f.write(f"{step.step},{step.episode_id},{step.reward},{step.correctness},{step.grounding},"
-                        f"{step.calibration},{step.hallucination_score},{int(step.is_hallucination)},"
                         f"{step.confidence},{step.difficulty},{step.timestamp}\n")
         logger.info(f"Exported CSV to {filepath}")
@@ -407,7 +407,7 @@ class MetricsTracker:
         report = f"""
 ╔══════════════════════════════════════════════════════════╗
-║       HallucinationGuard-Env Training Summary            ║
 ╠══════════════════════════════════════════════════════════╣
 Session: {self.current_session.session_id}
@@ -419,15 +419,15 @@ PERFORMANCE METRICS
 ────────────────────────────────────────────────────────────
 Overall Accuracy: {metrics['overall_accuracy']:.1%}
 Average Reward: {metrics['average_reward']:.3f}
-Hallucination Rate: {metrics['overall_hallucination_rate']:.1%}
 ────────────────────────────────────────────────────────────
 TREND ANALYSIS
 ────────────────────────────────────────────────────────────
 Reward Trend: {metrics['reward_trend'].upper()}
-Hallucination Trend: {metrics['hallucination_trend'].upper()}
 Recent Reward Avg: {metrics['recent_reward_avg']:.3f}
-Recent Hallucination Rate: {metrics['recent_hallucination_rate']:.1%}
 ────────────────────────────────────────────────────────────
 INTERPRETATION
@@ -442,12 +442,12 @@ INTERPRETATION
         else:
             report += "→ Model performance is STABLE\n"
-        if metrics['hallucination_trend'] == "improving":
-            report += "✓ Hallucination rate is DECREASING\n"
-        elif metrics['hallucination_trend'] == "worsening":
-            report += "⚠ Hallucination rate is INCREASING - review training data\n"
         else:
-            report += "→ Hallucination rate is STABLE\n"
         if metrics['overall_accuracy'] > 0.8:
             report += "\n★ EXCELLENT: Model is performing at expert level\n"
@@ -505,10 +505,10 @@ class VisualizationDataGenerator:
                     "line": {"dash": "dash"},
                 },
                 {
-                    "name": "Hallucination Rate",
                     "type": "scatter",
                     "x": curve_data["episodes"],
-                    "y": curve_data["hallucination_rates"],
                     "mode": "lines+markers",
                     "yaxis": "y2",
                 },
@@ -526,21 +526,21 @@ class VisualizationDataGenerator:
                 "xaxis": {"title": "Episode"},
                 "yaxis": {"title": "Reward / Accuracy"},
                 "yaxis2": {
-                    "title": "Hallucination Rate",
                     "overlaying": "y",
                     "side": "right",
                 },
             }
         }
-    def get_hallucination_type_distribution(self) -> Dict[str, Any]:
-        """Get hallucination type distribution for pie chart."""
         type_counts = {}
         for step in self.tracker.current_session.step_metrics:
-            if step.is_hallucination:
                 # In a full implementation, track specific types
-                type_key = "hallucination"
                 type_counts[type_key] = type_counts.get(type_key, 0) + 1
         return {
@@ -550,7 +550,7 @@ class VisualizationDataGenerator:
     def get_difficulty_performance_comparison(self) -> Dict[str, Any]:
         """Get performance comparison across difficulties."""
-        heatmap_data = self.tracker.get_hallucination_heatmap_data()
         difficulties = list(heatmap_data.keys())
         rates = [heatmap_data[d]["rate"] for d in difficulties]
@@ -558,7 +558,7 @@ class VisualizationDataGenerator:
         return {
             "difficulties": difficulties,
-            "hallucination_rates": rates,
             "sample_sizes": totals,
         }

+"""Professional-grade metrics and visualization for DataQualityGuard-Env.
 This module provides:
 - Real-time metrics tracking
 - Training curve visualization
+- DataQuality heatmaps
 - Comprehensive logging
 - Export capabilities for analysis
 """
     correctness: float
     grounding: float
     calibration: float
+    data_quality_score: float
+    is_data_quality: bool
     confidence: float
     difficulty: str
     timestamp: float = field(default_factory=time.time)
     episode_id: str
     total_steps: int
     average_reward: float
+    total_data_qualitys: int
+    data_quality_rate: float
     accuracy: float
     average_confidence: float
     calibration_error: float
     # Aggregated metrics
     overall_accuracy: float = 0.0
+    overall_data_quality_rate: float = 0.0
     average_reward: float = 0.0
     skill_rating_progress: List[float] = field(default_factory=list)
     # Trend analysis
     reward_trend: str = "stable"  # improving, stable, declining
+    data_quality_trend: str = "stable"
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for serialization."""
             "total_episodes": self.total_episodes,
             "total_steps": self.total_steps,
             "overall_accuracy": self.overall_accuracy,
+            "overall_data_quality_rate": self.overall_data_quality_rate,
             "average_reward": self.average_reward,
             "skill_rating_progress": self.skill_rating_progress,
             "reward_trend": self.reward_trend,
+            "data_quality_trend": self.data_quality_trend,
         }
         # Rolling windows for trend analysis
         self.reward_window: List[float] = []
+        self.data_quality_window: List[bool] = []
         self.window_size = 10
         # Real-time aggregates
         self.running_reward_sum = 0.0
         self.running_reward_count = 0
+        self.running_data_quality_count = 0
         self.running_step_count = 0
         logger.info(f"Initialized MetricsTracker (session={self.session_id})")
             correctness=step_data.get("correctness", 0.0),
             grounding=step_data.get("grounding", 0.0),
             calibration=step_data.get("calibration", 0.0),
+            data_quality_score=step_data.get("data_quality_score", 0.0),
+            is_data_quality=step_data.get("is_data_quality", False),
             confidence=step_data.get("confidence", 0.5),
             difficulty=step_data.get("difficulty", "intermediate"),
         )
         self.running_reward_count += 1
         self.running_step_count += 1
+        if step_metrics.is_data_quality:
+            self.running_data_quality_count += 1
         # Update rolling windows
         self.reward_window.append(step_metrics.reward)
+        self.data_quality_window.append(step_metrics.is_data_quality)
         if len(self.reward_window) > self.window_size:
             self.reward_window.pop(0)
+            self.data_quality_window.pop(0)
         return step_metrics
             episode_id=episode_data.get("episode_id", ""),
             total_steps=episode_data.get("total_steps", len(self.current_episode_data)),
             average_reward=episode_data.get("average_reward", 0.0),
+            total_data_qualitys=episode_data.get("total_data_qualitys", 0),
+            data_quality_rate=episode_data.get("data_quality_rate", 0.0),
             accuracy=episode_data.get("accuracy", 0.0),
             average_confidence=episode_data.get("average_confidence", 0.5),
             calibration_error=episode_data.get("calibration_error", 0.0),
         self.current_episode_data = []
         logger.info(f"Episode {episode_metrics.episode_id} completed: reward={episode_metrics.average_reward:.3f}, "
+                    f"data_quality_rate={episode_metrics.data_quality_rate:.3f}")
         return episode_metrics
         total_correct = sum(ep.accuracy * ep.total_steps for ep in self.current_session.episode_metrics)
         self.current_session.overall_accuracy = total_correct / max(1, self.current_session.total_steps)
+        # Overall data_quality rate
+        total_data_qualitys = sum(ep.total_data_qualitys for ep in self.current_session.episode_metrics)
+        self.current_session.overall_data_quality_rate = total_data_qualitys / max(1, self.current_session.total_steps)
         # Average reward
         total_reward = sum(ep.average_reward * ep.total_steps for ep in self.current_session.episode_metrics)
         else:
             self.current_session.reward_trend = "stable"
+        # DataQuality trend
+        if len(self.data_quality_window) >= 5:
+            recent_data_quality_rate = sum(self.data_quality_window[-5:]) / 5
+            older_data_quality_rate = sum(self.data_quality_window[:-5]) / max(1, len(self.data_quality_window) - 5)
+            if recent_data_quality_rate < older_data_quality_rate - 0.1:
+                self.current_session.data_quality_trend = "improving"
+            elif recent_data_quality_rate > older_data_quality_rate + 0.1:
+                self.current_session.data_quality_trend = "worsening"
             else:
+                self.current_session.data_quality_trend = "stable"
     def get_real_time_metrics(self) -> Dict[str, Any]:
         """Get current real-time metrics."""
             "episodes_completed": self.current_session.total_episodes,
             "total_steps": self.current_session.total_steps,
             "overall_accuracy": self.current_session.overall_accuracy,
+            "overall_data_quality_rate": self.current_session.overall_data_quality_rate,
             "average_reward": self.current_session.average_reward,
             "reward_trend": self.current_session.reward_trend,
+            "data_quality_trend": self.current_session.data_quality_trend,
             "recent_reward_avg": sum(self.reward_window) / max(1, len(self.reward_window)),
+            "recent_data_quality_rate": sum(self.data_quality_window) / max(1, len(self.data_quality_window)),
         }
     def get_training_curve_data(self) -> Dict[str, List[Any]]:
         """Get data for plotting training curves."""
         episode_rewards = [ep.average_reward for ep in self.current_session.episode_metrics]
+        data_quality_rates = [ep.data_quality_rate for ep in self.current_session.episode_metrics]
         accuracies = [ep.accuracy for ep in self.current_session.episode_metrics]
         skill_ratings = self.current_session.skill_rating_progress
             "episodes": list(range(1, len(episode_rewards) + 1)),
             "rewards": episode_rewards,
             "rewards_smooth": moving_average(episode_rewards),
+            "data_quality_rates": data_quality_rates,
+            "data_quality_rates_smooth": moving_average(data_quality_rates),
             "accuracies": accuracies,
             "skill_ratings": skill_ratings,
         }
+    def get_data_quality_heatmap_data(self) -> Dict[str, Any]:
+        """Get data for data_quality heatmap visualization."""
+        # Group by difficulty and data_quality type
         heatmap_data = {}
         for step in self.current_session.step_metrics:
             if difficulty not in heatmap_data:
                 heatmap_data[difficulty] = {
                     "total": 0,
+                    "data_qualitys": 0,
                     "by_type": {}
                 }
             heatmap_data[difficulty]["total"] += 1
+            if step.is_data_quality:
+                heatmap_data[difficulty]["data_qualitys"] += 1
         # Calculate rates
         for difficulty in heatmap_data:
             total = heatmap_data[difficulty]["total"]
+            cleancs = heatmap_data[difficulty]["data_qualitys"]
+            heatmap_data[difficulty]["rate"] = cleancs / max(1, total)
         return heatmap_data
             "correctness": [],
             "grounding": [],
             "calibration": [],
+            "data_quality_score": [],
         }
         for step in self.current_session.step_metrics:
             components["correctness"].append(step.correctness)
             components["grounding"].append(step.grounding)
             components["calibration"].append(step.calibration)
+            components["data_quality_score"].append(step.data_quality_score)
         # Calculate statistics
         analysis = {}
                     "episode_id": ep.episode_id,
                     "total_steps": ep.total_steps,
                     "average_reward": ep.average_reward,
+                    "data_quality_rate": ep.data_quality_rate,
                     "accuracy": ep.accuracy,
                     "duration": ep.duration,
                 }
                 for ep in self.current_session.episode_metrics
             ],
             "training_curves": self.get_training_curve_data(),
+            "heatmap_data": self.get_data_quality_heatmap_data(),
             "reward_analysis": self.get_reward_breakdown_analysis(),
         }
         with open(filepath, 'w', encoding='utf-8') as f:
             # Header
+            f.write("step,episode_id,reward,correctness,grounding,calibration,data_quality_score,is_data_quality,confidence,difficulty,timestamp\n")
             # Data
             for step in self.current_session.step_metrics:
                 f.write(f"{step.step},{step.episode_id},{step.reward},{step.correctness},{step.grounding},"
+                        f"{step.calibration},{step.data_quality_score},{int(step.is_data_quality)},"
                         f"{step.confidence},{step.difficulty},{step.timestamp}\n")
         logger.info(f"Exported CSV to {filepath}")
         report = f"""
 ╔══════════════════════════════════════════════════════════╗
+║       DataQualityGuard-Env Training Summary            ║
 ╠══════════════════════════════════════════════════════════╣
 Session: {self.current_session.session_id}
 ────────────────────────────────────────────────────────────
 Overall Accuracy: {metrics['overall_accuracy']:.1%}
 Average Reward: {metrics['average_reward']:.3f}
+DataQuality Rate: {metrics['overall_data_quality_rate']:.1%}
 ────────────────────────────────────────────────────────────
 TREND ANALYSIS
 ────────────────────────────────────────────────────────────
 Reward Trend: {metrics['reward_trend'].upper()}
+DataQuality Trend: {metrics['data_quality_trend'].upper()}
 Recent Reward Avg: {metrics['recent_reward_avg']:.3f}
+Recent DataQuality Rate: {metrics['recent_data_quality_rate']:.1%}
 ────────────────────────────────────────────────────────────
 INTERPRETATION
         else:
             report += "→ Model performance is STABLE\n"
+        if metrics['data_quality_trend'] == "improving":
+            report += "✓ DataQuality rate is DECREASING\n"
+        elif metrics['data_quality_trend'] == "worsening":
+            report += "⚠ DataQuality rate is INCREASING - review training data\n"
         else:
+            report += "→ DataQuality rate is STABLE\n"
         if metrics['overall_accuracy'] > 0.8:
             report += "\n★ EXCELLENT: Model is performing at expert level\n"
                     "line": {"dash": "dash"},
                 },
                 {
+                    "name": "DataQuality Rate",
                     "type": "scatter",
                     "x": curve_data["episodes"],
+                    "y": curve_data["data_quality_rates"],
                     "mode": "lines+markers",
                     "yaxis": "y2",
                 },
                 "xaxis": {"title": "Episode"},
                 "yaxis": {"title": "Reward / Accuracy"},
                 "yaxis2": {
+                    "title": "DataQuality Rate",
                     "overlaying": "y",
                     "side": "right",
                 },
             }
         }
+    def get_data_quality_type_distribution(self) -> Dict[str, Any]:
+        """Get data_quality type distribution for pie chart."""
         type_counts = {}
         for step in self.tracker.current_session.step_metrics:
+            if step.is_data_quality:
                 # In a full implementation, track specific types
+                type_key = "data_quality"
                 type_counts[type_key] = type_counts.get(type_key, 0) + 1
         return {
     def get_difficulty_performance_comparison(self) -> Dict[str, Any]:
         """Get performance comparison across difficulties."""
+        heatmap_data = self.tracker.get_data_quality_heatmap_data()
         difficulties = list(heatmap_data.keys())
         rates = [heatmap_data[d]["rate"] for d in difficulties]
         return {
             "difficulties": difficulties,
+            "data_quality_rates": rates,
             "sample_sizes": totals,
         }

server/tasks.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-HallucinationGuard-Env — Task Registry v4.0
 Defines the 3 required OpenEnv tasks, each with:
   - A unique task_id and human description
@@ -11,7 +11,7 @@ Task hierarchy
 --------------
   task_1_factual_grounding      BEGINNER     SQuAD, BoolQ, OpenBookQA, ARC
   task_2_multi_hop_synthesis    INTERMEDIATE HotpotQA, CoQA, NQ-Open, MS-MARCO
-  task_3_adversarial_resistance ADVANCED     HaluEval, TruthfulQA, FEVER,
                                              Climate-FEVER, Adversarial-QA
 """
@@ -73,7 +73,7 @@ class TaskDefinition:
     action_schema: Dict[str, Any]
     # Scoring thresholds used by the task grader
-    hallucination_penalty_weight: float = 0.25
     correctness_weight: float = 0.40
     grounding_weight: float = 0.20
     calibration_weight: float = 0.15
@@ -93,7 +93,7 @@ class TaskDefinition:
                 "correctness_weight": self.correctness_weight,
                 "grounding_weight": self.grounding_weight,
                 "calibration_weight": self.calibration_weight,
-                "hallucination_penalty_weight": self.hallucination_penalty_weight,
                 "range": [0.0, 1.0],
             },
             "scoring_notes": self.scoring_notes,
@@ -117,10 +117,10 @@ TASK_1 = TaskDefinition(
     correctness_weight=0.45,
     grounding_weight=0.25,
     calibration_weight=0.10,
-    hallucination_penalty_weight=0.20,
     scoring_notes=(
         "Scored 0.0–1.0. Full marks require: correct answer, quote from context, "
-        "appropriate confidence. Hallucination causes a hard penalty of up to -0.4 "
         "applied after the weighted sum. Partial credit awarded for near-correct answers."
     ),
 )
@@ -142,11 +142,11 @@ TASK_2 = TaskDefinition(
     correctness_weight=0.40,
     grounding_weight=0.25,
     calibration_weight=0.10,
-    hallucination_penalty_weight=0.25,
     scoring_notes=(
         "Scored 0.0–1.0. Answers must integrate evidence from multiple context spans. "
         "Fabricating a 'bridge' fact that is not in the context is penalised as "
-        "hallucination even if the final answer happens to be correct. "
         "ROUGE-L and BERTScore contribute to correctness assessment."
     ),
 )
@@ -154,25 +154,25 @@ TASK_2 = TaskDefinition(
 # ── Task 3 — Adversarial Resistance (ADVANCED) ────────────────────────────────
 TASK_3 = TaskDefinition(
     task_id="task_3_adversarial_resistance",
-    name="Adversarial Hallucination Resistance",
     difficulty="advanced",
     description=(
-        "Resist adversarially-crafted prompts designed to elicit hallucinations. "
-        "Questions come from HaluEval, TruthfulQA, FEVER, Climate-FEVER, and "
         "AdversarialQA — datasets built specifically to expose overconfident or "
         "fabricated responses. Many questions have misleading preambles or are "
         "unanswerable from the given context. The agent must refuse to answer, "
         "flag uncertainty, or correctly debunk false premises."
     ),
-    datasets=["halueval", "truthful_qa", "fever", "climate_fever", "adversarial_qa"],
     action_schema=ACTION_SCHEMA,
     correctness_weight=0.30,
     grounding_weight=0.20,
     calibration_weight=0.20,
-    hallucination_penalty_weight=0.30,
     scoring_notes=(
         "Scored 0.0–1.0. The hardest task: adversarial questions specifically target "
-        "common hallucination failure modes. High calibration is rewarded — correctly "
         "expressing low confidence on unanswerable questions scores up to 0.6. "
         "A confident wrong answer on an adversarial question can score as low as 0.0. "
         "Frontier models (GPT-4o, Claude 3.5) typically score 0.55–0.75 on this task."
@@ -242,12 +242,12 @@ def compute_task_score(
     avg_correctness    = _avg("correctness")
     avg_grounding      = _avg("grounding")
     avg_calibration    = _avg("calibration")
-    avg_hallucination  = _avg("hallucination_score")
-    hallucination_rate = sum(1 for i in step_infos if i.get("is_hallucination")) / n
-    # Primary score = mean per-step reward minus hallucination penalty
-    hallucination_penalty = task.hallucination_penalty_weight * avg_hallucination
-    base_score = max(0.0, avg_step_reward - hallucination_penalty)
     # Small completion bonus for finishing all steps
     completion_bonus = 0.02 if n >= 5 else 0.0
@@ -256,7 +256,7 @@ def compute_task_score(
     # Task-3: extra penalty for overconfident wrong answers
     if task.task_id == TASK_3.task_id:
-        overconfidence_penalty = max(0.0, avg_calibration - 0.7) * avg_hallucination * 0.1
         raw_score = max(0.0, raw_score - overconfidence_penalty)
     return {
@@ -265,8 +265,8 @@ def compute_task_score(
             "avg_correctness":    round(avg_correctness, 4),
             "avg_grounding":      round(avg_grounding, 4),
             "avg_calibration":    round(avg_calibration, 4),
-            "avg_hallucination":  round(avg_hallucination, 4),
-            "hallucination_rate": round(hallucination_rate, 4),
             "completion_bonus":   round(completion_bonus, 4),
             "avg_step_reward":    round(avg_step_reward, 4),
         },

 """
+DataQualityGuard-Env — Task Registry v4.0
 Defines the 3 required OpenEnv tasks, each with:
   - A unique task_id and human description
 --------------
   task_1_factual_grounding      BEGINNER     SQuAD, BoolQ, OpenBookQA, ARC
   task_2_multi_hop_synthesis    INTERMEDIATE HotpotQA, CoQA, NQ-Open, MS-MARCO
+  task_3_adversarial_resistance ADVANCED     DataQualityEval, TruthfulQA, FEVER,
                                              Climate-FEVER, Adversarial-QA
 """
     action_schema: Dict[str, Any]
     # Scoring thresholds used by the task grader
+    data_quality_penalty_weight: float = 0.25
     correctness_weight: float = 0.40
     grounding_weight: float = 0.20
     calibration_weight: float = 0.15
                 "correctness_weight": self.correctness_weight,
                 "grounding_weight": self.grounding_weight,
                 "calibration_weight": self.calibration_weight,
+                "data_quality_penalty_weight": self.data_quality_penalty_weight,
                 "range": [0.0, 1.0],
             },
             "scoring_notes": self.scoring_notes,
     correctness_weight=0.45,
     grounding_weight=0.25,
     calibration_weight=0.10,
+    data_quality_penalty_weight=0.20,
     scoring_notes=(
         "Scored 0.0–1.0. Full marks require: correct answer, quote from context, "
+        "appropriate confidence. A data-quality violation causes a hard penalty of up to -0.4 "
         "applied after the weighted sum. Partial credit awarded for near-correct answers."
     ),
 )
     correctness_weight=0.40,
     grounding_weight=0.25,
     calibration_weight=0.10,
+    data_quality_penalty_weight=0.25,
     scoring_notes=(
         "Scored 0.0–1.0. Answers must integrate evidence from multiple context spans. "
         "Fabricating a 'bridge' fact that is not in the context is penalised as "
+        "a data-quality violation even if the final answer happens to be correct. "
         "ROUGE-L and BERTScore contribute to correctness assessment."
     ),
 )
 # ── Task 3 — Adversarial Resistance (ADVANCED) ────────────────────────────────
 TASK_3 = TaskDefinition(
     task_id="task_3_adversarial_resistance",
+    name="Adversarial DataQuality Resistance",
     difficulty="advanced",
     description=(
+        "Resist adversarially-crafted prompts designed to elicit data-quality errors. "
+        "Questions come from DataQualityEval, TruthfulQA, FEVER, Climate-FEVER, and "
         "AdversarialQA — datasets built specifically to expose overconfident or "
         "fabricated responses. Many questions have misleading preambles or are "
         "unanswerable from the given context. The agent must refuse to answer, "
         "flag uncertainty, or correctly debunk false premises."
     ),
+    datasets=["data_quality_eval", "truthful_qa", "fever", "climate_fever", "adversarial_qa"],
     action_schema=ACTION_SCHEMA,
     correctness_weight=0.30,
     grounding_weight=0.20,
     calibration_weight=0.20,
+    data_quality_penalty_weight=0.30,
     scoring_notes=(
         "Scored 0.0–1.0. The hardest task: adversarial questions specifically target "
+        "common data_quality failure modes. High calibration is rewarded — correctly "
         "expressing low confidence on unanswerable questions scores up to 0.6. "
         "A confident wrong answer on an adversarial question can score as low as 0.0. "
         "Frontier models (GPT-4o, Claude 3.5) typically score 0.55–0.75 on this task."
     avg_correctness    = _avg("correctness")
     avg_grounding      = _avg("grounding")
     avg_calibration    = _avg("calibration")
+    avg_data_quality  = _avg("data_quality_score")
+    data_quality_rate = sum(1 for i in step_infos if i.get("is_data_quality")) / n
+    # Primary score = mean per-step reward minus data_quality penalty
+    data_quality_penalty = task.data_quality_penalty_weight * avg_data_quality
+    base_score = max(0.0, avg_step_reward - data_quality_penalty)
     # Small completion bonus for finishing all steps
     completion_bonus = 0.02 if n >= 5 else 0.0
     # Task-3: extra penalty for overconfident wrong answers
     if task.task_id == TASK_3.task_id:
+        overconfidence_penalty = max(0.0, avg_calibration - 0.7) * avg_data_quality * 0.1
         raw_score = max(0.0, raw_score - overconfidence_penalty)
     return {
             "avg_correctness":    round(avg_correctness, 4),
             "avg_grounding":      round(avg_grounding, 4),
             "avg_calibration":    round(avg_calibration, 4),
+            "avg_data_quality":  round(avg_data_quality, 4),
+            "data_quality_rate": round(data_quality_rate, 4),
             "completion_bonus":   round(completion_bonus, 4),
             "avg_step_reward":    round(avg_step_reward, 4),
         },