williyam's picture
chore: remove rag-8000.zcodecorp.in live links and hackathon text
97f7618
"""
Gradio UI for Agentic RAG Gym - Royal Glassmorphism Theme.
Provides an interactive interface for:
- Running episodes against the environment
- Viewing task descriptions and difficulty
- Real-time step-by-step feedback with rewards
- Trajectory visualization
- Benchmark results
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
import gradio as gr
import httpx
from rag_master.rewards import _SCORE_MIN, clamp_score
# Directory that markdown files are loaded from (see _load_markdown).
# Taken from the APP_ROOT environment variable when set; otherwise the
# directory two levels above this file.
_PROJECT_ROOT = Path(os.environ.get("APP_ROOT", Path(__file__).resolve().parent.parent))
# Base URL that _resolve_images prepends to relative markdown image paths.
_MEDIA_GH = "https://media.githubusercontent.com/media/williyam-m/agentic-rag-gym/main"
def _resolve_images(text: str) -> str:
"""Replace relative image paths with absolute GitHub media URLs for Gradio.
Uses media.githubusercontent.com instead of raw.githubusercontent.com
because the repo uses Git LFS for images — raw URLs return LFS pointers,
while media URLs serve the actual binary content.
"""
import re
def _repl(m: re.Match) -> str:
alt, path = m.group(1), m.group(2)
if path.startswith(("http://", "https://")):
return m.group(0)
return f"![{alt}]({_MEDIA_GH}/{path})"
return re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", _repl, text)
def _load_markdown(filename: str) -> str:
    """Load a markdown file from the project root, stripping YAML frontmatter.

    Frontmatter is recognised only when the very first line is exactly
    ``---`` (ignoring surrounding whitespace) and a later line is also
    exactly ``---``. Matching whole lines keeps a ``---`` that appears inside
    a frontmatter value, or a leading ``----`` horizontal rule, from being
    mistaken for a delimiter. If no closing delimiter exists the text is
    left as-is.

    Args:
        filename: Path of the markdown file relative to ``_PROJECT_ROOT``.

    Returns:
        The markdown body with relative image paths resolved by
        ``_resolve_images``, or an italic "not found" placeholder when the
        path does not point at a regular file.
    """
    path = _PROJECT_ROOT / filename
    # is_file() also rejects directories, which read_text() would choke on.
    if not path.is_file():
        return f"*{filename} not found.*"
    text = path.read_text(encoding="utf-8")

    lines = text.splitlines(keepends=True)
    if lines and lines[0].strip() == "---":
        for idx in range(1, len(lines)):
            if lines[idx].strip() == "---":
                # Drop everything up to and including the closing delimiter.
                text = "".join(lines[idx + 1:]).lstrip("\n")
                break
    return _resolve_images(text)
def _load_readme() -> str:
    """Return the README.md text with badge-only lines removed.

    The text is obtained through ``_load_markdown("README.md")``. A line is
    dropped when, after trimming surrounding whitespace, it begins with
    ``[![`` and ends with ``)`` — the shape of a linked badge image. All
    other lines are kept in their original order.
    """

    def _is_badge(row: str) -> bool:
        trimmed = row.strip()
        return trimmed.startswith("[![") and trimmed.endswith(")")

    readme = _load_markdown("README.md")
    return "\n".join(row for row in readme.split("\n") if not _is_badge(row))
# Custom stylesheet handed to gr.Blocks(css=...) in build_ui.
# It declares the gold-on-dark palette as CSS variables under :root and the
# helper classes used by the HTML snippets and elem_classes in this file
# (.main-header, .glass-card, .gold-button, .status-badge / .badge-*,
# .reward-display), plus overrides for Gradio's own inputs and buttons.
# NOTE(review): .step-log asks for 'JetBrains Mono', which the @import below
# does not load (only Cinzel and Inter) — it will fall back to monospace
# unless the font is available locally.
ROYAL_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Cinzel:wght@400;600;700&family=Inter:wght@300;400;500;600&display=swap');
:root {
--gold: #D4AF37;
--gold-light: #F0D060;
--gold-dark: #A08520;
--royal-black: #0A0A0F;
--royal-dark: #12121A;
--royal-surface: #1A1A2E;
--royal-card: rgba(26, 26, 46, 0.7);
--text-primary: #FFFFFF;
--text-secondary: #B0B0C0;
--glass-border: rgba(212, 175, 55, 0.3);
--glass-bg: rgba(26, 26, 46, 0.6);
--success: #4CAF50;
--warning: #FF9800;
--error: #F44336;
}
.gradio-container {
background: linear-gradient(135deg, var(--royal-black) 0%, var(--royal-dark) 50%, #16213E 100%) !important;
font-family: 'Inter', sans-serif !important;
min-height: 100vh;
}
.main-header {
text-align: center;
padding: 2rem 1rem;
background: linear-gradient(180deg, rgba(212, 175, 55, 0.1) 0%, transparent 100%);
border-bottom: 1px solid var(--glass-border);
margin-bottom: 1.5rem;
}
.main-header h1 {
font-family: 'Cinzel', serif !important;
font-size: 2.4rem !important;
font-weight: 700 !important;
background: linear-gradient(135deg, var(--gold-light), var(--gold), var(--gold-dark));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 0.3rem;
letter-spacing: 2px;
}
.main-header p {
color: var(--text-secondary);
font-size: 1rem;
font-weight: 300;
}
.glass-card {
background: var(--glass-bg) !important;
backdrop-filter: blur(20px) !important;
-webkit-backdrop-filter: blur(20px) !important;
border: 1px solid var(--glass-border) !important;
border-radius: 16px !important;
padding: 1.5rem !important;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3) !important;
}
.gold-button {
background: linear-gradient(135deg, var(--gold-dark), var(--gold), var(--gold-light)) !important;
color: var(--royal-black) !important;
font-weight: 600 !important;
border: none !important;
border-radius: 12px !important;
padding: 12px 28px !important;
font-size: 1rem !important;
letter-spacing: 1px !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 15px rgba(212, 175, 55, 0.3) !important;
}
.gold-button:hover {
box-shadow: 0 6px 25px rgba(212, 175, 55, 0.5) !important;
transform: translateY(-2px) !important;
}
.status-badge {
display: inline-block;
padding: 4px 12px;
border-radius: 20px;
font-size: 0.85rem;
font-weight: 500;
}
.badge-easy { background: rgba(76, 175, 80, 0.2); color: #4CAF50; border: 1px solid rgba(76, 175, 80, 0.4); }
.badge-medium { background: rgba(255, 152, 0, 0.2); color: #FF9800; border: 1px solid rgba(255, 152, 0, 0.4); }
.badge-hard { background: rgba(244, 67, 54, 0.2); color: #F44336; border: 1px solid rgba(244, 67, 54, 0.4); }
.reward-display {
font-family: 'Cinzel', serif;
font-size: 2rem;
text-align: center;
padding: 1rem;
}
.step-log {
font-family: 'JetBrains Mono', monospace;
font-size: 0.85rem;
line-height: 1.6;
padding: 1rem;
background: rgba(0, 0, 0, 0.3);
border-radius: 8px;
border: 1px solid rgba(212, 175, 55, 0.15);
max-height: 400px;
overflow-y: auto;
}
.metric-card {
text-align: center;
padding: 1rem;
background: rgba(212, 175, 55, 0.05);
border: 1px solid rgba(212, 175, 55, 0.2);
border-radius: 12px;
}
.metric-card .value {
font-family: 'Cinzel', serif;
font-size: 1.8rem;
color: var(--gold);
}
.metric-card .label {
color: var(--text-secondary);
font-size: 0.85rem;
margin-top: 0.3rem;
}
/* Override Gradio defaults */
.dark .gr-box, .dark .gr-input, .dark .gr-panel {
background: var(--glass-bg) !important;
border-color: var(--glass-border) !important;
}
.dark label, .dark .gr-check-radio label {
color: var(--text-secondary) !important;
}
textarea, input[type="text"], select {
background: rgba(0, 0, 0, 0.3) !important;
border: 1px solid var(--glass-border) !important;
color: var(--text-primary) !important;
border-radius: 8px !important;
}
.gr-button.primary {
background: linear-gradient(135deg, var(--gold-dark), var(--gold)) !important;
color: var(--royal-black) !important;
border: none !important;
}
"""
# Base URL of the backend; _call_api prepends it to every endpoint path.
API_URL = "http://localhost:7860"
# --- Task choices per domain ---
# Maps a domain key to a list of (display label, task_id) pairs. The lists are
# passed directly as Dropdown `choices`, and the task_id is what gets sent to
# the backend's /reset endpoint.
DOMAIN_TASKS = {
    "aerospace": [
        ("Compare Propulsion Technologies (Easy)", "aero_easy_propulsion_comparison"),
        ("Space Debris Mitigation (Easy)", "aero_easy_debris_mitigation"),
        ("Mars EDL Architecture (Medium)", "aero_medium_mars_edl"),
        ("Deep Space Life Support (Medium)", "aero_medium_life_support"),
        ("Hypersonic Vehicle Design (Hard)", "aero_hard_hypersonic_vehicle"),
    ],
    "legal_research": [
        ("Contract Clause Analysis (Easy)", "legal_easy_contract_review"),
        ("Data Privacy Compliance (Easy)", "legal_easy_privacy_compliance"),
        ("IP Assessment (Medium)", "legal_medium_ip_analysis"),
        ("M&A Due Diligence (Medium)", "legal_medium_ma_due_diligence"),
        ("Cross-Border Dispute (Hard)", "legal_hard_cross_border_dispute"),
    ],
}
# Human-readable domain names, used by switch_domain for its status message.
DOMAIN_LABELS = {
    "aerospace": "Aerospace Research",
    "legal_research": "Legal Research",
}
def _call_api(method: str, endpoint: str, data: Optional[Dict] = None) -> Dict[str, Any]:
    """Call the backend and always hand back a dict.

    Args:
        method: ``"GET"`` issues a GET request; any other value issues a POST.
        endpoint: Path appended to ``API_URL`` (for example ``"/reset"``).
        data: JSON body for POST requests; an empty object is sent when omitted.

    Returns:
        The decoded JSON object on success. On any failure — transport error,
        HTTP status >= 400, undecodable body, or a JSON body that is not an
        object — a dict of the form ``{"error": <detail>}``. This function
        never raises, so UI callbacks can test ``"error" in result``.
    """
    url = f"{API_URL}{endpoint}"
    try:
        with httpx.Client(timeout=60.0) as client:
            if method == "GET":
                resp = client.get(url)
            else:
                resp = client.post(url, json=data or {})
            if resp.status_code >= 400:
                try:
                    detail = resp.json().get("detail", resp.text)
                except (ValueError, AttributeError):
                    # Body is not JSON, or is JSON but not an object.
                    detail = resp.text
                return {"error": detail}
            payload = resp.json()
    except Exception as exc:
        # UI boundary: surface every failure as an error dict instead of
        # letting it propagate into a Gradio callback.
        return {"error": str(exc)}
    if not isinstance(payload, dict):
        # Callers use .get() on the result; a list/null/string body would
        # otherwise blow up far away from the actual cause.
        return {
            "error": (
                f"Unexpected response from {endpoint}: expected a JSON object, "
                f"got {type(payload).__name__}"
            )
        }
    return payload
def switch_domain(domain: str):
    """Ask the backend to activate ``domain`` and refresh both task dropdowns.

    Returns a 3-tuple: an update for the interactive task dropdown, an
    identical update for the auto-pilot task dropdown, and a status message.
    When the backend reports an error, the dropdowns are given the aerospace
    task list with no selected value and the message carries the error.
    """
    response = _call_api("POST", "/domain/switch", {"domain": domain})

    if "error" not in response:
        choices = DOMAIN_TASKS.get(domain, [])
        selected = choices[0][1] if choices else None
        message = f"Switched to {DOMAIN_LABELS.get(domain, domain)} domain"
    else:
        choices = DOMAIN_TASKS.get("aerospace", [])
        selected = None
        message = f"Error: {response['error']}"

    return (
        gr.update(choices=choices, value=selected),
        gr.update(choices=choices, value=selected),
        message,
    )
def reset_environment(task_id: str) -> tuple:
    """Start a fresh episode for ``task_id`` via the backend's /reset endpoint.

    An empty ``task_id`` sends an empty request body.

    Returns a 6-tuple in the order the reset button's outputs expect:
    task info markdown, difficulty badge HTML, retrieved-docs text,
    step counter, last reward, status message.
    """
    payload = {"task_id": task_id} if task_id else {}
    response = _call_api("POST", "/reset", payload)

    if "error" in response:
        return (f"**Error:** {response['error']}", "N/A", "", "0", "0.0", "Not started")

    task = response.get("observation", {}).get("task", {})

    # Markdown summary of the task; sections are separated by blank lines.
    sections = [
        f"### {task.get('name', 'Unknown')}",
        f"**Difficulty:** {task.get('difficulty', 'N/A')}",
        f"**Max Steps:** {task.get('max_steps', 'N/A')}",
        f"**Description:**\n{task.get('description', 'N/A')}",
    ]
    task_info = "\n\n".join(sections)

    # The badge falls back to "unknown" (not "N/A") so it maps to a CSS class name.
    level = task.get("difficulty", "unknown")
    badge = f'<span class="status-badge badge-{level}">{level.upper()}</span>'

    return (task_info, badge, "", "0", "0.0", "Episode started - take your first action!")
def take_step(action_type: str, query: str, answer: str) -> tuple:
    """Send one action to the backend's /step endpoint and format the outcome.

    Args:
        action_type: Action name sent as ``type`` (e.g. ``"retrieve"``).
        query: Optional query text; included in the request only when non-empty.
        answer: Optional answer text; included in the request only when non-empty.

    Returns:
        A 5-tuple in the order the step button's outputs expect:
        retrieved-docs markdown, step number, last reward, status message,
        current answer. On error the status slot carries the error message
        and the other slots are reset to their defaults.
    """
    action_data: Dict[str, Any] = {"type": action_type}
    if query:
        action_data["query"] = query
    if answer:
        action_data["answer"] = answer

    result = _call_api("POST", "/step", action_data)
    # Auto-reset if environment was not initialized, then retry the step once.
    if "error" in result and "not initialized" in str(result["error"]).lower():
        _call_api("POST", "/reset")
        result = _call_api("POST", "/step", action_data)
    if "error" in result:
        return ("", "0", "0.0", f"**Error:** {result['error']}", "")

    # `or` fallbacks also cover keys that are present but null in the JSON,
    # which a plain .get(key, default) would pass through as None.
    obs = result.get("observation") or {}
    info = result.get("info") or {}
    reward = float(result.get("reward") or 0.0)
    done = result.get("done", False)
    step_num = info.get("step", 0)

    # One paragraph per retrieved document: score plus the first 200
    # characters of its content. Missing fields degrade to 0.0 / "".
    docs_display = "\n\n".join(
        f"**[{float(d.get('score') or 0.0):.2f}]** {str(d.get('content') or '')[:200]}..."
        for d in (obs.get("retrieved_docs") or [])
    )

    current_answer = obs.get("current_answer") or ""
    status = "Episode Complete!" if done else f"Step {step_num} completed"
    return (
        docs_display,
        str(step_num),
        f"{reward:.4f}",
        status,
        current_answer,
    )
def grade_episode() -> str:
    """Request a grade for the current episode and render it.

    Returns an error line when the backend reports one; otherwise a
    gold-coloured score banner followed by the task id and episode id.
    A missing score falls back to ``_SCORE_MIN`` before clamping.
    """
    response = _call_api("POST", "/grade")
    if "error" in response:
        return f"**Error:** {response['error']}"

    raw_score = response.get("score", _SCORE_MIN)
    score = clamp_score(float(raw_score))
    task_id = response.get("task_id", "Unknown")
    episode_id = response.get('episode_id', 'N/A')

    banner = (
        f'<div class="reward-display" style="color: var(--gold);">'
        f"Score: {score:.4f}"
        f"</div>"
    )
    return f"{banner}\n\n**Task:** {task_id}\n\n**Episode:** {episode_id}"
def get_tasks() -> str:
    """Fetch the backend's task list and render it as a markdown table.

    Returns an ``Error: ...`` line when the backend reports an error;
    otherwise a table with columns Task ID, Name, Difficulty and Max Steps.
    With no tasks, only the table header is returned.
    """
    response = _call_api("GET", "/tasks")
    if "error" in response:
        return f"Error: {response['error']}"

    def _row(task) -> str:
        # Difficulty doubles as the CSS badge suffix.
        level = task.get("difficulty", "unknown")
        return (
            f"| `{task['task_id']}` | {task['name']} | "
            f'<span class="status-badge badge-{level}">{level}</span> | '
            f"{task['max_steps']} |"
        )

    table_head = "| Task ID | Name | Difficulty | Max Steps |\n|---|---|---|---|\n"
    return table_head + "\n".join(_row(task) for task in response.get("tasks", []))
def _build_auto_steps(task_description: str, max_steps: int) -> list:
"""Build a dynamic step plan that fills exactly max_steps.
Structure: plan → [retrieve, reason, critique] cycles → answer.
The number of cycles is determined by the task's max_steps.
"""
# Queries to rotate through during retrieve steps
queries = [
task_description,
"detailed technical analysis and supporting evidence",
"specific examples and case studies",
"expert methodology and best practices",
"risks, challenges, and mitigations",
]
steps = [{"type": "plan"}]
remaining = max_steps - 2 # plan takes 1, answer takes 1
cycle_types = ["retrieve", "reason", "critique"]
cycle_idx = 0
retrieve_count = 0
while remaining > 0:
action_type = cycle_types[cycle_idx % 3]
if action_type == "retrieve":
steps.append({"type": "retrieve", "query": queries[retrieve_count % len(queries)]})
retrieve_count += 1
else:
steps.append({"type": action_type})
cycle_idx += 1
remaining -= 1
steps.append({"type": "answer"})
return steps
def run_full_episode(task_id: str) -> tuple:
    """Run a complete automated episode: reset, scripted steps, grade.

    The step plan comes from ``_build_auto_steps`` using the task description
    and ``max_steps`` reported by /reset (12 when not reported). The loop
    stops at the first step error or as soon as a step is done. The episode
    is graded afterwards either way; a grading failure is written to the log
    and displayed as the minimum score.

    Args:
        task_id: Task to reset to; an empty value sends an empty reset body.

    Returns:
        A 3-tuple: episode log markdown, the generated answer read from
        /state, and the final-score HTML banner. If the reset itself fails
        the tuple is ``("Error: ...", "", "")``.
    """
    reset_result = _call_api("POST", "/reset", {"task_id": task_id} if task_id else {})
    if "error" in reset_result:
        return (f"Error: {reset_result['error']}", "", "")

    obs = reset_result.get("observation", {})
    task_meta = obs.get("task", {})
    task_name = task_meta.get("name", task_id or "Unknown")
    task_description = task_meta.get("description", "")
    max_steps: int = int(task_meta.get("max_steps", 12))
    difficulty = task_meta.get("difficulty", "unknown")

    log_lines = [
        f"**Episode Started** — {task_name} ({difficulty}, {max_steps} steps)\n"
    ]

    steps = _build_auto_steps(task_description, max_steps)
    last_index = len(steps)
    for i, action in enumerate(steps, 1):
        result = _call_api("POST", "/step", action)
        if "error" in result:
            log_lines.append(f"Step {i}: Error - {result['error']}")
            break
        # A null reward in the response is shown as 0.0 rather than raising.
        reward = float(result.get("reward") or 0.0)
        done = result.get("done", False)
        # Mark done on the final answer step since orchestrator only flags
        # done on the step *after* answer, which auto pilot never takes.
        if action["type"] == "answer" and i == last_index:
            done = True
        log_lines.append(
            f"**Step {i}** | `{action['type']}` | Reward: `{reward:.4f}` | Done: `{done}`"
        )
        if done:
            break

    # Grade
    grade_result = _call_api("POST", "/grade")
    if "error" in grade_result:
        # Make the failure visible instead of only showing the minimum score.
        log_lines.append(f"Grade: Error - {grade_result['error']}")
        final_score = clamp_score(_SCORE_MIN)
    else:
        final_score = clamp_score(float(grade_result.get("score", _SCORE_MIN)))

    log_text = "\n\n".join(log_lines)
    # An error dict from /state simply yields an empty answer.
    state = _call_api("GET", "/state")
    answer = state.get("generated_answer", "")
    score_display = (
        f'<div class="reward-display" style="color: var(--gold);">'
        f"Final Score: {final_score:.4f}"
        f"</div>"
    )
    return (log_text, answer, score_display)
def build_ui() -> gr.Blocks:
    """Build the Gradio UI with royal glassmorphism theme.

    Creates a ``gr.Blocks`` app styled with ``ROYAL_CSS`` that contains a
    domain selector and six tabs: Interactive Mode, Auto Pilot, Tasks,
    Blog / Writeup, README and About. Button and dropdown events are wired to
    the callback functions defined in this module. The app is returned
    without being launched.
    """
    with gr.Blocks(css=ROYAL_CSS, theme=gr.themes.Base(), title="Agentic RAG Gym") as demo:
        # Header
        gr.HTML(
            '<div class="main-header">'
            "<h1>⚜ AGENTIC RAG GYM ⚜</h1>"
            "<p>RL-Enhanced Agentic RAG Framework — Extensible to Any Domain</p>"
            "</div>"
        )
        # Domain selector - top level
        with gr.Row():
            domain_selector = gr.Dropdown(
                choices=[
                    ("Aerospace Research", "aerospace"),
                    ("Legal Research", "legal_research"),
                ],
                label="Select Domain",
                value="aerospace",
                scale=2,
            )
            domain_status = gr.Textbox(label="Domain Status", value="Active: Aerospace Research", interactive=False, scale=2)
            gr.HTML(
                '<div style="padding: 10px; color: var(--text-secondary); font-size: 0.85rem; text-align: center;">'
                '🔮 More domains coming soon — the RAG Master framework is designed for any domain'
                '</div>'
            )
        with gr.Tabs() as tabs:
            # --- Tab 1: Interactive Mode ---
            with gr.Tab("🎯 Interactive Mode", id="interactive"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.HTML('<div class="glass-card"><h3 style="color: var(--gold);">Environment Control</h3></div>')
                        task_dropdown = gr.Dropdown(
                            choices=DOMAIN_TASKS["aerospace"],
                            label="Select Task",
                            value="aero_easy_propulsion_comparison",
                        )
                        reset_btn = gr.Button("⚡ Reset Episode", variant="primary", elem_classes=["gold-button"])
                        difficulty_badge = gr.HTML(value="")
                        gr.HTML('<hr style="border-color: rgba(212,175,55,0.2); margin: 1rem 0;">')
                        action_type = gr.Radio(
                            choices=["plan", "retrieve", "reason", "critique", "verify", "answer"],
                            label="Action Type",
                            value="retrieve",
                        )
                        query_input = gr.Textbox(
                            label="Query (for retrieve actions)",
                            placeholder="Enter search query...",
                            lines=2,
                        )
                        answer_input = gr.Textbox(
                            label="Answer (for answer actions)",
                            placeholder="Enter your answer...",
                            lines=4,
                        )
                        step_btn = gr.Button("▶ Take Step", variant="primary", elem_classes=["gold-button"])
                        grade_btn = gr.Button("📊 Grade Episode", variant="secondary")
                    with gr.Column(scale=2):
                        task_info = gr.Markdown(label="Task Information", value="*Select a task and click Reset*")
                        with gr.Row():
                            step_counter = gr.Textbox(label="Step", value="0", interactive=False)
                            reward_display = gr.Textbox(label="Last Reward", value="0.0", interactive=False)
                            status_display = gr.Textbox(label="Status", value="Ready", interactive=False)
                        docs_display = gr.Markdown(label="Retrieved Documents", value="")
                        answer_display = gr.Textbox(label="Generated Answer", value="", lines=6, interactive=False)
                        grade_display = gr.HTML(label="Grade", value="")
                # The order of `outputs` must match the tuple order returned
                # by reset_environment (6 values).
                reset_btn.click(
                    fn=reset_environment,
                    inputs=[task_dropdown],
                    outputs=[task_info, difficulty_badge, docs_display, step_counter, reward_display, status_display],
                )
                # The order of `outputs` must match the tuple order returned
                # by take_step (5 values).
                step_btn.click(
                    fn=take_step,
                    inputs=[action_type, query_input, answer_input],
                    outputs=[docs_display, step_counter, reward_display, status_display, answer_display],
                )
                grade_btn.click(fn=grade_episode, outputs=[grade_display])
            # --- Tab 2: Auto Pilot ---
            with gr.Tab("🚀 Auto Pilot", id="autopilot"):
                gr.HTML(
                    '<div class="glass-card"><h3 style="color: var(--gold);">Automated Episode Runner</h3>'
                    "<p>Runs a complete episode automatically: Plan → Retrieve → Reason → Critique → Retrieve → Reason → Answer</p></div>"
                )
                with gr.Row():
                    auto_task = gr.Dropdown(
                        choices=DOMAIN_TASKS["aerospace"],
                        label="Select Task",
                        value="aero_easy_propulsion_comparison",
                    )
                    auto_run_btn = gr.Button("🏁 Run Full Episode", variant="primary", elem_classes=["gold-button"])
                auto_log = gr.Markdown(label="Episode Log", value="")
                auto_answer = gr.Textbox(label="Final Answer", value="", lines=8, interactive=False)
                auto_score = gr.HTML(label="Final Score", value="")
                # run_full_episode returns (log, answer, score banner).
                auto_run_btn.click(
                    fn=run_full_episode,
                    inputs=[auto_task],
                    outputs=[auto_log, auto_answer, auto_score],
                )
            # --- Tab 3: Tasks ---
            with gr.Tab("📋 Tasks", id="tasks"):
                gr.HTML(
                    '<div class="glass-card"><h3 style="color: var(--gold);">Available Tasks</h3>'
                    "<p>Tasks for the currently active domain — switch domains above to see different tasks</p></div>"
                )
                tasks_display = gr.Markdown(value="Click refresh to load tasks")
                refresh_btn = gr.Button("🔄 Refresh Tasks", variant="secondary")
                refresh_btn.click(fn=get_tasks, outputs=[tasks_display])
            # --- Tab 4: Blog / Writeup ---
            # Content is read from disk once, when the UI is built.
            with gr.Tab("📝 Blog / Writeup", id="blog"):
                gr.Markdown(_load_markdown("Blog.MD"), sanitize_html=False)
            # --- Tab 5: README ---
            with gr.Tab("📖 README", id="readme"):
                gr.Markdown(_load_readme(), sanitize_html=False)
            # --- Tab 6: About ---
            with gr.Tab("ℹ️ About", id="about"):
                gr.Markdown(
                    """
<div class="glass-card">
## ⚜ Agentic RAG Gym
An **open-source RL-enhanced framework** that revolutionizes
**Retrieval-Augmented Generation** by training agents through
reinforcement learning across any knowledge domain.
### Architecture
| Component | Technology |
|---|---|
| Framework | RAG Master (custom orchestrator) |
| Backend | FastAPI + Uvicorn |
| Vector Store | FAISS |
| Embeddings | sentence-transformers |
| LLM | OpenAI-compatible API |
| UI | Gradio (HF Space) |
| Domains | Aerospace Research, Legal Research, _more coming soon_ |
### Multi-Agent System
- **Retriever Agent** — Document search and query reformulation
- **Reasoner Agent** — Analysis and synthesis over documents
- **Critic Agent** — Quality evaluation and improvement suggestions
- **Planner Agent** — Strategic planning for complex tasks
- **Verifier Agent** — Factual grounding verification
### OpenEnv Compliance
Full implementation of the OpenEnv specification:
`reset()` → `step()` → `state()` → `grade()`
### Links
| Resource | URL |
|---|---|
| **HF Space** | [Agentic RAG Gym](https://huggingface.co/spaces/williyam/agentic-rag-gym) |
| **YouTube Demo** | [Watch the Demo](https://www.youtube.com/watch?v=M65DHY8za6M) |
| **Fine-Tuned Model** | [Qwen2.5 GRPO LoRA Adapter](https://huggingface.co/williyam/agentic-rag-aerospace-grpo) |
| **GitHub** | [agentic-rag-gym](https://github.com/williyam-m/agentic-rag-gym) |
| **Training Notebook** | [Google Colab](https://colab.research.google.com/drive/14il2JQmy9-id_fSGpmYbssp-j975DSDo?usp=sharing) |
</div>
"""
                )
        # Wire domain selector to update both task dropdowns in one call
        # (switch_domain returns updates for both dropdowns plus the status text).
        domain_selector.change(
            fn=switch_domain,
            inputs=[domain_selector],
            outputs=[task_dropdown, auto_task, domain_status],
        )
    return demo