Spaces:
Sleeping
Sleeping
| """ | |
server/app.py — FastAPI application for ScholarEnv.
Exposes the six endpoints required by OpenEnv / hackathon validation:
    POST /reset         — start a new episode
    POST /step          — submit an action
    GET  /state         — inspect current episode state
    GET  /health        — liveness probe (returns 200)
    GET  /action_space  — action schema documentation
    GET  /tasks         — list all available tasks
All request/response bodies are JSON.
CORS is enabled for HuggingFace Spaces embedding.
Usage:
    uvicorn server.app:app --host 0.0.0.0 --port 7860
| """ | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from fastapi import FastAPI, Request | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse, HTMLResponse | |
| # Ensure root is on path when running from server/ | |
| _ROOT = Path(__file__).resolve().parent.parent | |
| if str(_ROOT) not in sys.path: | |
| sys.path.insert(0, str(_ROOT)) | |
| from server.environment import ScholarEnvironment, TASK_CONFIG | |
# ── App setup ─────────────────────────────────────────────────────────────────
# Application object served by uvicorn as "server.app:app".
app = FastAPI(
    title="ScholarEnv",
    version="0.4.0",
    description=(
        "OpenEnv environment for scholarly integrity verification. "
        "Three tasks: formatting compliance, internal consistency, "
        "claim-evidence audit."
    ),
)

# Wide-open CORS: any origin, method and header is accepted so the API can be
# called from pages hosted elsewhere (e.g. an embedding HuggingFace Space).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Single environment instance shared across requests
# (stateful — one active episode at a time, sufficient for hackathon eval)
_ENV: ScholarEnvironment | None = None


def get_env() -> ScholarEnvironment:
    """Return the shared ScholarEnvironment, building it on first call.

    The instance is cached in the module-level ``_ENV`` so every request
    handler works against the same environment. The data directory is read
    from the ``DATA_DIR`` environment variable and falls back to ``"data"``.
    """
    global _ENV
    if _ENV is not None:
        return _ENV
    _ENV = ScholarEnvironment(data_dir=os.environ.get("DATA_DIR", "data"))
    return _ENV
# ── Health ────────────────────────────────────────────────────────────────────
# Static landing page. Non-ASCII characters (dashes, multiplication sign,
# middle dot, emoji) are written as HTML entities so the page renders
# correctly even if this source file is re-encoded somewhere along the way.
_LANDING_PAGE_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ScholarEnv &mdash; OpenEnv Research Integrity</title>
<style>
  * { box-sizing: border-box; margin: 0; padding: 0; }
  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
         background: #0f1117; color: #e0e0e0; padding: 40px 20px; }
  .container { max-width: 860px; margin: 0 auto; }
  h1 { font-size: 2.2rem; color: #fff; margin-bottom: 8px; }
  h1 span { color: #f97316; }
  .subtitle { color: #9ca3af; margin-bottom: 32px; font-size: 1.05rem; }
  .badge { display: inline-block; padding: 3px 10px; border-radius: 12px;
           font-size: 0.75rem; font-weight: 600; margin-right: 6px; }
  .badge-blue { background: #1d4ed8; color: #fff; }
  .badge-green { background: #166534; color: #86efac; }
  .badge-orange { background: #7c2d12; color: #fed7aa; }
  .badges { margin-bottom: 28px; }
  h2 { font-size: 1.2rem; color: #f97316; margin: 28px 0 12px;
       border-bottom: 1px solid #1f2937; padding-bottom: 6px; }
  .task-card { background: #1f2937; border-radius: 8px; padding: 16px 20px;
               margin-bottom: 10px; border-left: 4px solid #f97316; }
  .task-card h3 { font-size: 1rem; color: #fff; margin-bottom: 4px; }
  .task-card p { color: #9ca3af; font-size: 0.875rem; }
  .task-meta { display: flex; gap: 16px; margin-top: 6px; font-size: 0.8rem; color: #6b7280; }
  code { background: #111827; padding: 2px 6px; border-radius: 4px;
         font-family: monospace; font-size: 0.875rem; color: #86efac; }
  .api-block { background: #111827; border-radius: 8px; padding: 16px 20px;
               margin-bottom: 10px; font-family: monospace; font-size: 0.82rem; color: #d1d5db; }
  .api-block .method { color: #60a5fa; font-weight: bold; margin-right: 8px; }
  .api-block .path { color: #f97316; }
  .links { display: flex; gap: 12px; margin-bottom: 28px; flex-wrap: wrap; }
  .link-btn { background: #1f2937; border: 1px solid #374151; color: #e0e0e0;
              padding: 8px 16px; border-radius: 6px; text-decoration: none;
              font-size: 0.875rem; transition: background 0.2s; }
  .link-btn:hover { background: #374151; }
  .authors { color: #9ca3af; font-size: 0.875rem; margin-top: 40px;
             border-top: 1px solid #1f2937; padding-top: 16px; }
</style>
</head>
<body>
<div class="container">
  <h1>&#128300; <span>Scholar</span>Env</h1>
  <p class="subtitle">The first RL environment for AI-assisted peer review and scholarly integrity verification.</p>
  <div class="badges">
    <span class="badge badge-blue">OpenEnv v0.4.0</span>
    <span class="badge badge-green">4 Tasks</span>
    <span class="badge badge-green">Running</span>
    <span class="badge badge-orange">Meta &times; PyTorch Hackathon</span>
  </div>
  <div class="links">
    <a class="link-btn" href="/docs">&#128214; Interactive API Docs (Swagger)</a>
    <a class="link-btn" href="/health">&#10084;&#65039; Health Check</a>
    <a class="link-btn" href="/tasks">&#128203; List Tasks</a>
    <a class="link-btn" href="/state">&#128202; Current State</a>
  </div>
  <h2>Available Tasks</h2>
  <div class="task-card">
    <h3>formatting_compliance <span class="badge badge-green">EASY</span></h3>
    <p>Fix IEEE manuscript formatting violations &mdash; abstract length, section order, citation style, author block.</p>
    <div class="task-meta"><span>Max steps: 3</span><span>Frontier baseline: 0.80&ndash;0.95</span></div>
  </div>
  <div class="task-card">
    <h3>internal_consistency <span class="badge badge-blue">MEDIUM</span></h3>
    <p>Find internal contradictions &mdash; number mismatches, nonexistent references, inconsistent contribution counts.</p>
    <div class="task-meta"><span>Max steps: 4</span><span>Frontier baseline: 0.40&ndash;0.65</span></div>
  </div>
  <div class="task-card" style="border-left-color: #ef4444;">
    <h3>claim_evidence_audit <span class="badge badge-orange">HARD</span></h3>
    <p>Find where text claims don't match table values. RL training value: frontier LLMs score 0.20&ndash;0.45 with no training.</p>
    <div class="task-meta"><span>Max steps: 6</span><span>Frontier baseline: <strong style="color:#f97316">0.20&ndash;0.45</strong></span></div>
  </div>
  <div class="task-card">
    <h3>citation_verification <span class="badge badge-blue">MEDIUM</span></h3>
    <p>Identify ghost citations (fabricated) and misattributed references. SQLite cache stores verified citations across episodes.</p>
    <div class="task-meta"><span>Max steps: 8</span><span>Frontier baseline: 0.35&ndash;0.60</span></div>
  </div>
  <h2>API Usage</h2>
  <div class="api-block">
    <span class="method">POST</span><span class="path">/reset</span>
    {"task_id": "formatting_compliance"}
  </div>
  <div class="api-block">
    <span class="method">POST</span><span class="path">/step</span>
    {"task": "claim_evidence_audit", "action_type": "query_section", "section_name": "results"}
  </div>
  <div class="api-block">
    <span class="method">POST</span><span class="path">/step</span>
    {"task": "claim_evidence_audit", "action_type": "submit_findings", "findings": [...]}
  </div>
  <div class="api-block">
    <span class="method">GET</span> <span class="path">/state</span>
    Returns current episode state and curriculum summary
  </div>
  <p class="authors">
    <strong>Nensi Pansuriya &middot; Krushna Parmar &middot; Ishita Bhojani</strong><br>
    Meta &times; PyTorch OpenEnv Hackathon &middot; Round 1 &middot; April 2026
  </p>
</div>
</body>
</html>"""


@app.get("/", response_class=HTMLResponse)
async def root() -> HTMLResponse:
    """Landing page — shows environment overview and API reference.

    Returns:
        An ``HTMLResponse`` carrying the static page: task cards, example
        request bodies and links to ``/docs``, ``/health``, ``/tasks`` and
        ``/state``.
    """
    return HTMLResponse(content=_LANDING_PAGE_HTML)
@app.get("/health")
async def health() -> dict:
    """Liveness probe — must return 200 for hackathon validation.

    Returns:
        A dict with a fixed ``"status": "ok"``, the API version, the number of
        items in the environment's corpus, and the ids of all configured tasks.
    """
    env = get_env()
    return {
        "status": "ok",
        "version": "0.4.0",
        "corpus_size": len(env.corpus),
        "tasks": list(TASK_CONFIG.keys()),
    }
# ── Reset ─────────────────────────────────────────────────────────────────────
@app.post("/reset")
async def reset(request: Request) -> JSONResponse:
    """
    Start a new episode.

    Body (JSON, optional):
        { "task_id": "formatting_compliance" }   — default if omitted

    A missing, empty, malformed or non-object body is treated as ``{}`` so a
    bare ``POST /reset`` (with or without a content-type header) still starts
    the default task instead of failing with a server error.

    Returns:
        { "observation": {...}, "info": {...} }
    """
    try:
        payload = await request.json()
    except ValueError:
        # Empty or invalid JSON (JSONDecodeError / UnicodeDecodeError are both
        # ValueError subclasses): fall back to the defaults.
        payload = {}
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (list, string, number, null)
        # carries no task_id.
        payload = {}

    task_id = payload.get("task_id", "formatting_compliance")
    result = get_env().reset(task_id=task_id)
    return JSONResponse(content=result, status_code=200)
# ── Step ──────────────────────────────────────────────────────────────────────
@app.post("/step")
async def step(request: Request) -> JSONResponse:
    """
    Submit one action.

    Body (JSON) — Task 1 example:
        {
          "task": "formatting_compliance",
          "formatted_text": "..."
        }

    Body (JSON) — Task 2/3 navigation example:
        {
          "task": "internal_consistency",
          "action_type": "query_section",
          "section_name": "results"
        }

    Body (JSON) — Task 2/3 submit example:
        {
          "task": "claim_evidence_audit",
          "action_type": "submit_findings",
          "findings": [
            {
              "type": "table_text_mismatch",
              "location": "results",
              "claim": "Table 2 shows 87% accuracy",
              "contradicts": "Table 2 value is 79%",
              "table_id": "Table 2",
              "table_value": "79%"
            }
          ]
        }

    Returns:
        { "observation": {...}, "reward": float, "done": bool, "info": {...} }

        The status is 200 for every body that parses as JSON, including
        actions the environment rejects. Only an unparseable body yields 400
        with an ``"error"`` message.
    """
    try:
        body = await request.json()
    except Exception:
        return JSONResponse(
            content={"error": "Request body must be valid JSON."},
            status_code=400,
        )

    result = get_env().step(body)

    # Always return 200 with a valid reward so evaluator never gets 400.
    # When the environment reports an error, fill in whichever standard
    # fields it left out; values it did provide are kept (setdefault).
    if "error" in result:
        result.setdefault("reward", 1e-4)
        result.setdefault("done", True)
        result.setdefault("info", {})
        result.setdefault(
            "observation",
            {
                "task_id": "unknown",
                "task_description": "",
                "paper_id": "none",
                "step_count": 0,
                "max_steps": 1,
            },
        )
    return JSONResponse(content=result, status_code=200)
# ── State ─────────────────────────────────────────────────────────────────────
@app.get("/state")
async def state() -> dict:
    """Return current episode state (for debugging and logging).

    The payload is whatever the shared environment's ``state()`` reports.
    """
    return get_env().state()
# ── Action space ──────────────────────────────────────────────────────────────
@app.get("/action_space")
async def action_space() -> dict:
    """Describe the JSON action schema accepted by ``POST /step``.

    Returns:
        A static dict: actions are discriminated by their ``"task"`` field,
        ``"variants"`` lists the fields of each task's action, and
        ``"finding_schema"`` describes the entries of a ``findings`` list.
    """
    return {
        "type": "structured",
        "discriminator": "task",
        "variants": {
            "formatting_compliance": {
                "fields": {
                    "task": "Literal['formatting_compliance']",
                    "formatted_text": "str — complete reformatted manuscript",
                }
            },
            "internal_consistency": {
                "fields": {
                    "task": "Literal['internal_consistency']",
                    "action_type": "query_section | submit_findings",
                    "section_name": "str (for query_section)",
                    "findings": "list[dict] (for submit_findings)",
                }
            },
            "claim_evidence_audit": {
                "fields": {
                    "task": "Literal['claim_evidence_audit']",
                    "action_type": "query_section | check_table | extract_claims | submit_findings",
                    "section_name": "str",
                    "table_id": "str (e.g. 'Table 1')",
                    "findings": "list[dict]",
                }
            },
        },
        "finding_schema": {
            "required": ["type", "location", "claim", "contradicts"],
            "optional_for_task3": ["table_id", "table_value"],
            "types": [
                "number_mismatch",
                "missing_reference",
                "contribution_count",
                "table_caption_mismatch",
                "table_text_mismatch",
            ],
        },
    }
# ── Tasks ─────────────────────────────────────────────────────────────────────
@app.get("/tasks")
async def tasks() -> dict:
    """List every task in ``TASK_CONFIG`` with a short summary.

    Returns:
        ``{"tasks": [...]}`` with one entry per task: its id, a description
        preview of at most 120 characters (followed by ``"..."`` only when
        the description was actually cut), the step limit, and whether the
        task allows navigation actions.
    """
    preview_limit = 120
    listing = []
    for tid, cfg in TASK_CONFIG.items():
        description = cfg["description"]
        if len(description) > preview_limit:
            # Mark truncation; short descriptions are returned unchanged.
            description = description[:preview_limit] + "..."
        listing.append(
            {
                "id": tid,
                "description": description,
                "max_steps": cfg["max_steps"],
                "navigable": cfg["allows_navigation"],
            }
        )
    return {"tasks": listing}
# ── Entry point (required by openenv spec) ────────────────────────────────────
def main(host: str = "0.0.0.0", port: int = 7860) -> None:
    """Server entry point — called by [project.scripts] and openenv runner.

    Args:
        host: Interface to bind. The default listens on all interfaces.
        port: TCP port to listen on. The default, 7860, matches the port in
            the module's usage example.

    Calling ``main()`` with no arguments behaves exactly as before; the
    parameters only make it possible to run the server on another address.
    """
    # Imported here so that importing this module does not require uvicorn.
    import uvicorn

    # A single worker: the environment is one in-process object shared by all
    # requests, so it must live in exactly one process.
    uvicorn.run(
        "server.app:app",
        host=host,
        port=port,
        workers=1,
    )


if __name__ == "__main__":
    main()