"""
server/app.py — FastAPI application for ScholarEnv.
Exposes the six endpoints required by OpenEnv / hackathon validation:
POST /reset — start a new episode
POST /step — submit an action
GET /state — inspect current episode state
GET /health — liveness probe (returns 200)
GET /action_space — action schema documentation
GET /tasks — list all available tasks
All request/response bodies are JSON.
CORS is enabled for HuggingFace Spaces embedding.
Usage:
uvicorn server.app:app --host 0.0.0.0 --port 7860
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, HTMLResponse
# Put the repository root (the parent of server/) on sys.path so that
# `server.*` imports resolve even when this module is launched from server/.
_ROOT = Path(__file__).resolve().parent.parent
_root_entry = str(_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
from server.environment import ScholarEnvironment, TASK_CONFIG
# ── App setup ─────────────────────────────────────────────────────────────────
# Human-readable summary passed to FastAPI as the application description.
_APP_DESCRIPTION = (
    "OpenEnv environment for scholarly integrity verification. "
    "Three tasks: formatting compliance, internal consistency, "
    "claim-evidence audit."
)

# The ASGI application object; uvicorn loads it as `server.app:app`.
app = FastAPI(title="ScholarEnv", description=_APP_DESCRIPTION, version="0.4.0")

# Fully permissive CORS (any origin, method and header) so the API can be
# called from pages that embed it, e.g. HuggingFace Spaces.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
# Single environment instance shared across requests
# (stateful — one active episode at a time, sufficient for hackathon eval)
_ENV: ScholarEnvironment | None = None
def get_env() -> ScholarEnvironment:
    """Return the shared environment instance, creating it on first use.

    The instance is built from the ``DATA_DIR`` environment variable
    (falling back to ``"data"``) and cached in the module-level ``_ENV``,
    so every later call returns the same object.
    """
    global _ENV
    if _ENV is not None:
        return _ENV
    _ENV = ScholarEnvironment(data_dir=os.environ.get("DATA_DIR", "data"))
    return _ENV
# ── Health ────────────────────────────────────────────────────────────────────
@app.get("/", response_class=HTMLResponse)
async def root() -> HTMLResponse:
    """Serve the static landing page: overview, task list and API usage."""
    landing_page = """
🔬 ScholarEnv
The first RL environment for AI-assisted peer review and scholarly integrity verification.
OpenEnv v0.4.0
4 Tasks
Running
Meta × PyTorch Hackathon
Available Tasks
formatting_compliance EASY
Fix IEEE manuscript formatting violations — abstract length, section order, citation style, author block.
Max steps: 3Frontier baseline: 0.80–0.95
internal_consistency MEDIUM
Find internal contradictions — number mismatches, nonexistent references, inconsistent contribution counts.
Max steps: 4Frontier baseline: 0.40–0.65
claim_evidence_audit HARD
Find where text claims don't match table values. RL training value: frontier LLMs score 0.20–0.45 with no training.
Max steps: 6Frontier baseline: 0.20–0.45
citation_verification MEDIUM
Identify ghost citations (fabricated) and misattributed references. SQLite cache stores verified citations across episodes.
Max steps: 8Frontier baseline: 0.35–0.60
API Usage
POST/reset
{"task_id": "formatting_compliance"}
POST/step
{"task": "claim_evidence_audit", "action_type": "query_section", "section_name": "results"}
POST/step
{"task": "claim_evidence_audit", "action_type": "submit_findings", "findings": [...]}
GET /state
Returns current episode state and curriculum summary
Nensi Pansuriya · Krushna Parmar · Ishita Bhojani
Meta × PyTorch OpenEnv Hackathon · Round 1 · April 2026
"""
    return HTMLResponse(content=landing_page)
@app.get("/health")
async def health() -> dict:
    """Liveness probe used by hackathon validation.

    Reports a fixed status and version together with the size of the
    environment's corpus and the ids of all configured tasks.
    """
    corpus = get_env().corpus
    payload = {"status": "ok", "version": "0.4.0"}
    payload["corpus_size"] = len(corpus)
    payload["tasks"] = [*TASK_CONFIG]
    return payload
# ── Reset ─────────────────────────────────────────────────────────────────────
@app.post("/reset")
async def reset(request: Request) -> JSONResponse:
    """
    Start a new episode.

    Body (JSON, optional):
        { "task_id": "formatting_compliance" }   <- default if omitted

    An absent or empty body, a missing "task_id", or a null "task_id" all
    select the default task. The body is parsed whenever one is present,
    regardless of the Content-Type header.

    Returns:
        200 with the environment's reset result, documented as
        { "observation": {...}, "info": {...} }.
        400 with { "error": ... } when the body is not valid JSON or its
        top level is not a JSON object.
    """
    # Read the raw bytes first: clients often send a Content-Type header with
    # an empty body, which must mean "use defaults" rather than a parse error.
    raw = await request.body()
    if not raw.strip():
        payload = {}
    else:
        try:
            # Uses the body cached by the call above; no second read happens.
            payload = await request.json()
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError alike.
            return JSONResponse(
                content={"error": "Request body must be valid JSON."},
                status_code=400,
            )

    if not isinstance(payload, dict):
        return JSONResponse(
            content={"error": "Request body must be a JSON object."},
            status_code=400,
        )

    task_id = payload.get("task_id")
    if task_id is None:
        task_id = "formatting_compliance"

    result = get_env().reset(task_id=task_id)
    return JSONResponse(content=result, status_code=200)
# ── Step ──────────────────────────────────────────────────────────────────────
@app.post("/step")
async def step(request: Request) -> JSONResponse:
    """
    Submit one action to the current episode.

    Body (JSON) — Task 1 example:
        {
            "task": "formatting_compliance",
            "formatted_text": "..."
        }

    Body (JSON) — Task 2/3 navigation example:
        {
            "task": "internal_consistency",
            "action_type": "query_section",
            "section_name": "results"
        }

    Body (JSON) — Task 2/3 submit example:
        {
            "task": "claim_evidence_audit",
            "action_type": "submit_findings",
            "findings": [
                {
                    "type": "table_text_mismatch",
                    "location": "results",
                    "claim": "Table 2 shows 87% accuracy",
                    "contradicts": "Table 2 value is 79%",
                    "table_id": "Table 2",
                    "table_value": "79%"
                }
            ]
        }

    Returns:
        { "observation": {...}, "reward": float, "done": bool, "info": {...} }
        A body that cannot be parsed as JSON yields 400 with an "error" key.
    """
    try:
        action = await request.json()
    except Exception:
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON."},
        )

    outcome = get_env().step(action)
    if "error" not in outcome:
        return JSONResponse(content=outcome, status_code=200)

    # Errors reported by the environment are still answered with 200: any
    # missing field is filled with a placeholder so the evaluator always
    # receives a complete step result with a valid reward.
    fallbacks = {
        "reward": 1e-4,
        "done": True,
        "info": {},
        "observation": {
            "task_id": "unknown",
            "task_description": "",
            "paper_id": "none",
            "step_count": 0,
            "max_steps": 1,
        },
    }
    for field, placeholder in fallbacks.items():
        outcome.setdefault(field, placeholder)
    return JSONResponse(content=outcome, status_code=200)
# ── State ─────────────────────────────────────────────────────────────────────
@app.get("/state")
async def state() -> dict:
    """Expose the environment's current episode state (debugging/logging aid)."""
    env = get_env()
    return env.state()
# ── Action space ──────────────────────────────────────────────────────────────
@app.get("/action_space")
async def action_space() -> dict:
    """Return static documentation of the action schema.

    The schema is a tagged union keyed on the ``task`` field: one variant
    per task, plus the shape of an individual finding entry.
    """
    formatting_fields = {
        "task": "Literal['formatting_compliance']",
        "formatted_text": "str — complete reformatted manuscript",
    }
    consistency_fields = {
        "task": "Literal['internal_consistency']",
        "action_type": "query_section | submit_findings",
        "section_name": "str (for query_section)",
        "findings": "list[dict] (for submit_findings)",
    }
    audit_fields = {
        "task": "Literal['claim_evidence_audit']",
        "action_type": "query_section | check_table | extract_claims | submit_findings",
        "section_name": "str",
        "table_id": "str (e.g. 'Table 1')",
        "findings": "list[dict]",
    }
    finding_schema = {
        "required": ["type", "location", "claim", "contradicts"],
        "optional_for_task3": ["table_id", "table_value"],
        "types": [
            "number_mismatch",
            "missing_reference",
            "contribution_count",
            "table_caption_mismatch",
            "table_text_mismatch",
        ],
    }
    return {
        "type": "structured",
        "discriminator": "task",
        "variants": {
            "formatting_compliance": {"fields": formatting_fields},
            "internal_consistency": {"fields": consistency_fields},
            "claim_evidence_audit": {"fields": audit_fields},
        },
        "finding_schema": finding_schema,
    }
# ── Tasks ─────────────────────────────────────────────────────────────────────
@app.get("/tasks")
async def tasks() -> dict:
    """List every task in TASK_CONFIG with a short summary.

    Returns:
        {"tasks": [...]} with one entry per configured task, carrying:
        "id" (the task key), "description" (at most the first 120
        characters, followed by "..." only when text was actually cut),
        "max_steps", and "navigable" (the task's "allows_navigation" flag).
    """
    limit = 120
    catalogue = []
    for tid, cfg in TASK_CONFIG.items():
        text = cfg["description"]
        # Only mark the summary as truncated when something was dropped;
        # short descriptions are returned verbatim.
        summary = text if len(text) <= limit else text[:limit] + "..."
        catalogue.append(
            {
                "id": tid,
                "description": summary,
                "max_steps": cfg["max_steps"],
                "navigable": cfg["allows_navigation"],
            }
        )
    return {"tasks": catalogue}
# ── Entry point (required by openenv spec) ────────────────────────────────────
def main(host: str = "0.0.0.0", port: int = 7860) -> None:
    """Server entry point — called by [project.scripts] and openenv runner.

    Args:
        host: Interface to bind. Defaults to all interfaces.
        port: TCP port to listen on. Defaults to 7860.

    Calling ``main()`` with no arguments behaves exactly as before. The
    call blocks while uvicorn serves the application.
    """
    import uvicorn

    # Single worker: the environment instance lives in this module's global
    # state, so one process serves the one active episode.
    uvicorn.run(
        "server.app:app",
        host=host,
        port=port,
        workers=1,
    )


if __name__ == "__main__":
    main()