Spaces:

openenv123
/

openenv-email-triage

Sleeping

App Files Files Community

openenv-email-triage / validate.py

Nanny7

Initial commit

e44f8be 4 days ago

raw

history blame contribute delete

7.9 kB

	#!/usr/bin/env python3
	"""
	validate.py — Pre-submission validation script.
	Checks all OpenEnv compliance requirements before submitting.
	Run: python validate.py
	"""
	import sys
	import json
	import yaml
	import importlib
	from pathlib import Path

	# Status glyphs prefixed to each line of the printed report.
	PASS = "✅"
	FAIL = "❌"
	WARN = "⚠️ "  # the trailing space is part of the literal

	# Labels of failed checks, appended by check(); a non-empty list makes the
	# script exit with status 1.
	errors = []
	# Labels of unmet soft checks, appended by warn(); listed in the summary but
	# not consulted when the exit status is chosen.
	warnings = []

	def check(condition: bool, label: str, detail: str = ""):
	    """Report the outcome of one hard requirement.

	    A truthy ``condition`` prints ``label`` with the PASS glyph.  Otherwise
	    ``label`` is printed with the FAIL glyph, followed by ``: detail`` when
	    ``detail`` is non-empty, and ``label`` is appended to the module-level
	    ``errors`` list.
	    """
	    if not condition:
	        suffix = f": {detail}" if detail else ""
	        print(f" {FAIL} {label}" + suffix)
	        errors.append(label)
	        return
	    print(f" {PASS} {label}")

	def warn(condition: bool, label: str, detail: str = ""):
	    """Report the outcome of one soft requirement.

	    A truthy ``condition`` prints ``label`` with the PASS glyph.  Otherwise
	    ``label`` is printed with the WARN glyph, followed by ``: detail`` when
	    ``detail`` is non-empty, and ``label`` is appended to the module-level
	    ``warnings`` list.
	    """
	    if not condition:
	        suffix = f": {detail}" if detail else ""
	        print(f" {WARN} {label}" + suffix)
	        warnings.append(label)
	        return
	    print(f" {PASS} {label}")

	# Report banner: a 60-column rule above and below the title.
	banner_rule = "=" * 60
	print("\n" + banner_rule)
	print(" OpenEnv Validation — email-triage-env")
	print(banner_rule + "\n")

	# ─── 1. File structure ────────────────────────────────────────────────────────
	# Paths are relative to the current working directory; each missing entry is
	# reported as its own failed check, labelled with the file name.
	print("1. Required files")
	required_files = (
	    "openenv.yaml",
	    "Dockerfile",
	    "requirements.txt",
	    "inference.py",
	    "README.md",
	    "models.py",
	    "environment.py",
	    "server.py",
	    "graders.py",
	    "dataset.py",
	)
	for filename in required_files:
	    is_present = Path(filename).exists()
	    check(is_present, filename)

	# ─── 2. openenv.yaml ─────────────────────────────────────────────────────────
	# Reading/parsing is kept apart from the field checks so that
	# "openenv.yaml parseable" is reported only for genuine read/parse problems
	# or a document that is not a mapping, never for a malformed field value.
	print("\n2. openenv.yaml spec")
	try:
	    # Decode explicitly as UTF-8 instead of relying on the platform locale.
	    with open("openenv.yaml", encoding="utf-8") as fh:
	        cfg = yaml.safe_load(fh)
	except Exception as e:
	    # Broad on purpose: any read or parse failure becomes one failed check
	    # and the remaining sections still run.
	    check(False, "openenv.yaml parseable", str(e))
	else:
	    if not isinstance(cfg, dict):
	        # An empty file loads as None; a top-level list or scalar cannot
	        # carry the required keys either.
	        check(
	            False,
	            "openenv.yaml parseable",
	            f"expected a top-level mapping, got {type(cfg).__name__}",
	        )
	    else:
	        tasks = cfg.get("tasks")
	        check("name" in cfg, "has name field")
	        check("version" in cfg, "has version field")
	        # Only a list or mapping can hold tasks: `tasks: null` or a scalar
	        # fails this one check instead of aborting the rest of the section.
	        check(
	            isinstance(tasks, (list, dict)) and len(tasks) >= 3,
	            "has 3+ tasks",
	        )
	        check("endpoints" in cfg, "has endpoints section")
	        check("observation_space" in cfg, "has observation_space")
	        check("action_space" in cfg, "has action_space")

	# ─── 3. Pydantic models ───────────────────────────────────────────────────────
	# One import statement covers all five models: either every "imports" line is
	# printed as passed, or a single "Models import cleanly" failure is recorded.
	print("\n3. Typed models (Pydantic)")
	try:
	    from models import Observation, Action, Reward, StepResponse, EnvState

	    for model_name in ("Observation", "Action", "Reward", "StepResponse", "EnvState"):
	        check(True, f"{model_name} model imports")

	    # Build a Reward with a mid-range value and confirm it is stored in range.
	    sample_reward = Reward(value=0.5, feedback="test")
	    value_in_range = 0.0 <= sample_reward.value <= 1.0
	    check(value_in_range, "Reward value in [0.0, 1.0]")
	except Exception as exc:
	    check(False, "Models import cleanly", str(exc))

	# ─── 4. Environment API ───────────────────────────────────────────────────────
	# Drives one environment through reset -> state -> step on "task_easy", then
	# resets a fresh environment for each task id.  Any exception stops the
	# section and is recorded as a single "Environment API works" failure.
	print("\n4. Environment API (reset/step/state)")
	try:
	    from environment import EmailTriageEnv
	    from models import Action, UrgencyLevel, EmailCategory, EmailAction

	    env = EmailTriageEnv()

	    # reset(): must yield an observation bound to the requested task.
	    first_obs = env.reset("task_easy")
	    check(first_obs is not None, "reset() returns Observation")
	    check(first_obs.current_email is not None, "reset() observation has current_email")
	    check(first_obs.task_id == "task_easy", "reset() sets task_id")

	    # state(): must describe the same task.
	    snapshot = env.state()
	    check(snapshot is not None, "state() returns EnvState")
	    check(snapshot.task_id == "task_easy", "state() has correct task_id")

	    # step(): submit one fixed action and inspect the response fields.
	    probe_action = Action(
	        urgency=UrgencyLevel.MEDIUM,
	        category=EmailCategory.SPAM,
	        action=EmailAction.DELETE,
	    )
	    outcome = env.step(probe_action)
	    check(outcome is not None, "step() returns StepResponse")
	    reward_in_range = 0.0 <= outcome.reward.value <= 1.0
	    check(reward_in_range, "step() reward in [0.0, 1.0]")
	    check(isinstance(outcome.done, bool), "step() returns done boolean")
	    episode_id = outcome.info.get("episode_id")
	    check(episode_id is not None, "step() info has episode_id")

	    # Every task must start with at least one email remaining.
	    for task_name in ("task_easy", "task_medium", "task_hard"):
	        task_obs = EmailTriageEnv().reset(task_name)
	        check(task_obs.emails_remaining > 0, f"task {task_name} has emails")

	except Exception as exc:
	    check(False, "Environment API works", str(exc))

	# ─── 5. Graders ───────────────────────────────────────────────────────────────
	# Grades up to the first three emails of each task with one fixed action and
	# checks that every score lies in [0.0, 1.0].  Any exception stops the section
	# and is recorded as a single "Graders work" failure.
	print("\n5. Task graders (3 tasks, scores in [0,1])")
	try:
	    from graders import grade
	    from dataset import TASK_EMAILS
	    # Import the action types here instead of relying on another section having
	    # bound them at module level; otherwise an import failure elsewhere would
	    # surface in this section as a misleading NameError.
	    from models import Action, UrgencyLevel, EmailCategory, EmailAction

	    for tid in ["task_easy", "task_medium", "task_hard"]:
	        sample_emails = TASK_EMAILS[tid][:3]  # spot-check first 3
	        scores = []
	        for email in sample_emails:
	            probe_action = Action(
	                urgency=UrgencyLevel.MEDIUM,
	                category=EmailCategory.OTHER,
	                action=EmailAction.ARCHIVE,
	            )
	            scores.append(grade(tid, probe_action, email).value)
	        # all([]) is True, so a task with no emails would pass vacuously;
	        # require at least one graded email.
	        scores_valid = bool(scores) and all(0.0 <= v <= 1.0 for v in scores)
	        detail = str(scores) if scores else "no emails to grade"
	        check(scores_valid, f"{tid} grader scores in [0.0, 1.0]", detail)

	except Exception as e:
	    check(False, "Graders work", str(e))

	# ─── 6. Dockerfile ────────────────────────────────────────────────────────────
	def _dockerfile_instructions(text):
	    """Return ``(KEYWORD, arguments)`` pairs for the instructions in *text*.

	    Blank and comment lines are skipped, lines ending in a backslash are
	    joined with the following line, and the keyword is upper-cased because
	    Dockerfile instruction names are case-insensitive.
	    """
	    logical_lines = []
	    pending = ""
	    for raw_line in text.splitlines():
	        line = raw_line.strip()
	        if not line or line.startswith("#"):
	            continue
	        pending += line
	        if pending.endswith("\\"):
	            # Continuation: drop the backslash and keep accumulating.
	            pending = pending[:-1] + " "
	            continue
	        logical_lines.append(pending)
	        pending = ""
	    if pending.strip():
	        # File ended in the middle of a continued instruction.
	        logical_lines.append(pending)

	    parsed = []
	    for entry in logical_lines:
	        keyword, *rest = entry.split(None, 1)
	        parsed.append((keyword.upper(), rest[0] if rest else ""))
	    return parsed


	# Checks look at parsed instructions rather than raw substrings, so a keyword
	# inside a comment, or the CMD that is part of a HEALTHCHECK instruction, no
	# longer satisfies a check by accident.
	print("\n6. Dockerfile")
	try:
	    # Only ASCII keywords are searched for, so undecodable bytes are replaced
	    # rather than treated as an unreadable file.
	    dockerfile = Path("Dockerfile").read_text(encoding="utf-8", errors="replace")
	except Exception as e:
	    check(False, "Dockerfile readable", str(e))
	else:
	    instructions = _dockerfile_instructions(dockerfile)
	    keywords = {keyword for keyword, _ in instructions}
	    # FROM [--platform=...] <image>[:tag] [AS name]: the image is the first
	    # argument that is not a --flag.
	    base_images = [
	        next((tok for tok in args.split() if not tok.startswith("--")), "")
	        for keyword, args in instructions
	        if keyword == "FROM"
	    ]
	    check(
	        any("python" in image.lower() for image in base_images),
	        "has Python base image",
	    )
	    check("EXPOSE" in keywords, "has EXPOSE directive")
	    check("HEALTHCHECK" in keywords, "has HEALTHCHECK directive")
	    # Any mention of uvicorn is still accepted as evidence of a start command.
	    check("uvicorn" in dockerfile or "CMD" in keywords, "has CMD to start server")

	# ─── 7. inference.py ──────────────────────────────────────────────────────────
	# Textual spot-check: each marker string must appear somewhere in the source
	# of inference.py.
	print("\n7. inference.py")
	try:
	    # Decode as UTF-8 instead of the platform locale, and replace undecodable
	    # bytes: only ASCII markers are searched for, so stray bytes must not make
	    # the file "unreadable".
	    src = Path("inference.py").read_text(encoding="utf-8", errors="replace")
	except Exception as e:
	    check(False, "inference.py readable", str(e))
	else:
	    expected_markers = (
	        ("API_BASE_URL", "reads API_BASE_URL"),
	        ("MODEL_NAME", "reads MODEL_NAME"),
	        ("HF_TOKEN", "reads HF_TOKEN"),
	        ("[START]", "emits [START] log"),
	        ("[STEP]", "emits [STEP] log"),
	        ("[END]", "emits [END] log"),
	        ("OpenAI(", "uses OpenAI client"),
	    )
	    for marker, label in expected_markers:
	        check(marker in src, label)

	# ─── 8. README ────────────────────────────────────────────────────────────────
	# Case-insensitive keyword spot-check of README.md.
	print("\n8. README.md")
	try:
	    # READMEs commonly contain emoji or other non-ASCII text; decode as UTF-8
	    # instead of the platform locale, and replace undecodable bytes since only
	    # ASCII keywords are searched for.
	    readme = Path("README.md").read_text(encoding="utf-8", errors="replace").lower()
	except Exception as e:
	    check(False, "README.md readable", str(e))
	else:
	    check("action" in readme, "documents action space")
	    check("observation" in readme, "documents observation space")
	    check("task" in readme, "describes tasks")
	    check("docker" in readme, "includes Docker instructions")
	    check("baseline" in readme or "score" in readme, "includes baseline scores")

	# ─── Summary ──────────────────────────────────────────────────────────────────
	# Prints the failed labels (or a success line), then any warnings, and exits
	# with status 1 when at least one hard check failed, 0 otherwise.
	summary_rule = "=" * 60
	print("\n" + summary_rule)
	if errors:
	    print(f" {FAIL} {len(errors)} check(s) FAILED:")
	    for failed_label in errors:
	        print(f" • {failed_label}")
	else:
	    print(f" {PASS} ALL CHECKS PASSED — Ready to submit!")
	if warnings:
	    print(f"\n {WARN} {len(warnings)} warning(s):")
	    for warned_label in warnings:
	        print(f" • {warned_label}")
	print(summary_rule + "\n")
	sys.exit(1 if errors else 0)