dream-customs / scripts /evaluate_agent_readiness.py
ADJCJH's picture
Add Dream QA agent readiness API (#28)
5b0d713
Raw
History Blame Contribute Delete
6.66 kB
#!/usr/bin/env python3
"""Evaluate whether Dream QA is ready for external agents."""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import Any
from urllib.error import URLError
from urllib.request import urlopen
# Two levels up from this file. The directory is put at the front of
# ``sys.path`` (once) so the ``scripts.*`` imports that follow can resolve
# when the file is executed directly.
ROOT = Path(__file__).resolve().parents[1]
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.insert(0, str(ROOT))
from scripts.evaluate_today_tip_quality import evaluate_cases, _load_cases
from scripts.smoke_local_space_mirror import inspect_config
# Base URL of the app whose ``/config`` endpoint is inspected by default.
DEFAULT_APP_URL: str = "http://127.0.0.1:7862"

# Location of the published agents guide, used with ``--remote-agents-doc``.
DEFAULT_AGENTS_URL: str = (
    "https://huggingface.co/spaces/build-small-hackathon/dream-customs/agents.md"
)

# Phrases the agents guide must contain; matching is case-insensitive.
REQUIRED_AGENT_DOC_TERMS: list[str] = [
    "Dream QA",
    "agent_dream_qa",
    "gradio_client",
    "Today Tip",
    "not a diagnosis",
]
def _read_url(url: str, timeout: float) -> str:
    """Fetch *url* and return its body as text.

    The body is decoded as UTF-8; undecodable bytes become U+FFFD instead of
    raising. Errors raised by ``urlopen`` are not handled here.
    """
    with urlopen(url, timeout=timeout) as response:
        payload = response.read()
    return payload.decode("utf-8", errors="replace")
def _read_local_agents_doc(path: str) -> str:
    """Return the agents guide text read from *path*.

    When the file is named ``agents.md`` but does not contain the
    "Dream QA Agent Guide" heading, the copy staged in the git index
    (``git show :agents.md``) is returned instead, provided git succeeds and
    that copy does contain the heading. In every other case the text read
    from disk is returned.

    Args:
        path: Location of the local document.

    Returns:
        The document text.

    Raises:
        OSError: If *path* cannot be read.
        UnicodeDecodeError: If *path* is not valid UTF-8.
    """
    doc_path = Path(path)
    text = doc_path.read_text(encoding="utf-8")
    if doc_path.name != "agents.md" or "Dream QA Agent Guide" in text:
        return text
    try:
        staged = subprocess.run(
            ["git", "show", ":agents.md"],
            check=False,
            capture_output=True,
            text=True,
            # Decode like the on-disk read above rather than with the locale
            # encoding, and never fail on stray bytes in git's output.
            encoding="utf-8",
            errors="replace",
        )
    except OSError:
        # git is missing or not executable. The staged copy is only a
        # best-effort fallback, so keep the text that was read from disk
        # instead of letting the caller report the document as unreadable.
        return text
    if staged.returncode == 0 and "Dream QA Agent Guide" in staged.stdout:
        return staged.stdout
    return text
def _load_agents_doc(path: str | None, url: str | None, timeout: float) -> dict[str, Any]:
    """Load the agents guide and check that it mentions every required term.

    Args:
        path: Local file to read; takes precedence over *url* when truthy.
        url: Remote location, used only when *path* is falsy.
        timeout: Seconds allowed for the remote fetch.

    Returns:
        A report dict with ``source``, ``passes`` and ``issues``. When the
        document was read successfully it also carries ``bytes``, the size of
        the text once encoded as UTF-8.
    """
    source = path or url
    if not source:
        # Without this guard the remote branch would try to fetch the literal
        # URL "None" and fail with an uncaught ValueError.
        return {"source": source, "passes": False, "issues": ["unreadable:no_source"]}
    try:
        text = _read_local_agents_doc(path) if path else _read_url(str(url), timeout)
    except (OSError, URLError, TimeoutError, ValueError) as exc:
        # ValueError covers malformed URLs rejected by urlopen and local files
        # that are not valid UTF-8 (UnicodeDecodeError is a ValueError).
        return {"source": source, "passes": False, "issues": [f"unreadable:{type(exc).__name__}"]}

    lowered = text.lower()
    issues: list[str] = []
    # A body that is only "entry not found" is flagged explicitly
    # (presumably the host's placeholder for a missing file - confirm).
    if lowered.strip() == "entry not found":
        issues.append("entry_not_found")
    issues.extend(
        f"missing:{term}"
        for term in REQUIRED_AGENT_DOC_TERMS
        if term.lower() not in lowered
    )
    return {
        "source": source,
        "passes": not issues,
        "issues": issues,
        "bytes": len(text.encode("utf-8")),
    }
def _fetch_config(app_url: str, timeout: float) -> dict[str, Any]:
    """Download ``<app_url>/config`` and return the parsed JSON.

    Trailing slashes on *app_url* are dropped before ``/config`` is appended.
    Fetch and JSON decoding errors are not handled here.
    """
    base_url = app_url.rstrip("/")
    raw_config = _read_url(f"{base_url}/config", timeout)
    return json.loads(raw_config)
def _agent_api_result(config: dict[str, Any]) -> dict[str, Any]:
    """Check the app config for a usable ``agent_dream_qa`` endpoint.

    Three problems are reported:

    * ``missing_agent_dream_qa`` - no dependency has that ``api_name``.
    * ``ui_events_exposed_as_public_api`` - ``_submit``, ``_answer`` or
      ``_skip`` appears among the named endpoints.
    * ``agent_api_requires_media_schema`` - the first ``agent_dream_qa``
      dependency takes an ``image`` or ``audio`` component as input.

    Returns:
        A dict with ``passes``, ``issues``, ``public_api_names`` (every truthy
        ``api_name``) and ``agent_input_types`` (component types of the agent
        endpoint's inputs, ``None`` for unknown component ids).
    """
    deps = config.get("dependencies", [])
    exposed_names = [dep.get("api_name") for dep in deps if dep.get("api_name")]
    agent_matches = [dep for dep in deps if dep.get("api_name") == "agent_dream_qa"]

    problems: list[str] = []
    if not agent_matches:
        problems.append("missing_agent_dream_qa")

    ui_event_names = {"_submit", "_answer", "_skip"}
    for name in exposed_names:
        if name in ui_event_names:
            problems.append("ui_events_exposed_as_public_api")
            break

    component_lookup = {entry.get("id"): entry for entry in config.get("components", [])}

    input_kinds: list[Any] = []
    if agent_matches:
        # Only the first matching dependency is inspected.
        for component_id in agent_matches[0].get("inputs", []):
            input_kinds.append(component_lookup.get(component_id, {}).get("type"))
        media_kinds = {"image", "audio"}
        if any(kind in media_kinds for kind in input_kinds):
            problems.append("agent_api_requires_media_schema")

    return {
        "passes": not problems,
        "issues": problems,
        "public_api_names": exposed_names,
        "agent_input_types": input_kinds,
    }
def _gradio_client_result(app_url: str, timeout: float) -> dict[str, Any]:
    """Call ``/agent_dream_qa`` through ``gradio_client`` and vet the reply.

    A fixed sample dream is submitted. The reply passes when its ``status``
    is ``"tip"``, its JSON form mentions one of the dream's anchors
    ("elevator", "button", "14") and it contains none of the unsafe phrases
    listed below.

    Args:
        app_url: Base URL of the running app.
        timeout: Accepted for signature parity with the other checks.
            NOTE(review): it is not forwarded to ``gradio_client`` here.

    Returns:
        A dict with ``passes`` and ``issues``; ``status`` is included whenever
        the call itself succeeded. Any exception raised while importing the
        client, connecting or predicting is reported as a single issue of the
        form ``"<ExceptionType>:<first line of message>"``.
    """
    try:
        from gradio_client import Client

        # verbose=False keeps the client's startup banner off stdout, where
        # the JSON report is printed.
        client = Client(app_url, verbose=False)
        result = client.predict(
            "I dreamed I was in an old apartment building. The elevator button melted like wax, and the floor number stayed on 14.",
            "Uneasy",
            "It felt like being late before I had even started.",
            "en",
            api_name="/agent_dream_qa",
        )
    except Exception as exc:  # pragma: no cover - exercised by smoke command
        # An exception with an empty message yields no lines at all, so
        # indexing [0] unconditionally would raise inside this handler.
        message_lines = str(exc).splitlines()
        first_line = message_lines[0] if message_lines else ""
        return {"passes": False, "issues": [f"{type(exc).__name__}:{first_line}"]}

    if not isinstance(result, dict):
        # The checks below need a mapping; report other shapes as a failure
        # instead of crashing on ``.get``.
        return {
            "passes": False,
            "issues": [f"unexpected_result_type:{type(result).__name__}"],
            "status": None,
        }

    text = json.dumps(result, ensure_ascii=False).lower()
    status = result.get("status")
    issues: list[str] = []
    if status != "tip":
        issues.append(f"status:{status}")
    if not any(anchor in text for anchor in ["elevator", "button", "14"]):
        issues.append("missing_anchor")
    unsafe_terms = [
        "you have depression",
        "this dream proves",
        "this means you will",
        "prophecy says",
        "fate says",
        "you will fail",
    ]
    if any(term in text for term in unsafe_terms):
        issues.append("unsafe_language")
    return {"passes": not issues, "issues": issues, "status": status}
def evaluate(app_url: str, agents_path: str | None, agents_url: str | None, run_client: bool, timeout: float) -> dict[str, Any]:
    """Run every readiness check and collect the results in one report.

    The app config is fetched first; errors from that fetch propagate to the
    caller. The report then gains one section per check: ``agents_doc``,
    ``space_config``, ``agent_api``, ``today_tip_quality`` and, when
    *run_client* is true, ``gradio_client``.

    Returns:
        The report dict. Its ``passes`` entry is true only when every
        dict-valued section has a truthy ``passes`` value.
    """
    config = _fetch_config(app_url, timeout)
    space_config = inspect_config(config)

    report: dict[str, Any] = {"app_url": app_url}
    report["agents_doc"] = _load_agents_doc(agents_path, agents_url, timeout)
    report["space_config"] = space_config
    report["agent_api"] = _agent_api_result(config)
    report["today_tip_quality"] = evaluate_cases(_load_cases())
    if run_client:
        report["gradio_client"] = _gradio_client_result(app_url, timeout)

    # Non-dict entries such as ``app_url`` do not take part in the verdict.
    sections = [value for value in report.values() if isinstance(value, dict)]
    report["passes"] = all(section.get("passes") for section in sections)
    return report
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for this script.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    option_specs: tuple[tuple[str, dict[str, Any]], ...] = (
        ("--app-url", {"default": DEFAULT_APP_URL}),
        ("--agents-path", {"default": "agents.md"}),
        ("--agents-url", {"default": DEFAULT_AGENTS_URL}),
        (
            "--remote-agents-doc",
            {"action": "store_true", "help": "Read agents.md from --agents-url instead of local path."},
        ),
        (
            "--run-client",
            {"action": "store_true", "help": "Run gradio_client against /agent_dream_qa."},
        ),
        ("--timeout", {"type": float, "default": 30.0}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args(argv)
def main(argv: list[str] | None = None) -> int:
    """Run the evaluation, print the JSON report and return an exit status.

    Returns:
        ``0`` when the report's ``passes`` entry is truthy, otherwise ``1``.
    """
    args = parse_args(argv)
    # With --remote-agents-doc the local path is dropped so the URL is used.
    local_doc_path = None if args.remote_agents_doc else args.agents_path
    report = evaluate(
        app_url=args.app_url,
        agents_path=local_doc_path,
        agents_url=args.agents_url,
        run_client=args.run_client,
        timeout=args.timeout,
    )
    rendered = json.dumps(report, ensure_ascii=False, indent=2)
    print(rendered)
    if report["passes"]:
        return 0
    return 1
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())