Spaces:
Sleeping
Sleeping
File size: 3,222 Bytes
from runners.inference import run_inference
from core.models import ChargebackOpsAction
from server.app import baseline, grader, root, tasks
from server.chargeback_ops_environment import ChargebackOpsEnvironment
def test_tasks_endpoint_payload():
    """Check that ``tasks()`` lists at least three tasks and an action schema.

    The action schema must contain a ``"properties"`` entry.
    """
    listing = tasks()

    assert len(listing.tasks) >= 3
    assert "properties" in listing.action_schema
def test_root_endpoint_payload():
    """Check that ``root()`` answers 200 and its body carries the expected markers."""
    landing = root()
    assert landing.status_code == 200

    # Byte fragments that must all appear in the response body, checked in
    # this order so the first missing one is the one reported.
    required_fragments = (
        b"ChargebackOps",
        b"tasks_url",
        b"demo_url",
        b"huggingface.co/spaces",
        b"interactive_demo_url",
    )
    for fragment in required_fragments:
        assert fragment in landing.body
def test_baseline_endpoint_works_without_api_key(monkeypatch):
    """Check that ``baseline()`` reports heuristic fallback with no credentials set.

    Every model/provider environment variable listed below is removed
    (if present) before the endpoint is called.
    """
    credential_vars = (
        "HF_TOKEN",
        "API_BASE_URL",
        "MODEL_NAME",
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "GROQ_API_KEY",
    )
    for var_name in credential_vars:
        # raising=False: a variable that is already absent is not an error.
        monkeypatch.delenv(var_name, raising=False)

    report = baseline()

    assert report.mode == "heuristic_fallback"
    assert len(report.task_results) >= 3
def test_inference_script_falls_back_without_hf_token(monkeypatch):
    """Check that ``run_inference()`` reports heuristic fallback without HF settings.

    ``HF_TOKEN``, ``API_BASE_URL`` and ``MODEL_NAME`` are removed (if present)
    before the runner is invoked.
    """
    for var_name in ("HF_TOKEN", "API_BASE_URL", "MODEL_NAME"):
        # raising=False: a variable that is already absent is not an error.
        monkeypatch.delenv(var_name, raising=False)

    outcome = run_inference()

    assert outcome.mode == "heuristic_fallback"
    assert len(outcome.task_results) >= 3
def test_grader_endpoint_after_completed_episode():
    """Drive one full episode for case CB-E1, then query ``grader()`` for it.

    The episode selects and inspects the case, queries three systems,
    attaches four evidence items, sets the ``contest`` strategy and submits
    the representment. The observation returned by the final step must carry
    a grader report, and ``grader()`` called with that report's episode id
    must return the same id and a normalized score within [0.0, 1.0].
    """
    case = "CB-E1"
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")

    # Keyword arguments for every action that precedes the final submission,
    # listed in the order they are stepped.
    preparatory_actions = [
        {"action_type": "select_case", "case_id": case},
        {"action_type": "inspect_case", "case_id": case},
        {"action_type": "query_system", "case_id": case, "system_name": "orders"},
        {"action_type": "query_system", "case_id": case, "system_name": "shipping"},
        {"action_type": "query_system", "case_id": case, "system_name": "support"},
        {
            "action_type": "add_evidence",
            "case_id": case,
            "evidence_ids": [
                "E1-ORDER-CONF",
                "E1-DELIVERY-SCAN",
                "E1-SIGNATURE",
                "E1-SUPPORT-ACK",
            ],
        },
        {"action_type": "set_strategy", "case_id": case, "strategy": "contest"},
    ]
    for action_kwargs in preparatory_actions:
        # Each action object is built immediately before it is stepped.
        env.step(ChargebackOpsAction(**action_kwargs))

    submission = ChargebackOpsAction(
        action_type="submit_representment",
        case_id=case,
    )
    final_obs = env.step(submission)

    report = final_obs.grader_report
    assert report is not None

    graded = grader(report.episode_id)
    assert graded["episode_id"] == report.episode_id
    assert 0.0 <= graded["normalized_score"] <= 1.0
|