# openenv.yaml — consumed by `openenv validate`
# Schema source: https://github.com/meta-pytorch/OpenEnv (v1.0).
# Deploy spec: docs/modules/deploy_env_space.md §4.3.
#
# NOTE(review): the block structure below was rebuilt from a copy whose line
# breaks and indentation had been collapsed. Key order and values are
# unchanged, but nesting depth (e.g. `entrypoint` at top level, `reset_config`
# under `episode`, `docs` under `reward`) is inferred — confirm against the
# OpenEnv schema with `openenv validate`.
---
schema_version: "1.0"

# Identity and catalogue metadata for the environment.
env:
  id: driftcall
  version: "0.1.0"
  display_name: "DriftCall — Indic Voice Concierge under Schema Drift"
  description: >
    OpenEnv-compliant RL environment where a voice-first agent completes
    Indic consumer concierge tasks while vendor APIs undergo mid-episode
    schema, policy, T&C, pricing, and auth drift. Five independent reward
    components; deterministic seeded drift; Hindi/Tamil/Kannada/Hinglish
    briefs via Kokoro TTS + faster-whisper ASR.
  license: apache-2.0
  tags:
    - openenv
    - rl
    - voice
    - indic
    - schema-drift
    - grpo

# How clients reach the environment: HTTP routes relative to `base_url`.
entrypoint:
  type: http
  base_url: "https://driftcall-driftcall-env.hf.space"
  endpoints:
    reset: "/reset"
    step: "/step"
    state: "/state"
    close: "/close"
    health: "/healthz"
  auth:
    type: bearer
    # Names an environment variable rather than embedding the token itself.
    secret_env: DRIFTCALL_ENV_TOKEN

# `module:attribute` references to the action / observation models.
action_space:
  ref: "cells.step_04_models:DriftCallAction"

observation_space:
  ref: "cells.step_04_models:DriftCallObservation"

episode:
  max_turns: 16
  # Parameters accepted at reset; every one of them is optional.
  reset_config:
    seed:
      type: int
      required: false
    curriculum_stage:
      type: int
      range: [1, 3]
      required: false
    language_weights:
      type: object
      required: false
    audio_boundary_enabled:
      type: bool
      required: false

reward:
  shape: scalar
  range: [-1.0, 1.0]
  # The reward function lives in `cells/step_08_rewards.py`. Five independent
  # components are computed at episode termination; combined into a quality
  # score, calibrated by a Brier penalty + uncertain floor, then clamped.
  # Implementation entrypoint:
  impl: "cells.step_08_rewards:compute_rewards"
  pipeline:
    - "cells.step_08_rewards:combine_quality"  # weighted mix of R1..R5
    - "cells.step_08_rewards:brier_penalty"  # confidence calibration
    - "cells.step_08_rewards:apply_uncertain_floor"  # 0.50 floor when uncertain
    - "cells.step_08_rewards:final_reward"  # final scalar in [-1, 1]
  # Component weights below sum to 1.00 (0.40 + 0.20 + 0.20 + 0.10 + 0.10).
  components:
    - id: R1
      name: task_completion
      weight: 0.40
      impl: "cells.step_08_rewards:task_completion"
      description: >
        Goal achieved (correct booking, payment success, vendor confirmation).
    - id: R2
      name: drift_detection
      weight: 0.20
      impl: "cells.step_08_rewards:drift_detection"
      description: >
        Agent detects mid-episode schema/policy/auth drift and adapts.
    - id: R3
      name: constraint_adherence
      weight: 0.20
      impl: "cells.step_08_rewards:constraint_adherence"
      description: >
        Honours user constraints (budget, time window, dietary, lang).
    - id: R4
      name: format_compliance
      weight: 0.10
      impl: "cells.step_08_rewards:format_compliance"
      description: >
        Tool args parse cleanly against the (possibly drifted) schema.
    - id: R5
      name: anti_hack_penalty
      weight: 0.10
      impl: "cells.step_08_rewards:anti_hack_penalty"
      description: >
        Penalty for known reward-hacking patterns flagged in probe set.
  docs: "docs/modules/rewards.md"