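# driftcall / openenv.yaml
# Hugging Face file-viewer residue, kept for provenance (not configuration):
# uploader avatar "saumilyajj's picture"; commit message
# "Upload folder using huggingface_hub"; commit b43d8da (verified).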
# openenv.yaml — consumed by `openenv validate`
# Schema source: https://github.com/meta-pytorch/OpenEnv (v1.0).
# Deploy spec: docs/modules/deploy_env_space.md §4.3.
# Manifest schema version. Quoted so it stays the string "1.0", not a float.
schema_version: "1.0"
# Identity and catalogue metadata for the environment.
# NOTE(review): nesting was reconstructed from key order after the original
# indentation was lost; confirm `license` and `tags` belong under `env`.
env:
  id: driftcall
  # Quoted so the version is always read as a string.
  version: "0.1.0"
  display_name: "DriftCall — Indic Voice Concierge under Schema Drift"
  # Folded scalar: the line breaks below are joined with spaces when parsed.
  description: >
    OpenEnv-compliant RL environment where a voice-first agent completes Indic
    consumer concierge tasks while vendor APIs undergo mid-episode schema,
    policy, T&C, pricing, and auth drift. Five independent reward components;
    deterministic seeded drift; Hindi/Tamil/Kannada/Hinglish briefs via
    Kokoro TTS + faster-whisper ASR.
  license: apache-2.0
  tags:
    - openenv
    - rl
    - voice
    - indic
    - schema-drift
    - grpo
# How clients reach the running environment over HTTP.
# NOTE(review): nesting was reconstructed from key order after the original
# indentation was lost; `auth` is placed under `entrypoint` on the assumption
# that it describes access to these endpoints — confirm against the schema.
entrypoint:
  type: http
  base_url: "https://driftcall-driftcall-env.hf.space"
  # One path per operation; presumably resolved against `base_url` — confirm.
  endpoints:
    reset: "/reset"
    step: "/step"
    state: "/state"
    close: "/close"
    health: "/healthz"
  auth:
    type: bearer
    # Presumably the name of the environment variable holding the token, so
    # that no secret is stored in this file — confirm with the consumer.
    secret_env: DRIFTCALL_ENV_TOKEN
# Type describing the actions an agent may submit.
# `ref` looks like a `module:attribute` path — confirm how the consumer
# resolves it.
action_space:
  ref: "cells.step_04_models:DriftCallAction"
# Type describing the observations returned to the agent.
# `ref` looks like a `module:attribute` path — confirm how the consumer
# resolves it.
observation_space:
  ref: "cells.step_04_models:DriftCallObservation"
# Episode limits and the options accepted when an episode is reset.
# NOTE(review): nesting was reconstructed from key order after the original
# indentation was lost; `reset_config` is placed under `episode` by
# assumption — confirm against the schema.
episode:
  max_turns: 16
  # Each entry declares one reset option; all four are optional.
  reset_config:
    seed:
      type: int
      required: false
    curriculum_stage:
      type: int
      # Presumably inclusive bounds (stages 1 through 3) — confirm.
      range: [1, 3]
      required: false
    language_weights:
      type: object
      required: false
    audio_boundary_enabled:
      type: bool
      required: false
# Reward specification: a single scalar built from five weighted components.
# NOTE(review): nesting was reconstructed from key order after the original
# indentation was lost; `docs` is placed under `reward` by assumption —
# confirm against the schema.
reward:
  shape: scalar
  range: [-1.0, 1.0]
  # The reward function lives in `cells/step_08_rewards.py`. Five independent
  # components are computed at episode termination; combined into a quality
  # score, calibrated by a Brier penalty + uncertain floor, then clamped.
  # Implementation entrypoint:
  impl: "cells.step_08_rewards:compute_rewards"
  # Stages listed in the order described above: combine, calibrate, floor,
  # then produce the final scalar.
  pipeline:
    - "cells.step_08_rewards:combine_quality"        # weighted mix of R1..R5
    - "cells.step_08_rewards:brier_penalty"          # confidence calibration
    - "cells.step_08_rewards:apply_uncertain_floor"  # 0.50 floor when uncertain
    - "cells.step_08_rewards:final_reward"           # final scalar in [-1, 1]
  # Component weights sum to 1.00 (0.40 + 0.20 + 0.20 + 0.10 + 0.10).
  components:
    - id: R1
      name: task_completion
      weight: 0.40
      impl: "cells.step_08_rewards:task_completion"
      description: >
        Goal achieved (correct booking, payment success, vendor confirmation).
    - id: R2
      name: drift_detection
      weight: 0.20
      impl: "cells.step_08_rewards:drift_detection"
      description: >
        Agent detects mid-episode schema/policy/auth drift and adapts.
    - id: R3
      name: constraint_adherence
      weight: 0.20
      impl: "cells.step_08_rewards:constraint_adherence"
      description: >
        Honours user constraints (budget, time window, dietary, lang).
    - id: R4
      name: format_compliance
      weight: 0.10
      impl: "cells.step_08_rewards:format_compliance"
      description: >
        Tool args parse cleanly against the (possibly drifted) schema.
    - id: R5
      name: anti_hack_penalty
      weight: 0.10
      impl: "cells.step_08_rewards:anti_hack_penalty"
      description: >
        Penalty for known reward-hacking patterns flagged in probe set.
  docs: "docs/modules/rewards.md"