Spaces:

DGXAI
/

driftcall-env

Runtime error

App Files Files Community

driftcall-env / openenv.yaml

saumilyajj

Upload folder using huggingface_hub

2725475 verified 22 days ago

raw

history blame contribute delete

3.37 kB

	# openenv.yaml — consumed by `openenv validate`
	# Schema source: https://github.com/meta-pytorch/OpenEnv (v1.0).
	# Deploy spec: docs/modules/deploy_env_space.md §4.3.
	# Quoted on purpose: a bare 1.0 would be loaded as a float, not the string "1.0".
	schema_version: "1.0"

	# Identity and catalogue metadata for the environment.
	env:
	  id: driftcall
	  version: "0.1.0"
	  display_name: "DriftCall — Indic Voice Concierge under Schema Drift"
	  # Folded scalar: the indented lines below are joined with single spaces
	  # into one paragraph (a trailing newline is kept).
	  description: >
	    OpenEnv-compliant RL environment where a voice-first agent completes Indic
	    consumer concierge tasks while vendor APIs undergo mid-episode schema,
	    policy, T&C, pricing, and auth drift. Five independent reward components;
	    deterministic seeded drift; Hindi/Tamil/Kannada/Hinglish briefs via
	    Kokoro TTS + faster-whisper ASR.
	  license: apache-2.0
	  # Free-form discovery tags.
	  tags:
	    - openenv
	    - rl
	    - voice
	    - indic
	    - schema-drift
	    - grpo

	# How a client reaches the running environment.
	entrypoint:
	  type: http
	  base_url: "https://driftcall-driftcall-env.hf.space"
	  # Paths for each operation; presumably resolved relative to base_url
	  # (confirm against the OpenEnv client).
	  endpoints:
	    reset: "/reset"
	    step: "/step"
	    state: "/state"
	    close: "/close"
	    health: "/healthz"
	  # Bearer authentication. secret_env names an environment variable, so no
	  # token value is stored in this file.
	  auth:
	    type: bearer
	    secret_env: DRIFTCALL_ENV_TOKEN

	# Action type accepted by the environment.
	# NOTE(review): ref looks like a "module:attribute" path — confirm how the
	# loader resolves it.
	action_space:
	  ref: "cells.step_04_models:DriftCallAction"

	# Observation type returned by the environment.
	# NOTE(review): ref looks like a "module:attribute" path — confirm how the
	# loader resolves it.
	observation_space:
	  ref: "cells.step_04_models:DriftCallObservation"

	# Episode limits and the parameters accepted at reset.
	episode:
	  max_turns: 16
	  # Every reset parameter below is optional (required: false).
	  reset_config:
	    seed:
	      type: int
	      required: false
	    curriculum_stage:
	      type: int
	      # NOTE(review): presumably inclusive bounds — confirm with the schema.
	      range: [1, 3]
	      required: false
	    language_weights:
	      type: object
	      required: false
	    audio_boundary_enabled:
	      type: bool
	      required: false

	# Reward specification: a single scalar bounded by `range`.
	reward:
	  shape: scalar
	  range: [-1.0, 1.0]
	  # The reward function lives in `cells/step_08_rewards.py`. Five independent
	  # components are computed at episode termination; combined into a quality
	  # score, calibrated by a Brier penalty + uncertain floor, then clamped.
	  # Implementation entrypoint:
	  impl: "cells.step_08_rewards:compute_rewards"
	  # Stages listed in the order given by the upstream file.
	  pipeline:
	    - "cells.step_08_rewards:combine_quality"  # weighted mix of R1..R5
	    - "cells.step_08_rewards:brier_penalty"  # confidence calibration
	    - "cells.step_08_rewards:apply_uncertain_floor"  # 0.50 floor when uncertain
	    - "cells.step_08_rewards:final_reward"  # final scalar in [-1, 1]
	  # The five component weights sum to 1.00 (0.40 + 0.20 + 0.20 + 0.10 + 0.10).
	  components:
	    - id: R1
	      name: task_completion
	      weight: 0.40
	      impl: "cells.step_08_rewards:task_completion"
	      description: >
	        Goal achieved (correct booking, payment success, vendor confirmation).
	    - id: R2
	      name: drift_detection
	      weight: 0.20
	      impl: "cells.step_08_rewards:drift_detection"
	      description: >
	        Agent detects mid-episode schema/policy/auth drift and adapts.
	    - id: R3
	      name: constraint_adherence
	      weight: 0.20
	      impl: "cells.step_08_rewards:constraint_adherence"
	      description: >
	        Honours user constraints (budget, time window, dietary, lang).
	    - id: R4
	      name: format_compliance
	      weight: 0.10
	      impl: "cells.step_08_rewards:format_compliance"
	      description: >
	        Tool args parse cleanly against the (possibly drifted) schema.
	    - id: R5
	      name: anti_hack_penalty
	      weight: 0.10
	      impl: "cells.step_08_rewards:anti_hack_penalty"
	      description: >
	        Penalty for known reward-hacking patterns flagged in probe set.
	  # NOTE(review): placed at reward level because the path documents the whole
	  # rewards module; confirm it was not a field of the R5 component.
	  docs: "docs/modules/rewards.md"