Spaces:

Nomearod
/

agentbench

Running

Jane Yeung

feat: infrastructure sprint — vLLM/Modal, Helm, Terraform (#8)

a9d4375 about 2 months ago

322 Bytes

	"""Shared constants for Modal deployments."""

	# Model served by the deployment, and the GPU it is requested on.
	# NOTE(review): both strings are presumably passed verbatim to the
	# model loader / Modal GPU selector — confirm at the call sites.
	MODEL_NAME: str = "mistralai/Mistral-7B-Instruct-v0.3"
	GPU_TYPE: str = "a10g"

	# vLLM tuning knobs. The names mirror vLLM's max_model_len, dtype and
	# gpu_memory_utilization options; presumably forwarded unchanged —
	# verify against the code that builds the engine.
	VLLM_MAX_MODEL_LEN: int = 8192
	VLLM_DTYPE: str = "half"
	VLLM_GPU_MEMORY_UTILIZATION: float = 0.85

	# Pricing input for the provider comparison report.
	# Modal A10G is billed at roughly $0.000361 per second, i.e. about
	# $1.30 per hour (0.000361 * 3600).
	MODAL_A10G_COST_PER_SEC: float = 0.000361