Spaces:

build-small-hackathon
/

multi-agent-lab

Running on Zero

agharsallah

feat(media): introduce MediaRouter and stubs for image and speech generation

8400d8c 19 days ago

7.36 kB

	# OpenAPI document header: spec version plus human-facing metadata.
	# The version strings are quoted so no loader can retype them as numbers.
	openapi: "3.1.0"
	info:
	  title: Multi-Agent Land — Model Serving API
	  version: "1.0.0"
	  # Folded scalar: single newlines become spaces, the blank line is kept as a
	  # paragraph break.
	  description: >
	    OpenAI-compatible inference API served by the Modal apps in this repo
	    (`nvidia-llms`, `openbmb-llms`, `google-llms`). Each model is exposed as its
	    own endpoint and speaks the OpenAI REST protocol, so any OpenAI-compatible
	    client works by pointing `base_url` at the endpoint URL.

	    Every running endpoint also serves a live, model-specific spec at
	    `/openapi.json` and an interactive Swagger UI at `/docs`. This checked-in
	    spec documents the shared, stable surface across all endpoints.
	# One templated server entry; `{workspace}` and `{endpoint}` are substituted
	# from the variables declared underneath it.
	servers:
	  - url: "https://{workspace}--{endpoint}.modal.run/v1"
	    description: Per-model Modal endpoint (one server per model).
	    variables:
	      workspace:
	        default: your-workspace
	        description: Your Modal workspace slug.
	      endpoint:
	        # The default has to be one of the enum entries listed below.
	        default: google-llms-gemma-4-12b
	        description: >
	          The Modal function label: <app-name>-<endpoint_name>, where app-name is
	          the modal.App (nvidia-llms / openbmb-llms / google-llms) and endpoint_name
	          is the per-model slug from registry.py.
	        enum:
	          - nvidia-llms-nemotron-3-nano-4b
	          - openbmb-llms-minicpm-o-4-5
	          - openbmb-llms-minicpm-4-1-8b
	          - google-llms-gemma-4-26b
	          - google-llms-gemma-4-12b
	# Document-wide default: every operation is called with the bearerAuth scheme.
	# The empty list means the scheme takes no scopes.
	security:
	  - bearerAuth: []
	# Operations exposed by every endpoint. Paths are relative to the server URL,
	# which already ends in /v1.
	paths:
	  /models:
	    get:
	      operationId: listModels
	      summary: List the model(s) served by this endpoint.
	      responses:
	        "200":
	          description: Available models.
	          content:
	            application/json:
	              schema:
	                $ref: "#/components/schemas/ModelList"
	        # bearerAuth is the document-wide requirement, so this operation can
	        # be rejected the same way as the completion operations.
	        "401":
	          $ref: "#/components/responses/Unauthorized"
	  /chat/completions:
	    post:
	      operationId: createChatCompletion
	      summary: Create a chat completion (OpenAI-compatible).
	      requestBody:
	        required: true
	        content:
	          application/json:
	            schema:
	              $ref: "#/components/schemas/ChatCompletionRequest"
	      responses:
	        "200":
	          description: Chat completion. A stream of SSE chunks when `stream` is true.
	          content:
	            application/json:
	              schema:
	                $ref: "#/components/schemas/ChatCompletionResponse"
	            # Media type of the streamed variant described above; the chunk
	            # payload is left untyped here.
	            text/event-stream:
	              schema:
	                type: string
	        "401":
	          $ref: "#/components/responses/Unauthorized"
	  /completions:
	    post:
	      operationId: createCompletion
	      summary: Create a text completion (OpenAI-compatible).
	      requestBody:
	        required: true
	        content:
	          application/json:
	            schema:
	              $ref: "#/components/schemas/CompletionRequest"
	      responses:
	        "200":
	          description: Text completion.
	          content:
	            application/json:
	              schema:
	                $ref: "#/components/schemas/CompletionResponse"
	        "401":
	          $ref: "#/components/responses/Unauthorized"
	# Reusable definitions referenced via $ref from the operations in this document.
	components:
	  securitySchemes:
	    bearerAuth:
	      type: http
	      scheme: bearer
	      description: >
	        Required only when the apps are deployed with MODAL_LLM_REQUIRE_AUTH=1.
	        The token is the value of the `llm-api-key` Modal Secret (VLLM_API_KEY).
	        Without auth the endpoints are public and any token is accepted.
	  responses:
	    Unauthorized:
	      description: Missing or invalid bearer token (auth-enabled deploys only).
	      content:
	        application/json:
	          schema:
	            $ref: "#/components/schemas/Error"
	  schemas:
	    # One entry of the model listing.
	    Model:
	      type: object
	      properties:
	        id:
	          type: string
	          description: Served model id (the Hugging Face repo id), e.g. "google/gemma-4-12B".
	        object:
	          type: string
	          const: model
	        created:
	          type: integer
	        owned_by:
	          type: string
	    # Envelope returned by GET /models.
	    ModelList:
	      type: object
	      properties:
	        object:
	          type: string
	          const: list
	        data:
	          type: array
	          items:
	            $ref: "#/components/schemas/Model"
	    # A single conversation turn; used in requests and in response choices.
	    # NOTE(review): `content` is required here even for assistant turns in a
	    # response — confirm that holds when the model answers with tool calls.
	    ChatMessage:
	      type: object
	      required: [role, content]
	      properties:
	        role:
	          type: string
	          enum: [system, user, assistant, tool]
	        content:
	          description: >
	            Plain text, or an array of content parts for multimodal models
	            (e.g. MiniCPM-o) with `type` of text / image_url / input_audio.
	          oneOf:
	            - type: string
	            # Content parts are deliberately left as free-form objects.
	            - type: array
	              items:
	                type: object
	    # Request body of POST /chat/completions.
	    ChatCompletionRequest:
	      type: object
	      required: [model, messages]
	      properties:
	        model:
	          type: string
	          description: Served model id (must match the endpoint's model).
	        messages:
	          type: array
	          items:
	            $ref: "#/components/schemas/ChatMessage"
	        max_tokens:
	          type: integer
	        temperature:
	          type: number
	          default: 1.0
	        top_p:
	          type: number
	          default: 1.0
	        stream:
	          type: boolean
	          default: false
	        tools:
	          type: array
	          description: Tool/function definitions (models with a tool_call_parser).
	          items:
	            type: object
	        tool_choice:
	          # Single-quoted: `\|` is not a YAML escape, so the earlier
	          # double-quoted form with backslashes could not be parsed.
	          description: 'auto | none | required | a specific tool.'
	          oneOf:
	            - type: string
	            - type: object
	    # Non-streaming response body of POST /chat/completions.
	    ChatCompletionResponse:
	      type: object
	      properties:
	        id:
	          type: string
	        object:
	          type: string
	          const: chat.completion
	        created:
	          type: integer
	        model:
	          type: string
	        choices:
	          type: array
	          items:
	            type: object
	            properties:
	              index:
	                type: integer
	              message:
	                $ref: "#/components/schemas/ChatMessage"
	              finish_reason:
	                type: string
	        usage:
	          $ref: "#/components/schemas/Usage"
	    # Request body of POST /completions.
	    CompletionRequest:
	      type: object
	      required: [model, prompt]
	      properties:
	        model:
	          type: string
	        prompt:
	          # Either one prompt string or a list of prompt strings.
	          oneOf:
	            - type: string
	            - type: array
	              items:
	                type: string
	        max_tokens:
	          type: integer
	        temperature:
	          type: number
	        stream:
	          type: boolean
	          default: false
	    # Response body of POST /completions.
	    CompletionResponse:
	      type: object
	      properties:
	        id:
	          type: string
	        object:
	          type: string
	          const: text_completion
	        model:
	          type: string
	        choices:
	          type: array
	          items:
	            type: object
	            properties:
	              index:
	                type: integer
	              text:
	                type: string
	              finish_reason:
	                type: string
	        usage:
	          $ref: "#/components/schemas/Usage"
	    # Token accounting attached to both completion response types.
	    Usage:
	      type: object
	      properties:
	        prompt_tokens:
	          type: integer
	        completion_tokens:
	          type: integer
	        total_tokens:
	          type: integer
	    # Error envelope referenced by the Unauthorized response.
	    Error:
	      type: object
	      properties:
	        error:
	          type: object
	          properties:
	            message:
	              type: string
	            type:
	              type: string
	            code:
	              type: string