Spaces:

huggingface
/

ml-intern-api

Running

App Files Files Community

ml-intern-api / backend /models.py

abidlabs HF Staff

Deploy ML Intern API (backend + self-documenting frontend)

1635e66 verified 5 days ago

Raw

History Blame Contribute Delete

7.17 kB

	"""Pydantic models for API requests and responses."""

	from enum import Enum
	from typing import Any, Literal

	from pydantic import BaseModel, Field


	class OpType(str, Enum):
	"""Operation types matching agent/core/agent_loop.py."""

	USER_INPUT = "user_input"
	EXEC_APPROVAL = "exec_approval"
	UNDO = "undo"
	COMPACT = "compact"
	SHUTDOWN = "shutdown"


	class Operation(BaseModel):
	"""Operation to be submitted to the agent."""

	op_type: OpType
	data: dict[str, Any] \| None = None


	class Submission(BaseModel):
	"""Submission wrapper with ID and operation."""

	id: str
	operation: Operation


	class ToolApproval(BaseModel):
	"""Approval decision for a single tool call."""

	tool_call_id: str
	approved: bool
	feedback: str \| None = None
	edited_script: str \| None = None
	namespace: str \| None = None


	class ApprovalRequest(BaseModel):
	"""Request to approve/reject tool calls."""

	session_id: str
	approvals: list[ToolApproval]


	class SubmitRequest(BaseModel):
	"""Request to submit user input."""

	session_id: str
	# Cap text size to prevent context-bloat / cost-amplification: a malicious
	# or runaway client could otherwise attach megabytes that then ride along
	# in every subsequent turn until /api/compact is called.
	text: str = Field(..., min_length=1, max_length=100_000)


	class TruncateRequest(BaseModel):
	"""Request to truncate conversation history to before a specific user message."""

	user_message_index: int


	class SessionResponse(BaseModel):
	"""Response when creating a new session."""

	session_id: str
	ready: bool = True
	model: str \| None = None


	class PendingApprovalTool(BaseModel):
	"""A tool waiting for user approval."""

	tool: str
	tool_call_id: str
	arguments: dict[str, Any] = {}


	class SessionAutoApprovalInfo(BaseModel):
	"""Per-session auto-approval budget state."""

	enabled: bool = False
	cost_cap_usd: float \| None = None
	estimated_spend_usd: float = 0.0
	remaining_usd: float \| None = None


	class SessionInfo(BaseModel):
	"""Session metadata."""

	session_id: str
	created_at: str
	usage_window_started_at: str \| None = None
	is_active: bool
	is_processing: bool = False
	message_count: int
	user_id: str = "dev"
	pending_approval: list[PendingApprovalTool] \| None = None
	model: str \| None = None
	title: str \| None = None
	notification_destinations: list[str] = Field(default_factory=list)
	auto_approval: SessionAutoApprovalInfo = Field(
	default_factory=SessionAutoApprovalInfo
	)


	class SessionNotificationsRequest(BaseModel):
	"""Replace the session's auto-notification destinations."""

	destinations: list[str]


	class SessionYoloRequest(BaseModel):
	"""Update a session's auto-approval policy."""

	enabled: bool
	cost_cap_usd: float \| None = Field(default=None, ge=0)


	class UsageBucket(BaseModel):
	"""App-attributed usage totals for a session."""

	session_id: str \| None = None
	total_usd: float = 0.0
	inference_usd: float = 0.0
	hf_jobs_estimated_usd: float = 0.0
	sandbox_estimated_usd: float = 0.0
	llm_calls: int = 0
	hf_jobs_count: int = 0
	sandbox_count: int = 0
	prompt_tokens: int = 0
	completion_tokens: int = 0
	cache_read_tokens: int = 0
	cache_creation_tokens: int = 0
	total_tokens: int = 0
	hf_jobs_billable_seconds_estimate: int = 0
	sandbox_billable_seconds_estimate: int = 0


	class HfAccountUsageBucket(BaseModel):
	"""HF account billing usage for a time window."""

	window_start: str \| None = None
	window_end: str \| None = None
	timezone: str \| None = None
	total_usd: float = 0.0
	inference_providers_usd: float = 0.0
	hf_jobs_usd: float = 0.0
	inference_provider_requests: int = 0
	hf_jobs_minutes: float = 0.0


	class HfInferenceProvidersCredits(BaseModel):
	"""Included and configured Inference Providers account credits."""

	included_usd: float = 0.0
	used_usd: float = 0.0
	remaining_included_usd: float = 0.0
	limit_usd: float = 0.0
	remaining_limit_usd: float = 0.0
	num_requests: int = 0
	period_start: str \| None = None
	period_end: str \| None = None


	class HfAccountUsage(BaseModel):
	"""Authoritative HF account billing usage from the signed-in token."""

	source: Literal["hf_billing"]
	available: bool = False
	error: str \| None = None
	current_session: HfAccountUsageBucket \| None = None
	month: HfAccountUsageBucket \| None = None
	inference_providers_credits: HfInferenceProvidersCredits \| None = None


	class UsageResponse(BaseModel):
	"""Current-user app-attributed usage response."""

	source: Literal["app_telemetry"]
	currency: Literal["USD"]
	generated_at: str
	timezone: str
	session: UsageBucket \| None = None
	hf_account: HfAccountUsage \| None = None
	auto_approval: SessionAutoApprovalInfo \| None = None
	links: dict[str, str] = Field(default_factory=dict)


	class DatasetUploadResponse(BaseModel):
	"""Response for a dataset file uploaded to the Hub."""

	session_id: str
	repo_id: str
	repo_type: Literal["dataset"] = "dataset"
	private: bool = True
	upload_id: str
	config_name: str
	filename: str
	path_in_repo: str
	size_bytes: int
	format: Literal["csv", "json", "jsonl"]
	hub_url: str
	load_dataset_snippet: str


	class V1InputMessage(BaseModel):
	"""One message in a /v1/responses structured input list."""

	role: Literal["user", "assistant", "system", "developer"] = "user"
	content: str = Field(..., min_length=1, max_length=100_000)


	class V1CreateResponseRequest(BaseModel):
	"""Body for POST /v1/responses (OpenAI Responses-API style)."""

	model: str \| None = None
	input: str \| list[V1InputMessage] = Field(..., max_length=100_000)
	instructions: str \| None = Field(default=None, max_length=20_000)
	background: bool = False
	stream: bool = False
	previous_response_id: str \| None = None
	# Session-cumulative YOLO auto-approval cap. None falls back to the
	# server default (DEFAULT_YOLO_COST_CAP_USD).
	max_cost_usd: float \| None = Field(default=None, gt=0, le=500)
	# How long a synchronous (non-stream, non-background) call waits for the
	# turn to finish before returning the in-progress response object.
	wait_timeout_seconds: float = Field(default=900, ge=1, le=3600)
	metadata: dict[str, str] \| None = None


	class V1ApprovalDecisionRequest(BaseModel):
	"""Body for POST /v1/responses/{id}/approvals."""

	approve: bool
	new_max_cost_usd: float \| None = Field(default=None, gt=0, le=500)
	feedback: str \| None = Field(default=None, max_length=10_000)


	class HealthResponse(BaseModel):
	"""Health check response."""

	status: str = "ok"
	active_sessions: int = 0
	max_sessions: int = 0


	class LLMHealthResponse(BaseModel):
	"""LLM provider health check response."""

	status: str # "ok" \| "error" \| "skipped"
	model: str
	error: str \| None = None
	error_type: str \| None = (
	None # "auth" \| "credits" \| "rate_limit" \| "network" \| "unknown"
	)