Spaces:

huggingface
/

ml-intern-api

Running

File size: 7,174 Bytes

1635e66

"""Pydantic models for API requests and responses."""

from enum import Enum
from typing import Any, Literal

from pydantic import BaseModel, Field


class OpType(str, Enum):
    """String-valued kinds of operation that an ``Operation`` can carry.

    The member values are meant to match the ones used in
    agent/core/agent_loop.py. Because ``str`` is mixed in, every member is
    also a ``str`` and compares equal to its raw value.
    """

    # Declaration order is the iteration order of the enum; keep it stable.
    USER_INPUT = "user_input"
    EXEC_APPROVAL = "exec_approval"
    UNDO = "undo"
    COMPACT = "compact"
    SHUTDOWN = "shutdown"


class Operation(BaseModel):
    """Operation to be submitted to the agent."""

    # Required: which kind of operation this is; must be an OpType value.
    op_type: OpType
    # Optional free-form payload; None when omitted. The keys expected for
    # each op_type are not defined in this module.
    data: dict[str, Any] | None = None


class Submission(BaseModel):
    """Submission wrapper with ID and operation."""

    # Required identifier of this submission (plain string; no format is
    # enforced here).
    id: str
    # Required: the wrapped Operation.
    operation: Operation


class ToolApproval(BaseModel):
    """Approval decision for a single tool call."""

    # Required: ID of the tool call this decision applies to.
    tool_call_id: str
    # Required: the decision itself (True = approved).
    approved: bool
    # The remaining fields are optional and default to None.
    # NOTE(review): presumably `feedback` is free text accompanying the
    # decision, `edited_script` a user-modified replacement script, and
    # `namespace` a target namespace for the tool -- confirm against the
    # approval handler, which is not visible from this module.
    feedback: str | None = None
    edited_script: str | None = None
    namespace: str | None = None


class ApprovalRequest(BaseModel):
    """Request to approve/reject tool calls."""

    # Required: session the decisions belong to.
    session_id: str
    # Required: list of per-tool-call decisions. No length bounds are
    # declared on this field.
    approvals: list[ToolApproval]


class SubmitRequest(BaseModel):
    """Request to submit user input."""

    # Required: session that receives the input.
    session_id: str
    # Required, between 1 and 100_000 characters long.
    # Cap text size to prevent context bloat / cost amplification: a malicious
    # or runaway client could otherwise attach megabytes that then ride along
    # in every subsequent turn until /api/compact is called.
    text: str = Field(..., min_length=1, max_length=100_000)


class TruncateRequest(BaseModel):
    """Request to truncate conversation history to before a specific user message."""

    # Required: integer index of the user message to truncate before.
    # NOTE(review): no lower bound is declared here, so negative values pass
    # model validation -- confirm the handler rejects or handles them, and
    # whether the index is 0- or 1-based.
    user_message_index: int


class SessionResponse(BaseModel):
    """Response when creating a new session."""

    # Required: ID of the session.
    session_id: str
    # Defaults to True when not set explicitly.
    ready: bool = True
    # Optional model identifier; None when not provided.
    model: str | None = None


class PendingApprovalTool(BaseModel):
    """A tool waiting for user approval."""

    # Required: the tool (presumably its name -- confirm with the producer).
    tool: str
    # Required: ID of the pending tool call.
    tool_call_id: str
    # Arguments of the call; a fresh empty dict when omitted. Declared with
    # default_factory instead of a literal ``{}`` so that it follows the same
    # convention as the other container defaults in this module and does not
    # rely on the framework copying a shared mutable default.
    arguments: dict[str, Any] = Field(default_factory=dict)


class SessionAutoApprovalInfo(BaseModel):
    """Per-session auto-approval budget state."""

    # Every field has a default, so the model can be built with no arguments.
    # Defaults to False.
    enabled: bool = False
    # Optional cost cap (USD, per the field suffix); None when unset.
    cost_cap_usd: float | None = None
    # Estimated spend (USD); defaults to 0.0.
    estimated_spend_usd: float = 0.0
    # Optional remaining amount (USD); None when unset.
    # NOTE(review): presumably cost_cap_usd minus estimated_spend_usd, computed
    # by the producer -- this model does not derive it.
    remaining_usd: float | None = None


class SessionInfo(BaseModel):
    """Session metadata."""

    # Required: session ID and creation time. created_at is a plain string;
    # its format is not enforced here.
    session_id: str
    created_at: str
    # Optional start of the usage window, as a string; None by default.
    usage_window_started_at: str | None = None
    # Required activity flag.
    is_active: bool
    # Defaults to False.
    is_processing: bool = False
    # Required message count.
    message_count: int
    # Falls back to "dev" when no user id is supplied.
    user_id: str = "dev"
    # Tools awaiting approval, or None (the default).
    pending_approval: list[PendingApprovalTool] | None = None
    # Optional model identifier and title; both default to None.
    model: str | None = None
    title: str | None = None
    # A new empty list is created per instance when omitted.
    notification_destinations: list[str] = Field(default_factory=list)
    # A new all-defaults SessionAutoApprovalInfo is created per instance when
    # omitted.
    auto_approval: SessionAutoApprovalInfo = Field(
        default_factory=SessionAutoApprovalInfo
    )


class SessionNotificationsRequest(BaseModel):
    """Replace the session's auto-notification destinations."""

    # Required: the new list of destinations. Entries are plain strings; their
    # format is not validated by this model, and no length bounds are declared.
    destinations: list[str]


class SessionYoloRequest(BaseModel):
    """Update a session's auto-approval policy."""

    # Required: desired on/off state of auto-approval.
    enabled: bool
    # Optional cost cap (USD, per the field suffix); must be >= 0 when given.
    # NOTE(review): 0 is accepted here and there is no upper bound, whereas
    # V1CreateResponseRequest.max_cost_usd requires 0 < value <= 500 --
    # confirm the difference is intentional.
    cost_cap_usd: float | None = Field(default=None, ge=0)


class UsageBucket(BaseModel):
    """App-attributed usage totals for a session."""

    # Every field has a default, so an empty bucket can be built with no
    # arguments.
    # Optional session the totals belong to.
    session_id: str | None = None
    # Monetary amounts (USD, per the field suffix); each defaults to 0.0.
    # NOTE(review): total_usd is presumably the sum of the three component
    # amounts below -- this model does not compute or check that.
    total_usd: float = 0.0
    inference_usd: float = 0.0
    hf_jobs_estimated_usd: float = 0.0
    sandbox_estimated_usd: float = 0.0
    # Event counters; each defaults to 0.
    llm_calls: int = 0
    hf_jobs_count: int = 0
    sandbox_count: int = 0
    # Token counters; each defaults to 0.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cache_read_tokens: int = 0
    cache_creation_tokens: int = 0
    total_tokens: int = 0
    # Estimated billable durations in whole seconds; each defaults to 0.
    hf_jobs_billable_seconds_estimate: int = 0
    sandbox_billable_seconds_estimate: int = 0


class HfAccountUsageBucket(BaseModel):
    """HF account billing usage for a time window."""

    # Window bounds and timezone as optional strings; their format is not
    # enforced here. All default to None.
    window_start: str | None = None
    window_end: str | None = None
    timezone: str | None = None
    # Monetary amounts (USD, per the field suffix); each defaults to 0.0.
    total_usd: float = 0.0
    inference_providers_usd: float = 0.0
    hf_jobs_usd: float = 0.0
    # Usage quantities: a request count (int) and job minutes (float).
    inference_provider_requests: int = 0
    hf_jobs_minutes: float = 0.0


class HfInferenceProvidersCredits(BaseModel):
    """Included and configured Inference Providers account credits."""

    # Monetary amounts (USD, per the field suffix); each defaults to 0.0.
    # NOTE(review): the two remaining_* values are presumably derived from
    # included_usd / limit_usd and used_usd by the producer -- this model does
    # not compute them.
    included_usd: float = 0.0
    used_usd: float = 0.0
    remaining_included_usd: float = 0.0
    limit_usd: float = 0.0
    remaining_limit_usd: float = 0.0
    # Request count; defaults to 0.
    num_requests: int = 0
    # Optional period bounds as strings (format not enforced here).
    period_start: str | None = None
    period_end: str | None = None


class HfAccountUsage(BaseModel):
    """Authoritative HF account billing usage from the signed-in token."""

    # Required; the only accepted value is "hf_billing".
    source: Literal["hf_billing"]
    # Defaults to False.
    available: bool = False
    # Optional error text; None by default.
    error: str | None = None
    # Optional usage buckets; each is None when absent.
    current_session: HfAccountUsageBucket | None = None
    month: HfAccountUsageBucket | None = None
    # Optional credits summary; None when absent.
    inference_providers_credits: HfInferenceProvidersCredits | None = None


class UsageResponse(BaseModel):
    """Current-user app-attributed usage response."""

    # Required; the only accepted value is "app_telemetry".
    source: Literal["app_telemetry"]
    # Required; the only accepted value is "USD".
    currency: Literal["USD"]
    # Required strings; their format is not enforced here.
    generated_at: str
    timezone: str
    # Optional nested sections; each defaults to None.
    session: UsageBucket | None = None
    hf_account: HfAccountUsage | None = None
    auto_approval: SessionAutoApprovalInfo | None = None
    # String-to-string mapping of links; a new empty dict per instance when
    # omitted.
    links: dict[str, str] = Field(default_factory=dict)


class DatasetUploadResponse(BaseModel):
    """Response for a dataset file uploaded to the Hub."""

    # Required: session and repository identifiers.
    session_id: str
    repo_id: str
    # Fixed to "dataset" (also the default).
    repo_type: Literal["dataset"] = "dataset"
    # Defaults to True.
    private: bool = True
    # Required string descriptors of the upload.
    upload_id: str
    config_name: str
    filename: str
    path_in_repo: str
    # Required: size of the file in bytes.
    size_bytes: int
    # Required: one of "csv", "json" or "jsonl".
    format: Literal["csv", "json", "jsonl"]
    # Required: URL on the Hub (plain string; not validated as a URL here).
    hub_url: str
    # Required: snippet string (presumably code showing how to load the
    # dataset -- confirm with the producer).
    load_dataset_snippet: str


class V1InputMessage(BaseModel):
    """One message in a /v1/responses structured input list."""

    # One of "user", "assistant", "system" or "developer"; defaults to "user".
    role: Literal["user", "assistant", "system", "developer"] = "user"
    # Required message text, between 1 and 100_000 characters long.
    content: str = Field(..., min_length=1, max_length=100_000)


class V1CreateResponseRequest(BaseModel):
    """Body for POST /v1/responses (OpenAI Responses-API style)."""

    # Optional model identifier; None by default.
    model: str | None = None
    # Required: either a plain string or a list of V1InputMessage.
    # NOTE(review): max_length is declared once on the whole union and no
    # min_length is set -- presumably the 100_000 cap then bounds the string
    # length and the list length alike, and empty input passes validation.
    # Confirm this is intended (SubmitRequest.text requires min_length=1, and
    # each V1InputMessage.content may itself be up to 100_000 characters).
    input: str | list[V1InputMessage] = Field(..., max_length=100_000)
    # Optional instructions text, at most 20_000 characters.
    instructions: str | None = Field(default=None, max_length=20_000)
    # Both flags default to False.
    background: bool = False
    stream: bool = False
    # Optional ID of an earlier response; None by default.
    previous_response_id: str | None = None
    # Session-cumulative YOLO auto-approval cap. None falls back to the
    # server default (DEFAULT_YOLO_COST_CAP_USD). When given, the value must
    # satisfy 0 < value <= 500.
    max_cost_usd: float | None = Field(default=None, gt=0, le=500)
    # How long a synchronous (non-stream, non-background) call waits for the
    # turn to finish before returning the in-progress response object.
    # Defaults to 900 and must be within 1..3600.
    wait_timeout_seconds: float = Field(default=900, ge=1, le=3600)
    # Optional string-to-string metadata; None by default.
    metadata: dict[str, str] | None = None


class V1ApprovalDecisionRequest(BaseModel):
    """Body for POST /v1/responses/{id}/approvals."""

    # Required: the decision (True = approve).
    approve: bool
    # Optional new cost cap (USD, per the field suffix); when given it must
    # satisfy 0 < value <= 500.
    new_max_cost_usd: float | None = Field(default=None, gt=0, le=500)
    # Optional feedback text, at most 10_000 characters.
    feedback: str | None = Field(default=None, max_length=10_000)


class HealthResponse(BaseModel):
    """Health check response."""

    # Every field has a default, so the model can be built with no arguments.
    # Free-form status string; defaults to "ok".
    status: str = "ok"
    # Session counters; both default to 0.
    active_sessions: int = 0
    max_sessions: int = 0


class LLMHealthResponse(BaseModel):
    """Outcome of an LLM provider health check."""

    # Required. Expected values: "ok" | "error" | "skipped" (documented only;
    # the type is a plain str, so other values are not rejected).
    status: str
    # Required model identifier.
    model: str
    # Optional error text; None by default.
    error: str | None = None
    # Optional error category; None by default. Expected values:
    # "auth" | "credits" | "rate_limit" | "network" | "unknown"
    # (documented only; not enforced by the type).
    error_type: str | None = None