Spaces:

build-small-hackathon
/

borderless

Running

File size: 3,083 Bytes

# ui/agent/completion.py
from __future__ import annotations

from typing import Any

from huggingface_hub import InferenceClient

from .messages import parse_text_tool_calls, parse_tool_calls


def complete_turn(

    client: InferenceClient,

    api_messages: list[dict[str, Any]],

    *,

    max_tokens: int,

    temperature: float,

    top_p: float,

    tools: list[dict[str, Any]] | None,

    tool_choice: Any = "auto",

) -> tuple[str, str, list[dict[str, Any]] | None]:
    """Run one model turn, preferring streaming and falling back to a single request."""
    content = ""
    reasoning = ""
    tool_calls_map: dict[int, dict[str, Any]] = {}

    stream = client.chat_completion(
        api_messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        tools=tools,
        tool_choice=tool_choice if tools else None,
        stream=True,
    )
    for chunk in stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta.content:
            content += delta.content
        if delta.reasoning:
            reasoning += delta.reasoning
        if delta.tool_calls:
            for tool_call in delta.tool_calls:
                idx = tool_call.index
                if idx not in tool_calls_map:
                    tool_calls_map[idx] = {
                        "id": tool_call.id,
                        "type": tool_call.type,
                        "function": {
                            "name": tool_call.function.name or "",
                            "arguments": tool_call.function.arguments or "",
                        },
                    }
                    continue
                if tool_call.function.name:
                    tool_calls_map[idx]["function"]["name"] = tool_call.function.name
                if tool_call.function.arguments:
                    tool_calls_map[idx]["function"]["arguments"] += (
                        tool_call.function.arguments
                    )

    text_tool_calls = parse_text_tool_calls(content) or parse_text_tool_calls(reasoning)
    tool_calls = list(tool_calls_map.values()) or text_tool_calls
    if content or reasoning or tool_calls:
        if text_tool_calls:
            content = ""
            reasoning = ""
        return content, reasoning, tool_calls

    response = client.chat_completion(
        api_messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        tools=tools,
        tool_choice=tool_choice if tools else None,
    )
    assistant_msg = response.choices[0].message
    content = assistant_msg.content or ""
    reasoning = assistant_msg.reasoning or ""
    text_tool_calls = parse_text_tool_calls(content) or parse_text_tool_calls(reasoning)
    tool_calls = parse_tool_calls(assistant_msg) or text_tool_calls
    return (
        "" if text_tool_calls else content,
        "" if text_tool_calls else reasoning,
        tool_calls,
    )