# ui/agent/respond.py
#
# Chat entry point for the agent UI: authenticates the caller, runs a
# "think first" model turn, then loops over tool-calling turns until the model
# produces a final answer (or a fallback answer has to be synthesized).
from __future__ import annotations

import json
import logging
import os
import re
import time
import uuid
from typing import Any

import gradio as gr
from gradio import ChatMessage
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError

from .completion import complete_turn
from .config import MAX_TOOL_ROUNDS, MODEL_ID, THINK_TOOLS, TOOLS, hub_token_available
from .messages import history_to_api_messages, multimodal_input_to_api_content
from .streaming import yield_response
from .synthesis import build_structured_final_answer, findings_from_ui_messages
from .tools import emit_thinking_message, execute_tool_calls, should_emit_reasoning

logger = logging.getLogger(__name__)

AUTH_REQUIRED_MESSAGE = (
    "Please sign in with your Hugging Face account to use Borderless. "
    "Click **Log in with Hugging Face** at the top of the page, then try again."
)

SESSION_EXPIRED_MESSAGE = (
    "Your Hugging Face session has expired. "
    "Click **Log in with Hugging Face** at the top of the page to sign in again, "
    "then retry your message."
)

# Matches "401" only as a standalone number. A plain substring test would also
# fire on unrelated digits embedded in request IDs, ports, sizes, etc.
_HTTP_401_PATTERN = re.compile(r"\b401\b")

# Extra user turn appended when the tool loop runs out of rounds.
_SYNTHESIS_PROMPT = (
    "Synthesize the research gathered so far into the best "
    "possible partial answer. Clearly label any items that "
    "still need verification on official sources."
)


def _auth_error_message(exc: Exception) -> str | None:
    """Return the session-expired notice if *exc* is an authentication failure.

    An ``HfHubHTTPError`` carrying a 401 response is treated as authoritative.
    Otherwise the exception text is inspected for a standalone ``401``, the
    word ``unauthorized`` or the phrase ``oauth token has expired``
    (case-insensitive).

    Returns:
        ``SESSION_EXPIRED_MESSAGE`` for authentication failures, else ``None``.
    """
    if (
        isinstance(exc, HfHubHTTPError)
        and exc.response is not None
        and exc.response.status_code == 401
    ):
        return SESSION_EXPIRED_MESSAGE

    text = str(exc).lower()
    if (
        _HTTP_401_PATTERN.search(text)
        or "unauthorized" in text
        or "oauth token has expired" in text
    ):
        return SESSION_EXPIRED_MESSAGE
    return None


def _synthetic_think_call(thought: str) -> list[dict[str, Any]]:
    """Build a one-element tool-call list invoking ``think`` with *thought*."""
    return [
        {
            "id": f"call_{uuid.uuid4().hex}",
            "type": "function",
            "function": {
                "name": "think",
                "arguments": json.dumps({"thought": thought}),
            },
        }
    ]


def respond(
    message: str | dict[str, Any],
    history: list[dict[str, Any]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    globe_state: dict[str, Any],
    hf_token: gr.OAuthToken | None,
):
    """Generate the assistant's reply to *message*, yielding UI updates as it goes.

    Flow:

    1. Reject the request if no usable token exists (no OAuth token and
       ``hub_token_available()`` is false) or if the OAuth token has expired.
    2. Run one turn restricted to ``THINK_TOOLS`` with a required tool call; if
       the model returns no tool call, a ``think`` call is synthesized from its
       content/reasoning.
    3. Run up to ``MAX_TOOL_ROUNDS`` turns with ``TOOLS``. A turn with tool
       calls executes them and continues; a turn with content ends the
       generator with that answer; an empty turn is retried with a required
       tool call and then without tools.
    4. If the rounds are exhausted, ask the model for a partial synthesis and,
       failing that, emit a structured answer built from the UI messages.

    Args:
        message: The user's input, converted with
            ``multimodal_input_to_api_content``.
        history: Prior conversation, converted with ``history_to_api_messages``.
        system_message: Content of the system message sent to the model.
        max_tokens: Forwarded to every ``complete_turn`` call.
        temperature: Forwarded to every ``complete_turn`` call.
        top_p: Forwarded to every ``complete_turn`` call.
        globe_state: State object threaded through every yielded update; it is
            rebound to whatever the tool/thinking helpers yield.
        hf_token: OAuth token of the signed-in user, or ``None`` to fall back to
            the ``HF_TOKEN`` environment variable.

    Yields:
        Whatever ``yield_response`` yields, plus ``(ui_msg, globe_state)`` pairs
        relayed from ``emit_thinking_message`` and ``execute_tool_calls``.

    For more information on `huggingface_hub` Inference API support, please
    check the docs:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    if hf_token is None and not hub_token_available():
        yield from yield_response([], AUTH_REQUIRED_MESSAGE, globe_state)
        return

    if hf_token is not None and hf_token.expires_at <= time.time():
        yield from yield_response([], SESSION_EXPIRED_MESSAGE, globe_state)
        return

    api_token = (
        hf_token.token if hf_token is not None else os.environ.get("HF_TOKEN", "")
    )
    client = InferenceClient(api_key=api_token, model=MODEL_ID)

    user_content = multimodal_input_to_api_content(message)
    if not user_content:
        yield from yield_response(
            [],
            "Please enter a message or attach a file.",
            globe_state,
        )
        return

    api_messages: list[dict[str, Any]] = [
        {"role": "system", "content": system_message},
        *history_to_api_messages(history),
        {"role": "user", "content": user_content},
    ]
    ui_messages: list[ChatMessage] = []

    # Sampling options shared by every model call below.
    sampling = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    try:
        # First turn: force a call from the restricted THINK_TOOLS set.
        content, reasoning, tool_calls = complete_turn(
            client,
            api_messages,
            tools=THINK_TOOLS,
            tool_choice="required",
            **sampling,
        )
        if not tool_calls:
            thought = content or reasoning or "Plan the response before taking action."
            tool_calls = _synthetic_think_call(thought)
            content = ""

        # The relay loops stay inline so that ``globe_state`` is rebound after
        # every yielded update, including when a helper raises part-way through.
        if should_emit_reasoning(reasoning, tool_calls):
            for ui_msg, globe_state in emit_thinking_message(
                ui_messages, reasoning, globe_state
            ):
                yield ui_msg, globe_state

        for ui_msg, globe_state in execute_tool_calls(
            api_messages,
            ui_messages,
            tool_calls,
            content,
            globe_state,
        ):
            yield ui_msg, globe_state

        for _ in range(MAX_TOOL_ROUNDS):
            content, reasoning, tool_calls = complete_turn(
                client,
                api_messages,
                tools=TOOLS,
                tool_choice="auto",
                **sampling,
            )

            if should_emit_reasoning(reasoning, tool_calls):
                for ui_msg, globe_state in emit_thinking_message(
                    ui_messages, reasoning, globe_state
                ):
                    yield ui_msg, globe_state

            if tool_calls:
                for ui_msg, globe_state in execute_tool_calls(
                    api_messages,
                    ui_messages,
                    tool_calls,
                    content,
                    globe_state,
                ):
                    yield ui_msg, globe_state
                continue

            if content:
                yield from yield_response(ui_messages, content, globe_state)
                return

            # Retry once with a required tool call when the model returned nothing.
            content, reasoning, tool_calls = complete_turn(
                client,
                api_messages,
                tools=TOOLS,
                tool_choice="required",
                **sampling,
            )

            if should_emit_reasoning(reasoning, tool_calls):
                for ui_msg, globe_state in emit_thinking_message(
                    ui_messages, reasoning, globe_state
                ):
                    yield ui_msg, globe_state

            if tool_calls:
                for ui_msg, globe_state in execute_tool_calls(
                    api_messages,
                    ui_messages,
                    tool_calls,
                    content,
                    globe_state,
                ):
                    yield ui_msg, globe_state
                continue

            if content:
                yield from yield_response(ui_messages, content, globe_state)
                return

            # Some providers return an empty first turn when tools are enabled.
            content, _, _ = complete_turn(
                client,
                api_messages,
                tools=None,
                **sampling,
            )
            if content:
                yield from yield_response(ui_messages, content, globe_state)
                return

            yield from yield_response(
                ui_messages,
                "I could not generate a response. Please try again.",
                globe_state,
            )
            return
    except Exception as exc:
        # Top-level boundary: surface the failure in the chat instead of raising.
        error_text = _auth_error_message(exc) or (
            f"Sorry, something went wrong while generating a response: {exc}"
        )
        yield from yield_response(ui_messages, error_text, globe_state)
        return

    # Reached only when every round ended in tool calls: ask for a partial
    # synthesis of what has been gathered, without offering tools.
    try:
        content, _, _ = complete_turn(
            client,
            api_messages + [{"role": "user", "content": _SYNTHESIS_PROMPT}],
            tools=None,
            **sampling,
        )
        if content:
            yield from yield_response(ui_messages, content, globe_state)
            return
    except Exception:
        # Best effort: record the failure, then fall through to the structured
        # answer below rather than surfacing an error to the user.
        logger.exception("Model synthesis failed; using structured fallback answer.")

    findings = findings_from_ui_messages(ui_messages)
    fallback_answer = build_structured_final_answer(
        profile_summary="",
        findings=findings,
        preamble=(
            "I could not complete model synthesis, but here is the structured "
            "research gathered so far."
        ),
    )
    yield from yield_response(ui_messages, fallback_answer, globe_state)