Spaces:

veeiiinnnnn
/

Qurio

Running

App Files Files Community

Qurio / backend-python /src /services /stream_chat.py

veeiiinnnnn

new

592cb1d 2 days ago

raw

history blame contribute delete

141 kB

	"""
	Stream chat service implemented with Agno SDK (Agent + tools + DB).
	"""

	from __future__ import annotations

	import ast
	import asyncio
	import json
	import os
	import re
	import time
	from collections.abc import AsyncGenerator
	from datetime import datetime
	from pathlib import Path
	from typing import Any
	from zoneinfo import ZoneInfo

	from agno.agent import Agent, RunEvent
	from agno.models.message import Message
	from agno.run.agent import RunOutput, ToolCallCompletedEvent, ToolCallStartedEvent
	from agno.run.team import TeamRunEvent
	from agno.utils.log import logger

	from ..models.stream_chat import (
	AgentStatusEvent,
	DoneEvent,
	ErrorEvent,
	FormRequestEvent, # New: HITL form request event
	SourceEvent,
	StreamChatRequest,
	TextEvent,
	ThoughtEvent,
	ToolCallEvent,
	ToolResultEvent,
	)
	from .agent_registry import get_agent_for_provider, build_team, resolve_agent_config
	from .hitl_storage import get_hitl_storage
	from .summary_service import update_session_summary
	from .tool_registry import resolve_tool_name

	# Memory-optimization tuning values. Their consumers are outside this section;
	# presumably a message-count threshold and a 12-hour minimum interval -- TODO confirm.
	MEMORY_OPTIMIZE_THRESHOLD = 50
	MEMORY_OPTIMIZE_INTERVAL_SECONDS = 60 * 60 * 12

	# Opening or closing <think> / <thought> tags, case-insensitive.
	# The alternation must be a bare "|"; an escaped pipe would only match the
	# literal text "think|thought".
	THINK_TAG_REGEX = re.compile(r"</?(?:think|thought)>", re.IGNORECASE)

	# Inline tool-protocol marker tokens, written with an ASCII or full-width pipe
	# and optional whitespace around every piece:
	#   <|tag|>                       -> named group "tag"
	#   <|DSML|body> / </|DSML|body>  -> named groups "dsml_close" and "dsml_body"
	PROTOCOL_TAG_REGEX = re.compile(
	    r"(?:<\s*[|｜]\s*(?P<tag>[a-zA-Z0-9_]+)\s*[|｜]\s*>)"
	    r"|(?:<\s*(?P<dsml_close>/?)\s*[|｜]\s*DSML\s*[|｜]\s*(?P<dsml_body>[^>]+)>)",
	    re.IGNORECASE,
	)

	# Marker names that open a tool-trace section.
	TOOL_TRACE_BEGIN_TAGS = {
	    "tool_calls_begin",
	    "tool_calls_section_begin",
	    "tool_call_begin",
	    "tool_argument_begin",
	    "tool_call_argument_begin",
	}

	# Marker names that close a tool-trace section.
	TOOL_TRACE_END_TAGS = {
	    "tool_argument_end",
	    "tool_call_argument_end",
	    "tool_call_end",
	    "tool_calls_end",
	    "tool_calls_section_end",
	}


	def _strip_internal_tool_trace(text: str) -> str:
	    """Remove explicit protocol marker tokens without truncating normal text.

	    Only the marker tokens themselves are deleted; text between an opening
	    and a closing tag is kept. All matching is case-insensitive.

	    Args:
	        text: A chunk of model output. Falsy input yields ``""``.

	    Returns:
	        ``text`` with these removed: ``<think>``/``<thought>`` tags,
	        ``<|...|>`` protocol markers (ASCII or full-width pipes, optional
	        inner whitespace), ``<|DSML|...>`` markers, ``<session_memory>`` and
	        ``<today_local_time>`` tags, the ``[SYSTEM INJECTED CONTEXT]`` banner,
	        and single-line ``identifier {...}`` snippets that look like inline
	        tool calls.
	    """
	    if not text:
	        return ""
	    flags = re.IGNORECASE
	    cleaned = str(text)
	    cleaned = re.sub(r"</?(?:think|thought)>", "", cleaned, flags=flags)
	    # Remove protocol markers like <|tool_calls_begin|> and spaced variants.
	    cleaned = re.sub(r"<\s*[|｜]\s*[^|>｜]*\s*[|｜]\s*>", "", cleaned, flags=flags)
	    cleaned = re.sub(r"</?\s*[|｜]\s*DSML\s*[|｜]\s*[^>]*>", "", cleaned, flags=flags)
	    cleaned = re.sub(r"</?(?:session_memory|today_local_time)>", "", cleaned, flags=flags)
	    cleaned = re.sub(r"\[SYSTEM INJECTED CONTEXT\]", "", cleaned, flags=flags)
	    # Remove inline tool-call-like snippets leaked by some models.
	    # NOTE: this pattern is broad -- any identifier followed by a brace group
	    # on one line is dropped, together with a directly preceding colon.
	    cleaned = re.sub(
	        r"([:：]\s*)?[a-zA-Z_][a-zA-Z0-9_]{1,80}\s*\{[^{}\n]{0,1200}\}",
	        "",
	        cleaned,
	        flags=flags,
	    )
	    return cleaned

	def _split_content_by_think_tags(text: str, in_think: bool) -> tuple[list[tuple[str, str]], bool]:
	    """Cut a content chunk into ordered ``("thought" | "text", piece)`` segments.

	    Every match of ``THINK_TAG_REGEX`` acts as a switch: an opening tag turns
	    thought mode on, a closing tag turns it off. The tags themselves are not
	    emitted and empty pieces are skipped.

	    Args:
	        text: The chunk to split. Falsy input yields no segments.
	        in_think: Whether the chunk starts inside a thought block.

	    Returns:
	        The segments in source order, plus the thought-mode flag that holds
	        at the end of the chunk (to be passed in with the next chunk).
	    """
	    if not text:
	        return [], in_think

	    pieces: list[tuple[str, str]] = []
	    inside = in_think
	    position = 0

	    for tag_match in THINK_TAG_REGEX.finditer(text):
	        tag_start, tag_end = tag_match.span()
	        before = text[position:tag_start]
	        if before:
	            pieces.append(("thought" if inside else "text", before))
	        # A tag that is not a closing tag opens (or keeps open) thought mode.
	        inside = not tag_match.group(0).lower().startswith("</")
	        position = tag_end

	    remainder = text[position:]
	    if remainder:
	        pieces.append(("thought" if inside else "text", remainder))
	    return pieces, inside


	def _strip_inline_tool_protocol(
	    text: str,
	    tool_trace_depth: int,
	    protocol_tail: str,
	) -> tuple[str, int, str, bool]:
	    """
	    Strip inline tool-protocol payloads from content chunks safely across chunk boundaries.

	    The previous call's ``protocol_tail`` is prepended to ``text``. If the
	    last marker opener (``<|``, ``</|`` and spaced / full-width variants) has
	    no ``>`` after it, that unfinished piece is cut off and handed back as the
	    next tail instead of being emitted. Complete markers are then removed via
	    ``PROTOCOL_TAG_REGEX``; surrounding text is left untouched.

	    Args:
	        text: The new content chunk (may be empty or None).
	        tool_trace_depth: Depth carried over from the previous call. It is
	            returned unchanged when nothing is emitted and reset to 0 otherwise.
	        protocol_tail: Unfinished marker text held back by the previous call.

	    Returns:
	        cleaned_text, next_tool_trace_depth, next_protocol_tail, had_protocol_tokens
	    """
	    combined = f"{protocol_tail}{text or ''}"
	    if not combined:
	        return "", tool_trace_depth, "", False

	    # Keep trailing incomplete protocol marker for next chunk.
	    last_open = None
	    for last_open in re.finditer(r"<\s*/?\s*[|｜]", combined):
	        pass
	    tail = ""
	    if last_open is not None and ">" not in combined[last_open.start():]:
	        tail = combined[last_open.start():]
	        combined = combined[: last_open.start()]

	    if not combined:
	        return "", tool_trace_depth, tail, bool(tail)

	    # Non-destructive stripping: remove marker tokens only.
	    cleaned, removed = PROTOCOL_TAG_REGEX.subn("", combined)
	    return cleaned, 0, tail, bool(removed) or bool(tail)


	def _squash_whitespace(text: Any) -> str:
	    """Return ``text`` as a string with every whitespace character removed.

	    Falsy input (``None``, ``""``, ``0`` ...) collapses to the empty string.
	    """
	    as_text = str(text) if text else ""
	    return re.sub(r"\s+", "", as_text)


	def _extract_agent_info_from_event(
	    run_event: Any,
	    leader_id: str | None = None,
	    leader_name: str | None = None,
	    leader_emoji: str | None = None,
	    agent_metadata: dict[str, Any] | None = None,
	) -> dict[str, Any]:
	    """
	    Work out which agent (team leader or a member) produced a run event.

	    The event's ``agent_id`` / ``agent_name`` / ``agent_emoji`` attributes are
	    read with ``getattr``. The event is attributed to the leader when any of
	    these holds, checked in order:

	    1. ``leader_id`` is set and equals the event's ``agent_id``;
	    2. the event carries neither an id nor a name;
	    3. no ``leader_id`` was given and the event's name equals ``leader_name``;
	    4. the id starts with ``"qurio-"``, ``agent_metadata`` is non-empty and
	       the id is not one of its keys.

	    Anything else is treated as a member event.

	    Returns:
	        dict with 'agent_id', 'agent_name', 'agent_role', 'agent_emoji',
	        'model', 'provider' keys. 'model' and 'provider' default to None and
	        are overwritten by whatever the matching ``agent_metadata`` entry holds.
	    """
	    agent_id = getattr(run_event, "agent_id", None)
	    agent_name = getattr(run_event, "agent_name", None)
	    member_emoji = getattr(run_event, "agent_emoji", None)

	    def _leader_payload() -> dict[str, Any]:
	        info: dict[str, Any] = {
	            "agent_id": leader_id,
	            "agent_name": leader_name,
	            "agent_role": "leader",
	            "agent_emoji": leader_emoji,
	            "model": None,
	            "provider": None,
	        }
	        if agent_metadata and leader_id in agent_metadata:
	            info.update(agent_metadata[leader_id])
	        return info

	    unidentified = not agent_id and not agent_name
	    if leader_id and agent_id == leader_id:
	        from_leader = True
	    elif unidentified:
	        # Default to leader if no info is present.
	        from_leader = True
	    elif not leader_id and agent_name == leader_name:
	        from_leader = True
	    elif str(agent_id or "").startswith("qurio-") and agent_metadata:
	        # A "qurio-" id that is not a known metadata key is taken to be the leader.
	        from_leader = agent_id not in agent_metadata
	    else:
	        from_leader = False

	    # An event with neither id nor name can only be attributed to the leader.
	    if from_leader or unidentified:
	        return _leader_payload()

	    # Log at DEBUG level to reduce main stream noise, as switch logs will provide context.
	    logger.debug(f"[TEAM] Member event detected: agent_id={agent_id}, agent_name={agent_name}")
	    member: dict[str, Any] = {
	        "agent_id": agent_id,
	        "agent_name": agent_name,
	        "agent_role": "member",
	        "agent_emoji": member_emoji,
	        "model": None,
	        "provider": None,
	    }
	    if not agent_metadata:
	        return member

	    # Enrich from metadata: by id, then by name used as a key, then by a
	    # metadata entry whose "name" field matches (only when the id is missing).
	    if agent_id in agent_metadata:
	        member.update(agent_metadata[agent_id])
	    elif agent_name in agent_metadata:
	        member.update(agent_metadata[agent_name])
	    elif not agent_id and agent_name:
	        hit = next(
	            ((key, meta) for key, meta in agent_metadata.items() if meta.get("name") == agent_name),
	            None,
	        )
	        if hit is not None:
	            member.update(hit[1])
	            member["agent_id"] = hit[0]
	    return member


	def _is_reasoning_duplicate_of_content(reasoning: str, content: str) -> bool:
	    """
	    Tell whether a chunk's reasoning text merely mirrors its answer text.

	    Both strings are compared with all whitespace removed. They count as
	    duplicates when they are equal, or when the shorter one is at least 12
	    characters long, occurs inside the longer one and covers at least 75% of
	    its length. If either side is empty the answer is False.
	    """
	    flat_reasoning = _squash_whitespace(reasoning)
	    flat_content = _squash_whitespace(content)
	    if not (flat_reasoning and flat_content):
	        return False
	    if flat_reasoning == flat_content:
	        return True

	    # Order the pair by length; on a tie the reasoning text is the "small" one.
	    if len(flat_reasoning) <= len(flat_content):
	        small, large = flat_reasoning, flat_content
	    else:
	        small, large = flat_content, flat_reasoning

	    long_enough = len(small) >= 12
	    return long_enough and small in large and len(small) >= int(len(large) * 0.75)


	def _is_stream_trace_enabled() -> bool:
	    """Report whether the ``QURIO_STREAM_TRACE`` environment flag is switched on.

	    The value is trimmed and lower-cased; "1", "true", "yes", "on" and "debug"
	    enable tracing. An unset variable counts as off.
	    """
	    flag = os.environ.get("QURIO_STREAM_TRACE", "")
	    return str(flag).strip().lower() in ("1", "true", "yes", "on", "debug")


	def _is_verbose_logs_enabled() -> bool:
	    """Report whether the ``QURIO_VERBOSE_LOGS`` environment flag is switched on.

	    The value is trimmed and lower-cased; "1", "true", "yes", "on" and "debug"
	    enable verbose logging. An unset variable is read as "0", i.e. off.
	    """
	    flag = os.environ.get("QURIO_VERBOSE_LOGS", "0")
	    return str(flag).strip().lower() in ("1", "true", "yes", "on", "debug")


	def _log_verbose_info(message: str) -> None:
	    """Log ``message`` at INFO when verbose logs are enabled, otherwise at DEBUG."""
	    emit = logger.info if _is_verbose_logs_enabled() else logger.debug
	    emit(message)


	def _preview(text: Any, limit: int = 140) -> str:
	    """Build a single-line, length-capped preview of ``text`` for log output.

	    Newline characters are shown as a literal backslash followed by "n".
	    Text longer than ``limit`` characters is cut there and gets a "..."
	    suffix. Falsy input gives an empty string.
	    """
	    flattened = str(text or "").replace("\n", "\\n")
	    if len(flattened) > limit:
	        return flattened[:limit] + "..."
	    return flattened[:limit]


	def _extract_message_from_payload(payload: Any) -> str | None:
	    """Best-effort extraction of a human-readable message from an arbitrary payload.

	    Resolution order:

	    1. ``None`` gives ``None``.
	    2. Objects with ``model_dump`` are dumped first (falling back to ``str``
	       if dumping raises).
	    3. Dicts: the keys "message", "error", "detail", "msg" are tried in that
	       order, then every value; the first non-empty result wins.
	    4. Lists / tuples: the first item that yields a message wins.
	    5. Anything else is stringified and then inspected: a ``content='...'``
	       fragment (as found in an event repr), the whole text parsed as JSON or
	       as a Python literal, the outermost ``{...}`` span parsed the same way,
	       and finally a quoted ``'message': '...'`` pair.
	    6. If nothing matched, the stripped text itself is returned (``None``
	       when it is empty).

	    Parsed scalars (numbers, booleans, bytes, ...) are not fed back into the
	    function: their string form would parse to the same value again and the
	    call would only end at the interpreter's recursion limit.
	    """
	    if payload is None:
	        return None
	    if hasattr(payload, "model_dump"):
	        try:
	            payload = payload.model_dump()
	        except Exception:
	            payload = str(payload)

	    if isinstance(payload, dict):
	        for key in ("message", "error", "detail", "msg"):
	            value = payload.get(key)
	            if isinstance(value, str) and value.strip():
	                return value.strip()
	            nested = _extract_message_from_payload(value)
	            if nested:
	                return nested
	        for value in payload.values():
	            nested = _extract_message_from_payload(value)
	            if nested:
	                return nested
	        return None

	    if isinstance(payload, (list, tuple)):
	        for item in payload:
	            nested = _extract_message_from_payload(item)
	            if nested:
	                return nested
	        return None

	    text = str(payload).strip()
	    if not text:
	        return None

	    # Agno RunErrorEvent repr: RunErrorEvent(..., content='Unknown model error', ...)
	    run_error_content_match = re.search(
	        r"""content\s*=\s*(['"])(.*?)\1""",
	        text,
	        re.IGNORECASE | re.DOTALL,
	    )
	    if run_error_content_match and run_error_content_match.group(2).strip():
	        return run_error_content_match.group(2).strip()

	    def _message_from_parsed(candidate: str) -> str | None:
	        """Parse ``candidate`` as JSON, then as a Python literal, and dig into the result."""
	        for parser in (json.loads, ast.literal_eval):
	            try:
	                parsed = parser(candidate)
	                # Only containers and strings can hold a message; see docstring.
	                if not isinstance(parsed, (dict, list, tuple, str)):
	                    continue
	                nested = _extract_message_from_payload(parsed)
	                if nested:
	                    return nested
	            except Exception:
	                # Best effort: an unparsable candidate just means "try the next strategy".
	                pass
	        return None

	    from_whole_text = _message_from_parsed(text)
	    if from_whole_text:
	        return from_whole_text

	    json_like = re.search(r"(\{[\s\S]*\})", text)
	    if json_like:
	        from_snippet = _message_from_parsed(json_like.group(1))
	        if from_snippet:
	            return from_snippet

	    message_match = re.search(r"""['"]message['"]\s*:\s*['"](.+?)['"]""", text, re.IGNORECASE)
	    if message_match and message_match.group(1).strip():
	        return message_match.group(1).strip()

	    return text


	def _extract_best_error_message(exc: Exception | Any) -> str:
	    """Pick the most actionable provider message out of a nested error object.

	    Starting from ``exc``, objects are visited breadth-first: for exceptions
	    their ``__cause__``, ``__context__`` and ``args``; for every object its
	    ``content``, ``error``, ``message``, ``detail`` and
	    ``model_provider_data`` attributes when present. Each visited object is
	    run through ``_extract_message_from_payload`` to collect candidates.

	    Returns:
	        The first candidate that is not a generic placeholder ("unknown model
	        error", "unknown error", "model provider error"). If all are generic,
	        the shortest candidate, preferring ones that are not a raw
	        ``RunErrorEvent(...)`` repr. With no candidates at all,
	        ``str(exc or "Unknown error")``.
	    """
	    generic_markers = ("unknown model error", "unknown error", "model provider error")
	    nested_attrs = ("content", "error", "message", "detail", "model_provider_data")

	    def _is_generic(text: str) -> bool:
	        lowered = text.strip().lower()
	        return any(marker in lowered for marker in generic_markers)

	    found: list[str] = []
	    pending: list[Any] = [exc]
	    visited: set[int] = set()
	    while pending:
	        node = pending.pop(0)
	        if node is None or id(node) in visited:
	            continue
	        visited.add(id(node))

	        message = _extract_message_from_payload(node)
	        if message and message.strip():
	            found.append(message.strip())

	        if isinstance(node, BaseException):
	            pending.append(getattr(node, "__cause__", None))
	            pending.append(getattr(node, "__context__", None))
	            exc_args = getattr(node, "args", None)
	            if isinstance(exc_args, tuple):
	                pending.extend(exc_args)

	        for attr in nested_attrs:
	            if hasattr(node, attr):
	                pending.append(getattr(node, attr, None))

	    specific = next((msg for msg in found if not _is_generic(msg)), None)
	    if specific is not None:
	        return specific
	    if not found:
	        return str(exc or "Unknown error")

	    # Avoid dumping full event repr like "RunErrorEvent(...)" to UI.
	    without_repr = [msg for msg in found if not msg.strip().lower().startswith("runerrorevent(")]
	    return min(without_repr or found, key=len)


	def _extract_text_chunk(run_event: Any) -> str:
	    """Extract assistant text only from explicit content fields.

	    Shared between stream_chat() and _continue_hitl_run() to avoid duplication.

	    Lookup order:

	    1. ``run_event.model_provider_data["choices"][0]["delta"]["content"]``:
	       a non-empty string is returned as is; a list is joined from its string
	       items and from each dict item's "text" (or "content") value.
	    2. If that delta has no content but does carry ``reasoning_content``,
	       ``""`` is returned without consulting ``run_event.content``.
	       NOTE(review): presumably so reasoning-only chunks are never surfaced
	       as answer text -- confirm.
	    3. ``run_event.content``: a non-empty string, or a list joined the same
	       way (here only string-valued dict entries are used).

	    Returns:
	        The extracted text, or ``""`` when nothing applies.
	    """

	    def _collect_parts(items: list[Any], strings_only: bool) -> list[str]:
	        """Gather text pieces from a list of strings and text/content dicts."""
	        collected: list[str] = []
	        for item in items:
	            if isinstance(item, dict):
	                piece = item.get("text") or item.get("content")
	                if strings_only:
	                    if isinstance(piece, str) and piece:
	                        collected.append(piece)
	                elif piece:
	                    collected.append(str(piece))
	            elif isinstance(item, str) and item:
	                collected.append(item)
	        return collected

	    provider_data = getattr(run_event, "model_provider_data", None)
	    if isinstance(provider_data, dict):
	        choices = provider_data.get("choices") or []
	        if choices and isinstance(choices[0], dict):
	            delta = choices[0].get("delta") or {}

	            raw_content = delta.get("content")
	            if isinstance(raw_content, str) and raw_content:
	                return raw_content
	            if isinstance(raw_content, list):
	                parts = _collect_parts(raw_content, strings_only=False)
	                if parts:
	                    return "".join(parts)

	            raw_reasoning = delta.get("reasoning_content")
	            if isinstance(raw_reasoning, str) and raw_reasoning:
	                return ""
	            if isinstance(raw_reasoning, list) and _collect_parts(raw_reasoning, strings_only=False):
	                return ""

	    content = getattr(run_event, "content", None)
	    if isinstance(content, str) and content:
	        return content
	    if isinstance(content, list):
	        parts = _collect_parts(content, strings_only=True)
	        if parts:
	            return "".join(parts)

	    # The delta content was already examined above, so there is nothing left to try.
	    return ""


	def _extract_reasoning_chunk(
	    run_event: Any,
	    trace_fn: Any = None,
	) -> str:
	    """Extract reasoning/thought content from a stream event.

	    Shared between stream_chat() and _continue_hitl_run() to avoid duplication.

	    Two places are checked, in this order:

	    1. ``run_event.model_provider_data["choices"][0]["delta"]["reasoning_content"]``
	    2. ``run_event.reasoning_content``

	    In both, a non-empty string is returned as is and a list is joined from
	    its string items and from each dict item's "text" (or "content") value.

	    Args:
	        run_event: The stream event to inspect.
	        trace_fn: Optional callable(stage, **kwargs) for trace logging. It is
	            called as ``trace_fn("reasoning_source", source=...)`` naming the
	            field that supplied the text.

	    Returns:
	        The reasoning text, or ``""`` when neither field holds any.
	    """

	    def _collect_parts(items: list[Any]) -> list[str]:
	        """Gather text pieces from a list of strings and text/content dicts."""
	        collected: list[str] = []
	        for item in items:
	            if isinstance(item, dict):
	                piece = item.get("text") or item.get("content")
	                if piece:
	                    collected.append(str(piece))
	            elif isinstance(item, str) and item:
	                collected.append(item)
	        return collected

	    def _read(value: Any, source: str) -> str | None:
	        """Return the text held by ``value``, or None when it holds none."""
	        if isinstance(value, str) and value:
	            if trace_fn:
	                trace_fn("reasoning_source", source=source)
	            return value
	        if isinstance(value, list):
	            parts = _collect_parts(value)
	            if parts:
	                if trace_fn:
	                    trace_fn("reasoning_source", source=f"{source}[]")
	                return "".join(parts)
	        return None

	    provider_data = getattr(run_event, "model_provider_data", None)
	    if isinstance(provider_data, dict):
	        choices = provider_data.get("choices") or []
	        if choices and isinstance(choices[0], dict):
	            delta = choices[0].get("delta") or {}
	            from_delta = _read(
	                delta.get("reasoning_content"),
	                "provider_data.delta.reasoning_content",
	            )
	            if from_delta is not None:
	                return from_delta

	    from_event = _read(
	        getattr(run_event, "reasoning_content", None),
	        "run_event.reasoning_content",
	    )
	    if from_event is not None:
	        return from_event

	    # NOTE:
	    # Some providers (e.g. DeepSeek-compatible streams) may place
	    # assistant answer tokens under `delta.reasoning`.
	    # Treating that field as reasoning can misclassify answer text as thought.
	    # Keep reasoning extraction strict to `reasoning_content` only.
	    return ""


	def _is_raw_events_log_enabled() -> bool:
	    """Report whether raw event logging is on, per ``QURIO_RAW_EVENTS_LOG``.

	    Unlike the other flags this is an opt-out check: the trimmed, lower-cased
	    value disables logging only when it is "0", "false", "off" or "no". An
	    unset variable is read as "0" (off); any other value, including an empty
	    string, turns logging on.
	    """
	    setting = str(os.environ.get("QURIO_RAW_EVENTS_LOG", "0")).strip().lower()
	    switched_off = setting in ("0", "false", "off", "no")
	    return not switched_off


	def _raw_events_log_path() -> Path:
	    """Return the JSONL file that raw event diagnostics are appended to.

	    If ``QURIO_RAW_EVENTS_LOG_PATH`` is set to a non-blank value, that path is
	    returned as given (trimmed) and nothing is created. Otherwise the file is
	    ``agno_raw_events_<YYYYMMDD>.jsonl`` (UTC date) inside a ``logs``
	    directory two levels above this file's directory; that directory is
	    created if it does not exist yet.
	    """
	    # Local import: the module header only imports ``datetime`` itself.
	    from datetime import timezone

	    configured = str(os.getenv("QURIO_RAW_EVENTS_LOG_PATH", "")).strip()
	    if configured:
	        return Path(configured)
	    logs_dir = Path(__file__).resolve().parents[2] / "logs"
	    logs_dir.mkdir(parents=True, exist_ok=True)
	    # ``datetime.utcnow()`` is deprecated; an aware UTC "now" gives the same date tag.
	    date_tag = datetime.now(timezone.utc).strftime("%Y%m%d")
	    return logs_dir / f"agno_raw_events_{date_tag}.jsonl"


	def _append_raw_event_log(
	    *,
	    phase: str,
	    request: StreamChatRequest,
	    run_id: str | None,
	    run_event: Any,
	) -> None:
	    """Append one diagnostic JSON line describing ``run_event`` to the raw events log.

	    Does nothing unless ``_is_raw_events_log_enabled()`` is true. The record
	    holds a UTC timestamp, the phase, provider / model / conversation id from
	    ``request``, the run id (falling back to ``run_event.run_id``), the event
	    name and class, previews of the extracted text and reasoning chunks, and
	    ``repr(run_event)``.

	    Any exception raised while building or writing the record is swallowed on
	    purpose so that diagnostics can never interrupt the stream.
	    """
	    if not _is_raw_events_log_enabled():
	        return
	    try:
	        # Local import: the module header only imports ``datetime`` itself.
	        from datetime import timezone

	        event_name = str(getattr(run_event, "event", "") or "")
	        content_chunk = _extract_text_chunk(run_event)
	        reasoning_chunk = _extract_reasoning_chunk(run_event)
	        # ``datetime.utcnow()`` is deprecated; dropping tzinfo from an aware UTC
	        # value keeps the existing "<naive ISO>Z" timestamp format.
	        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
	        payload = {
	            "timestamp": now_utc.isoformat() + "Z",
	            "phase": phase,
	            "provider": request.provider,
	            "model": request.model,
	            "conversation_id": request.conversation_id,
	            "run_id": run_id or getattr(run_event, "run_id", None),
	            "event_name": event_name,
	            "raw_event_type": type(run_event).__name__,
	            "has_content": bool(str(content_chunk or "").strip()),
	            "has_reasoning_content": bool(str(reasoning_chunk or "").strip()),
	            "content_preview": _preview(content_chunk),
	            "reasoning_preview": _preview(reasoning_chunk),
	            "raw_event": repr(run_event),
	        }
	        log_path = _raw_events_log_path()
	        with log_path.open("a", encoding="utf-8") as f:
	            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
	    except Exception:
	        # Never break stream flow due to diagnostics logging failures.
	        return


	def _extract_completed_content_and_output(
	    run_event: Any,
	    streamed_content: str = "",
	) -> tuple[str, Any]:
	    """
	    Derive the final assistant text and structured output from a completion event.

	    Shared between normal stream_chat() and HITL continuation to keep behavior aligned.

	    The event's ``content`` is used, or ``run_response.content`` when the
	    event's own content is empty. Then:

	    * an object with ``model_dump`` or a dict / list is treated as structured
	      output: it is returned as ``output`` and its JSON form replaces any
	      streamed text;
	    * a non-blank string is used only when nothing was streamed;
	    * if there is still no text, the newest assistant message in
	      ``run_response.messages`` that yields text is used (errors during this
	      fallback are ignored).

	    Returns:
	        ``(final_content, output)``; ``output`` is None unless the content was
	        structured.
	    """
	    event_content = getattr(run_event, "content", None)
	    run_response = getattr(run_event, "run_response", None)
	    if run_response is not None and not event_content:
	        event_content = getattr(run_response, "content", None)

	    text = streamed_content or ""
	    structured = None

	    # Structured output should override streamed text to preserve canonical payload.
	    if event_content and hasattr(event_content, "model_dump"):
	        structured = event_content
	        text = json.dumps(event_content.model_dump(), ensure_ascii=False)
	    elif isinstance(event_content, (dict, list)):
	        structured = event_content
	        text = json.dumps(event_content, ensure_ascii=False)
	    elif not text and isinstance(event_content, str) and event_content.strip():
	        text = event_content

	    if text or run_response is None:
	        return text, structured

	    # Fallback for providers that only keep final assistant text in run_response.messages.
	    try:
	        history = getattr(run_response, "messages", None) or []
	        for message in reversed(history):
	            role = getattr(message, "role", None)
	            body = getattr(message, "content", None)
	            if role != "assistant":
	                continue
	            candidate = _extract_text_from_message_content(body).strip()
	            if candidate:
	                text = candidate
	                break
	    except Exception:
	        pass

	    return text, structured


	def _extract_text_from_message_content(content: Any) -> str:
	    """
	    Pull plain text out of an assistant message payload of unknown shape.

	    * A string is returned unchanged.
	    * A dict yields its string "text", else its string "content", else the
	      first non-empty text found by recursing into "content" and then "parts"
	      (each only when it is a list or dict).
	    * A list is processed item by item and the results are concatenated:
	      strings are taken as is; dict items contribute their string "text", else
	      their string "content", else the recursed text of "content" followed by
	      the recursed text of "parts".
	    * Anything else gives ``""``.
	    """
	    if isinstance(content, str):
	        return content

	    if isinstance(content, dict):
	        direct = content.get("text")
	        if isinstance(direct, str):
	            return direct
	        inner = content.get("content")
	        if isinstance(inner, str):
	            return inner
	        # First container that produces text wins.
	        for candidate in (inner, content.get("parts")):
	            if isinstance(candidate, (list, dict)):
	                found = _extract_text_from_message_content(candidate)
	                if found:
	                    return found
	        return ""

	    if isinstance(content, list):
	        chunks: list[str] = []
	        for entry in content:
	            if isinstance(entry, str):
	                chunks.append(entry)
	            elif isinstance(entry, dict):
	                direct = entry.get("text")
	                inner = entry.get("content")
	                if isinstance(direct, str):
	                    chunks.append(direct)
	                elif isinstance(inner, str):
	                    chunks.append(inner)
	                else:
	                    # Unlike the dict case above, both containers contribute here.
	                    for candidate in (inner, entry.get("parts")):
	                        if isinstance(candidate, (list, dict)):
	                            found = _extract_text_from_message_content(candidate)
	                            if found:
	                                chunks.append(found)
	        return "".join(chunks)

	    return ""


	def _coerce_tool_result_payload(output: Any) -> Any:
	    """
	    Normalize tool output payload into JSON-friendly objects when possible.

	    A non-empty string is first parsed as JSON; whatever JSON value results
	    replaces it. If the value is (still) a string, it is parsed as a Python
	    literal and replaced only when that literal is a dict. Non-string input
	    is returned untouched.

	    Parsing is best effort: any failure leaves the value as it was. Besides
	    ``ValueError`` and ``SyntaxError``, ``ast.literal_eval`` can raise
	    ``TypeError`` (for example an unhashable dict key), ``MemoryError`` and
	    ``RecursionError`` on malformed input, so those are handled as well.
	    """
	    normalized = output
	    if normalized and isinstance(normalized, str):
	        try:
	            normalized = json.loads(normalized)
	        except (json.JSONDecodeError, RecursionError):
	            pass
	    if isinstance(normalized, str):
	        try:
	            parsed = ast.literal_eval(normalized)
	        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	            return normalized
	        if isinstance(parsed, dict):
	            return parsed
	    return normalized


	def _build_tool_result_event(
	    tool: Any,
	    duration_ms: int | None,
	    normalize_tool_output_fn: Any,
	    agent_info: dict[str, str | None] | None = None,
	) -> tuple[dict[str, Any], Any]:
	    """
	    Assemble the ToolResultEvent payload for a finished tool call.

	    ``tool.result`` is passed through ``normalize_tool_output_fn`` and then
	    ``_coerce_tool_result_payload``. The status is "error" when
	    ``tool.tool_call_error`` is truthy and "done" otherwise. Agent fields are
	    filled from ``agent_info`` when it is given.

	    Returns:
	        The event dumped by alias with None fields excluded, and the parsed
	        tool output.
	    """
	    raw_result = getattr(tool, "result", None)
	    output = _coerce_tool_result_payload(normalize_tool_output_fn(raw_result))
	    failed = getattr(tool, "tool_call_error", None)
	    info = agent_info or {}

	    result_event = ToolResultEvent(
	        id=getattr(tool, "tool_call_id", None),
	        name=getattr(tool, "tool_name", "") or "",
	        status="error" if failed else "done",
	        output=output,
	        durationMs=duration_ms,
	        agent_id=info.get("agent_id"),
	        agent_name=info.get("agent_name"),
	        agent_role=info.get("agent_role"),
	        agent_emoji=info.get("agent_emoji"),
	    )
	    return result_event.model_dump(by_alias=True, exclude_none=True), output


	def _build_tool_call_event(
	    tool: Any,
	    text_index: int,
	    include_none: bool = False,
	    agent_info: dict[str, str | None] | None = None,
	) -> dict[str, Any]:
	    """Assemble the ToolCallEvent payload for a tool call.

	    ``tool.tool_args`` (or an empty dict) is serialized to a JSON string for
	    the ``arguments`` field. Agent fields are filled from ``agent_info`` when
	    it is given.

	    Returns:
	        The event dumped by alias; None-valued fields are kept only when
	        ``include_none`` is true.
	    """
	    info = agent_info or {}
	    serialized_args = json.dumps(getattr(tool, "tool_args", None) or {})

	    call_event = ToolCallEvent(
	        id=getattr(tool, "tool_call_id", None),
	        name=getattr(tool, "tool_name", "") or "",
	        arguments=serialized_args,
	        text_index=text_index,
	        agent_id=info.get("agent_id"),
	        agent_name=info.get("agent_name"),
	        agent_role=info.get("agent_role"),
	        agent_emoji=info.get("agent_emoji"),
	    )
	    return call_event.model_dump(by_alias=True, exclude_none=not include_none)


	def _normalize_interactive_form_fields(raw_fields: Any) -> list[dict[str, Any]]:
	    """
	    Turn whatever the tool call supplied as ``fields`` into a strict list[dict].

	    Accepted shapes:

	    * a list of field dicts and / or plain label strings;
	    * a dict with a list under "fields";
	    * a string holding either of the above as JSON or as a Python literal.

	    Every resulting field has a unique slug "name" (lower-case, characters
	    outside ``[a-zA-Z0-9_]`` collapsed to "_", de-duplicated with ``_2``,
	    ``_3`` ... suffixes), a non-empty "label", a "type" limited to text /
	    number / select / checkbox / range (anything else becomes "text") and a
	    boolean "required". Other keys of a field dict are kept as they are.
	    Unparsable strings, wrong shapes and non-dict items are dropped (the
	    first and last with a warning) rather than raised.
	    """
	    allowed_types = {"text", "number", "select", "checkbox", "range"}
	    fields = raw_fields

	    if isinstance(fields, str):
	        stripped = fields.strip()
	        if not stripped:
	            return []
	        try:
	            fields = json.loads(stripped)
	        except Exception:
	            try:
	                fields = ast.literal_eval(stripped)
	            except Exception:
	                logger.warning("interactive_form fields is invalid string, fallback to empty list")
	                return []

	    # A wrapping dict is only useful through its "fields" entry.
	    if isinstance(fields, dict):
	        fields = fields.get("fields")
	    if not isinstance(fields, list):
	        return []

	    taken_names: set[str] = set()

	    def _unique_slug(source: Any, position: int) -> str:
	        slug = re.sub(r"[^a-zA-Z0-9_]+", "_", str(source or "").strip().lower()).strip("_")
	        if not slug:
	            slug = f"field_{position}"
	        unique = slug
	        counter = 2
	        while unique in taken_names:
	            unique = f"{slug}_{counter}"
	            counter += 1
	        taken_names.add(unique)
	        return unique

	    result: list[dict[str, Any]] = []
	    for position, entry in enumerate(fields, start=1):
	        if isinstance(entry, str):
	            # A bare string is a label for an optional text field.
	            text_label = entry.strip()
	            if text_label:
	                result.append(
	                    {
	                        "name": _unique_slug(text_label, position),
	                        "label": text_label,
	                        "type": "text",
	                        "required": False,
	                    }
	                )
	            continue

	        if not isinstance(entry, dict):
	            logger.warning("interactive_form field item is not dict, skipped: %s", type(entry).__name__)
	            continue

	        given_name = entry.get("name")
	        default_label = f"Field {position}"
	        label = str(entry.get("label") or given_name or default_label).strip() or default_label
	        slug_name = _unique_slug(given_name or label, position)
	        requested_type = str(entry.get("type") or "").strip().lower()

	        field = dict(entry)
	        field["name"] = slug_name
	        field["label"] = label
	        field["type"] = requested_type if requested_type in allowed_types else "text"
	        field["required"] = bool(entry.get("required", False))
	        result.append(field)

	    return result


	def _extract_interactive_form_payload(req: Any, default_title: str) -> tuple[str | None, str, list[dict[str, Any]]]:
	    """Read form id, title and normalized fields from a requirement's tool arguments.

	    The arguments come from ``req.tool_execution.tool_args``; a missing tool
	    execution or a non-dict value is treated as no arguments. The title falls
	    back to ``default_title`` and the fields are passed through
	    ``_normalize_interactive_form_fields`` so the result is validation-safe.

	    Returns:
	        ``(form_id, title, fields)`` where ``form_id`` is the "id" argument or None.
	    """
	    execution = getattr(req, "tool_execution", None)
	    arguments = execution.tool_args if execution else {}
	    if not isinstance(arguments, dict):
	        arguments = {}

	    title = str(arguments.get("title") or default_title)
	    fields = _normalize_interactive_form_fields(arguments.get("fields", []))
	    return arguments.get("id"), title, fields


	def _is_interactive_form_requirement(req: Any) -> bool:
	    """Return True when the requirement's tool execution is named "interactive_form"."""
	    execution = getattr(req, "tool_execution", None)
	    if not execution:
	        return False
	    return getattr(execution, "tool_name", None) == "interactive_form"

	class StreamChatService:
	"""Stream chat service implemented using Agno Agent streaming events."""

	def __init__(self) -> None:
	    """Create the service with an empty memory-optimization bookkeeping map."""
	    # Maps a string key to a float and starts out empty. Presumably the key
	    # identifies a user or conversation and the value is the time of its last
	    # memory optimization (compare MEMORY_OPTIMIZE_INTERVAL_SECONDS) --
	    # TODO confirm against the code that writes to it.
	    self._last_memory_optimization: dict[str, float] = {}

	async def stream_chat(
	self,
	request: StreamChatRequest,
	) -> AsyncGenerator[dict[str, Any], None]:
	"""
	Stream chat completion with HITL support.

	If request.run_id is present, this is a resumption request after form submission.
	Otherwise, this is a normal chat request.
	"""
	# ================================================================
	# HITL: Check if this is a resumption request
	# ================================================================
	if request.run_id and request.field_values:
	_log_verbose_info(f"Detected HITL resumption request (run_id: {request.run_id})")
	async for event in self._continue_hitl_run(request):
	yield event
	return

	# Debug: Log request fields for expert mode
	if request.expert_mode:
	logger.info(f"[DEBUG] Expert mode request - leader_agent_id: {getattr(request, 'leader_agent_id', 'NOT_FOUND')}, team_agent_ids: {getattr(request, 'team_agent_ids', [])}")

	# ================================================================
	# Normal chat flow
	# ================================================================
	try:
	if not request.provider:
	raise ValueError("Missing required field: provider")
	if not request.messages:
	raise ValueError("Missing required field: messages")

	# Enable skills for the definitive user-facing chat agent
	request.enable_skills = True

	# Build standard agent or Team
	agent_metadata: dict[str, Any] = {}
	if request.expert_mode and getattr(request, "team_agent_ids", []):
	# Log leader configuration for debugging
	logger.info(f"[TEAM] Building team - leader_agent_id: {getattr(request, 'leader_agent_id', None)}, team_agent_ids: {request.team_agent_ids}")

	# 1. Resolve Leader Configuration if ID is provided
	if getattr(request, "leader_agent_id", None):
	request = resolve_agent_config(request.leader_agent_id, request)
	logger.info(f"[TEAM] Leader resolved - agent_id: {getattr(request, 'agent_id', None)}, agent_name: {getattr(request, 'agent_name', None)}")
	if request.agent_id:
	agent_metadata[request.agent_id] = {
	"model": request.model,
	"provider": request.provider,
	}

	# 2. Resolve Member Agents
	members = []
	for a_id in request.team_agent_ids:
	import copy
	sub_req = copy.deepcopy(request)
	sub_req.expert_mode = False
	# Fetch actual member config from DB
	member_req = resolve_agent_config(a_id, sub_req)
	members.append(get_agent_for_provider(member_req))
	# Capture member metadata
	if member_req.agent_id:
	agent_metadata[member_req.agent_id] = {
	"model": member_req.model,
	"provider": member_req.provider,
	}
	if member_req.agent_name:
	agent_metadata[member_req.agent_name] = {
	"model": member_req.model,
	"provider": member_req.provider,
	}

	# 3. Build the Team with resolved leader (request) and members
	agent = build_team(request, members)
	is_team_mode = True
	else:
	agent = get_agent_for_provider(request)
	is_team_mode = False
	sources_map: dict[str, Any] = {}
	full_content = ""
	full_thought = ""
	tool_start_times: dict[str, float] = {}
	should_break_next_thought = False
	in_reasoning_phase = False
	reasoning_closed_for_current_cycle = False
	in_content_think_block = False
	inline_tool_trace_depth = 0
	inline_protocol_tail = ""
	stream_trace = _is_stream_trace_enabled()
	# Current agent info for Team mode (updated per event)
	current_agent_info: dict[str, Any] = {"agent_id": None, "agent_name": None}
	last_active_agent_id = None

	def trace_stream(stage: str, **kwargs: Any) -> None:
	if not stream_trace:
	return
	payload = ", ".join([f"{k}={v}" for k, v in kwargs.items()])
	logger.info(f"[STREAM_TRACE][main] {stage} \| {payload}")

	def emit_thought_part(part: str):
	nonlocal full_thought, full_content, should_break_next_thought, in_reasoning_phase, reasoning_closed_for_current_cycle
	text = _strip_internal_tool_trace(str(part or ""))
	if not text or not text.strip():
	return

	should_break_next_thought = False
	in_reasoning_phase = True
	full_thought += text
	trace_stream("emit_reasoning", reasoning_preview=_preview(text))
	current_text_index = len(full_content)
	yield ThoughtEvent(
	content=text,
	text_index=current_text_index,
	agent_id=current_agent_info.get("agent_id"),
	agent_name=current_agent_info.get("agent_name"),
	agent_status=current_agent_info.get("status"),
	).model_dump(by_alias=True, exclude_none=True)

	def process_text(text: str):
	nonlocal full_content, in_reasoning_phase, should_break_next_thought, reasoning_closed_for_current_cycle
	clean_text = _strip_internal_tool_trace(text)
	if clean_text:
	has_visible_text = bool(clean_text.strip())
	if has_visible_text:
	in_reasoning_phase = False
	should_break_next_thought = True
	reasoning_closed_for_current_cycle = True
	full_content += clean_text
	yield TextEvent(
	content=clean_text,
	agent_id=current_agent_info.get("agent_id"),
	agent_name=current_agent_info.get("agent_name"),
	agent_status=current_agent_info.get("status"),
	).model_dump(by_alias=True, exclude_none=True)

	# Agent status tracking for Team mode
	agent_statuses: dict[str, str] = {}
	def set_agent_status(agent_id: str | None, status: str) -> dict[str, Any] | None:
	    """Record a status change for an agent and build the event announcing it.

	    Args:
	        agent_id: Agent whose status changed; falsy ids are ignored.
	        status: New status label to store in ``agent_statuses``.

	    Returns:
	        The dumped ``AgentStatusEvent`` (by alias), or ``None`` when there is
	        no agent id or the stored status already equals ``status``.
	    """
	    # Skip unknown agents and no-op transitions so no duplicate events are emitted.
	    if not agent_id or agent_statuses.get(agent_id) == status:
	        return None
	    agent_statuses[agent_id] = status
	    return AgentStatusEvent(agentId=agent_id, status=status).model_dump(by_alias=True)

	async def update_status_and_yield(agent_id: str | None, status: str):
	    """Async-generator wrapper around ``set_agent_status``.

	    Yields the status event when ``set_agent_status`` returns one and yields
	    nothing otherwise, so callers can forward it with ``async for``.

	    Args:
	        agent_id: Agent whose status should be updated; may be ``None``.
	        status: New status label.
	    """
	    event = set_agent_status(agent_id, status)
	    if event:
	        yield event

	# Context management now handled by Agno's num_history_runs parameter
	messages = request.messages
	pre_events: list[dict[str, Any]] = []

	messages = self._inject_local_time_context(messages, request, pre_events)
	enabled_tool_names = self._collect_enabled_tool_names(request)
	messages = self._inject_tool_guidance(messages, enabled_tool_names, request)

	for event in pre_events:
	yield event

	# ================================================================
	# MANUAL CONTEXT MANAGEMENT (Rolling Summary + Fixed Window)
	# ================================================================

	# 1. Fetch Session Summary from DB
	session_summary_text = None
	old_summary_json = None
	if request.conversation_id:
	try:
	from ..models.db import DbFilter, DbQueryRequest
	from .db_service import execute_db_async, get_db_adapter

	adapter = get_db_adapter(request.database_provider)
	if adapter:
	req = DbQueryRequest(
	providerId=adapter.config.id,
	action="select",
	table="conversations",
	columns=["session_summary"],
	filters=[DbFilter(op="eq", column="id", value=request.conversation_id)],
	maybeSingle=True,
	)

	result = await execute_db_async(adapter, req)

	if result.data and isinstance(result.data, dict):
	row = result.data
	raw_summary = row.get("session_summary")

	if raw_summary:
	# Parsing handled by adapter often, but double check
	if isinstance(raw_summary, str):
	try:
	old_summary_json = json.loads(raw_summary)
	except (ValueError, json.JSONDecodeError):
	pass
	elif isinstance(raw_summary, dict):
	old_summary_json = raw_summary

	if old_summary_json:
	session_summary_text = old_summary_json.get("summary")
	except Exception as e:
	logger.warning(f"Failed to fetch session summary: {e}")
	logger.error(f"Failed to fetch session summary: {e}")

	# 2. Slice History (Turn-Based Window)
	# Strategy: Keep all System messages + Last N User turns (User + AI + Tools)
	# N comes from frontend context setting: contextTurns.
	raw_turn_limit = request.context_turn_limit
	turn_limit = (
	max(1, min(50, int(raw_turn_limit)))
	if isinstance(raw_turn_limit, int) and raw_turn_limit > 0
	else 2
	)

	# Separate System and Non-System
	system_messages = [m for m in messages if m.get("role") == "system"]
	chat_messages = [m for m in messages if m.get("role") != "system"]

	# Find the indices of User messages to determine run boundaries
	user_indices = [i for i, m in enumerate(chat_messages) if m.get("role") == "user"]

	user_turn_count = len(user_indices)
	if user_turn_count > turn_limit:
	cutoff_index = user_indices[-turn_limit]
	recent_history = chat_messages[cutoff_index:]
	else:
	recent_history = chat_messages

	# For single-turn requests (common during first-turn regenerate),
	# using persisted summary can re-introduce stale assistant text.
	# In this case, use fresh request messages only and rebuild summary from this turn.
	is_single_user_turn = user_turn_count <= 1
	# Force rebuild if it's the first turn OR if the user is editing/regenerating
	should_rebuild_summary = bool(is_single_user_turn or request.is_editing)
	# Inject summary only when history exceeds turn window and request is not rebuild flow.
	should_inject_summary = bool(session_summary_text) and (user_turn_count > turn_limit) and (not should_rebuild_summary)
	if not should_inject_summary and session_summary_text:
	_log_verbose_info(
	"Skipping session summary injection (within turn window or single-turn rebuild context)."
	)
	session_summary_text = None
	old_summary_json = None

	# 3. Inject Summary into System Prompt
	if session_summary_text:
	summary_prompt = (
	"\n\nSession memory summary:\n"
	"Here is a summary of the conversation so far. Use this to understand long-term context, "
	"but prioritize the details in the recent messages below.\n"
	f"{session_summary_text}\n"
	)
	# Inject into the LAST system message, or create a new one if none exist
	if system_messages:
	last_sys = system_messages[-1]
	# Avoid appending if already present (defensive)
	if "Session memory summary:" not in str(last_sys.get("content", "")):
	new_content = str(last_sys.get("content", "")) + summary_prompt
	# Update the dict (need to be careful not to mutate original request list in place if reused, but here it's fine)
	last_sys["content"] = new_content
	else:
	system_messages.append({"role": "system", "content": summary_prompt})

	# Final Agent Input
	agent_input = system_messages + recent_history

	stream = agent.arun(
	input=agent_input,
	stream=True,
	stream_events=True,
	user_id=request.user_id,
	session_id=request.conversation_id,
	# Only pass explicit structured-output schema.
	# Do not fallback to response_format, otherwise {"type":"json_object"}
	# may be treated as grammar and trigger provider-side grammar cache errors.
	output_schema=request.output_schema,
	)

	# ================================================================
	# Stream processing with HITL support
	# ================================================================
	async for run_event in stream:
	_append_raw_event_log(
	phase="main",
	request=request,
	run_id=getattr(run_event, "run_id", None),
	run_event=run_event,
	)
	# ============================================================
	# HITL: Check if agent paused for user input
	# ============================================================
	if hasattr(run_event, 'is_paused') and run_event.is_paused:
	logger.info(f"Agent paused for HITL (run_id: {run_event.run_id})")

	# Extract requirements
	requirements = getattr(run_event, 'active_requirements', None) or getattr(run_event, 'requirements', None)

	if requirements:
	form_requirements = [req for req in requirements if _is_interactive_form_requirement(req)]
	if not form_requirements:
	logger.info("Agent paused without interactive_form; skipping HITL form handling")
	yield DoneEvent(
	content=full_content or "",
	thought=full_thought.strip() or None,
	sources=list(sources_map.values()) or None,
	).model_dump()
	return

	# Save to Supabase
	try:
	paused_tools = getattr(run_event, "tools", None) or []
	serialized_tools = [
	tool.to_dict() if hasattr(tool, "to_dict") else tool
	for tool in paused_tools
	if tool is not None
	]
	serialized_requirements = [
	req.to_dict() if hasattr(req, "to_dict") else req
	for req in form_requirements
	]
	paused_run_output = {
	"run_id": getattr(run_event, "run_id", None),
	"session_id": getattr(run_event, "session_id", None)
	or request.conversation_id,
	"user_id": request.user_id,
	"messages": messages or [],
	"tools": serialized_tools,
	"requirements": serialized_requirements,
	"status": "PAUSED",
	}
	logger.info(
	f"[HITL] Saving pending run_id={run_event.run_id} "
	f"with database_provider={request.database_provider}"
	)
	hitl_storage = get_hitl_storage(request.database_provider)
	saved = await hitl_storage.save_pending_run(
	run_id=run_event.run_id,
	requirements=form_requirements,
	conversation_id=request.conversation_id,
	user_id=request.user_id,
	agent_model=request.model,
	messages=messages,
	run_output=paused_run_output,
	)
	if not saved:
	raise RuntimeError("Failed to persist HITL pending run")

	# Extract form fields for frontend
	for req in form_requirements:
	# Handle external execution (e.g., interactive_form with external_execution=True)
	if (hasattr(req, 'needs_external_execution') and req.needs_external_execution) or \
	(req.tool_execution and req.tool_execution.tool_name == "interactive_form"):
	form_id, title, fields = _extract_interactive_form_payload(
	req,
	default_title="Please provide the following information",
	)

	# Send form_request event to frontend
	yield FormRequestEvent(
	run_id=run_event.run_id,
	form_id=form_id,
	title=title,
	fields=fields
	).model_dump()

	# Fallback handle traditional user input (e.g., get_user_input)
	elif req.needs_user_input and req.user_input_schema:
	# Convert from user_input_schema
	form_id = None
	title = "Please provide the following information"
	fields = [
	{
	"name": field.name,
	"type": self._map_field_type_to_frontend(field.field_type),
	"label": field.description or field.name,
	"required": True,
	"value": field.value
	}
	for field in req.user_input_schema
	]

	# Send form_request event to frontend
	yield FormRequestEvent(
	run_id=run_event.run_id,
	form_id=form_id,
	title=title,
	fields=fields
	).model_dump()

	# Send done event to indicate pause
	yield DoneEvent(
	content=full_content or "",
	thought=full_thought.strip() or None,
	sources=list(sources_map.values()) or None,
	).model_dump()

	_log_verbose_info(f"HITL pause successful, waiting for user submission (run_id: {run_event.run_id})")
	return # Exit stream, wait for user to submit form

	except Exception as e:
	logger.error(f"Failed to save HITL state: {e}")
	yield ErrorEvent(error=f"Failed to pause for form: {str(e)}").model_dump()
	return
	else:
	logger.warning("Agent paused but no requirements found")
	yield DoneEvent(
	content=full_content or "",
	thought=full_thought.strip() or None,
	sources=list(sources_map.values()) or None,
	).model_dump()
	return

	# ============================================================
	# Normal streaming events (use stream_events for details)
	# ============================================================
	# Check if this is a detailed event (from stream_events=True)
	if hasattr(run_event, 'event'):
	# Extract agent info for Team mode (member vs leader identification)
	current_agent_info = _extract_agent_info_from_event(
	run_event,
	leader_id=request.agent_id,
	leader_name=request.agent_name,
	leader_emoji=request.agent_emoji,
	agent_metadata=agent_metadata,
	)

	# Log active agent switch in Team mode
	if is_team_mode:
	current_id = current_agent_info.get("agent_id")
	if current_id != last_active_agent_id:
	last_active_agent_id = current_id
	active_name = current_agent_info.get("agent_name")
	active_role = current_agent_info.get("agent_role")
	active_model = current_agent_info.get("model")
	active_provider = current_agent_info.get("provider")
	logger.info(
	f"[TEAM] >>> Active Agent Switch: {active_name} ({active_role}) "
	f"\| Model: {active_model} \| Provider: {active_provider}"
	)

	# Apply current tracked status to info for text/thought events
	current_agent_info["status"] = agent_statuses.get(current_id, "active")

	if current_agent_info.get("agent_role") == "member":
	trace_stream(
	"member_event",
	agent_id=current_agent_info.get("agent_id"),
	agent_name=current_agent_info.get("agent_name"),
	)

	match run_event.event:
	case RunEvent.run_started.value \| TeamRunEvent.run_started:
	if is_team_mode:
	active_id = current_agent_info.get("agent_id")
	active_name = current_agent_info.get("agent_name")
	active_role = current_agent_info.get("agent_role")
	active_model = current_agent_info.get("model")
	active_provider = current_agent_info.get("provider")
	logger.info(
	f"[TEAM] >>> run_started: {active_name} ({active_role}) "
	f"\| Model: {active_model} \| Provider: {active_provider}"
	)

	# Member starts -> Leader waits, Member active
	if active_role == "member":
	# Ensure leader is set to waiting when member starts
	async for e in update_status_and_yield(request.agent_id, "waiting"):
	yield e
	async for e in update_status_and_yield(active_id, "active"):
	yield e
	else:
	# Leader starts -> Leader active
	async for e in update_status_and_yield(active_id, "active"):
	yield e
	continue

	case TeamRunEvent.run_completed:
	if is_team_mode:
	active_id = current_agent_info.get("agent_id")
	active_role = current_agent_info.get("agent_role")
	if active_role == "member":
	# Member finished -> Leader still waiting (until it resumes), Member ready
	async for e in update_status_and_yield(active_id, "ready"):
	yield e
	continue

	# Handle both Agent RunEvent and Team TeamRunEvent for content streaming
	case RunEvent.run_content.value \| TeamRunEvent.run_content:
	raw_content_chunk = _extract_text_chunk(run_event)
	raw_content_chunk, inline_tool_trace_depth, inline_protocol_tail, had_protocol = _strip_inline_tool_protocol(
	raw_content_chunk,
	inline_tool_trace_depth,
	inline_protocol_tail,
	)
	if had_protocol:
	trace_stream(
	"strip_tool_protocol",
	depth=inline_tool_trace_depth,
	tail_len=len(inline_protocol_tail),
	cleaned_preview=_preview(raw_content_chunk),
	)
	raw_reasoning = _extract_reasoning_chunk(run_event, trace_fn=trace_stream)
	content_segments, in_content_think_block = _split_content_by_think_tags(
	raw_content_chunk,
	in_content_think_block,
	)
	content_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "text"
	)
	inline_thought_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "thought"
	)
	reasoning = raw_reasoning
	if reasoning and content_chunk and _is_reasoning_duplicate_of_content(
	str(reasoning),
	str(content_chunk),
	):
	trace_stream(
	"suppress_reasoning_overlap",
	event="run_content",
	reasoning_preview=_preview(reasoning),
	content_preview=_preview(content_chunk),
	)
	reasoning = ""
	has_content_chunk = bool(content_chunk)
	has_inline_thought = bool(inline_thought_chunk)
	trace_stream(
	"run_content",
	has_content=has_content_chunk,
	has_reasoning=bool(reasoning) or has_inline_thought,
	reasoning_closed=reasoning_closed_for_current_cycle,
	content_preview=_preview(content_chunk),
	reasoning_preview=_preview((reasoning or "") + (inline_thought_chunk or "")),
	)

	has_any_thought = bool(reasoning) or has_inline_thought
	if has_any_thought and reasoning_closed_for_current_cycle:
	# Re-open reasoning phase (e.g. after tool call or interleaved model output).
	reasoning_closed_for_current_cycle = False
	in_reasoning_phase = False
	should_break_next_thought = True

	if reasoning:
	for event in emit_thought_part(str(reasoning)):
	yield event
	if has_inline_thought:
	for event in emit_thought_part(str(inline_thought_chunk)):
	yield event

	if content_chunk:
	for e in process_text(content_chunk):
	yield e

	case RunEvent.reasoning_content_delta.value \| TeamRunEvent.reasoning_content_delta:
	raw_content_chunk = _extract_text_chunk(run_event)
	raw_content_chunk, inline_tool_trace_depth, inline_protocol_tail, had_protocol = _strip_inline_tool_protocol(
	raw_content_chunk,
	inline_tool_trace_depth,
	inline_protocol_tail,
	)
	if had_protocol:
	trace_stream(
	"strip_tool_protocol",
	depth=inline_tool_trace_depth,
	tail_len=len(inline_protocol_tail),
	cleaned_preview=_preview(raw_content_chunk),
	)
	raw_reasoning = _extract_reasoning_chunk(run_event)
	content_segments, in_content_think_block = _split_content_by_think_tags(
	raw_content_chunk,
	in_content_think_block,
	)
	content_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "text"
	)
	inline_thought_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "thought"
	)
	reasoning = raw_reasoning
	if reasoning and content_chunk and _is_reasoning_duplicate_of_content(
	str(reasoning),
	str(content_chunk),
	):
	trace_stream(
	"suppress_reasoning_overlap",
	event="reasoning_content_delta",
	reasoning_preview=_preview(reasoning),
	content_preview=_preview(content_chunk),
	)
	reasoning = ""
	has_content_chunk = bool(content_chunk)
	has_inline_thought = bool(inline_thought_chunk)
	trace_stream(
	"reasoning_delta",
	has_content=has_content_chunk,
	has_reasoning=bool(reasoning) or has_inline_thought,
	reasoning_closed=reasoning_closed_for_current_cycle,
	content_preview=_preview(content_chunk),
	reasoning_preview=_preview((reasoning or "") + (inline_thought_chunk or "")),
	)

	has_any_thought = bool(reasoning) or has_inline_thought
	if has_any_thought and reasoning_closed_for_current_cycle:
	reasoning_closed_for_current_cycle = False
	in_reasoning_phase = False
	should_break_next_thought = True

	if reasoning:
	for event in emit_thought_part(str(reasoning)):
	yield event
	if has_inline_thought:
	for event in emit_thought_part(str(inline_thought_chunk)):
	yield event

	if content_chunk:
	for e in process_text(content_chunk):
	yield e
	case RunEvent.tool_call_started.value \| TeamRunEvent.tool_call_started:
	tool_event: ToolCallStartedEvent = run_event # type: ignore[assignment]
	tool = tool_event.tool
	if tool:
	in_reasoning_phase = False
	should_break_next_thought = True
	reasoning_closed_for_current_cycle = False
	in_content_think_block = False
	inline_tool_trace_depth = 0
	inline_protocol_tail = ""
	if getattr(tool, "tool_call_id", None):
	tool_start_times[tool.tool_call_id] = time.time()

	current_id = current_agent_info.get("agent_id")
	# If leader calls a tool, it's either an internal tool (code, etc) or delegation.
	# During the tool call itself, the agent is "active".
	async for e in update_status_and_yield(current_id, "active"):
	yield e

	trace_stream(
	"tool_call_started",
	tool_name=getattr(tool, "tool_name", ""),
	tool_call_id=getattr(tool, "tool_call_id", None),
	)
	current_text_index = len(full_content)
	yield _build_tool_call_event(
	tool,
	current_text_index,
	agent_info=current_agent_info
	)

	case RunEvent.tool_call_completed.value \| TeamRunEvent.tool_call_completed:
	tool_event: ToolCallCompletedEvent = run_event # type: ignore[assignment]
	tool = tool_event.tool
	if tool:
	in_reasoning_phase = False
	should_break_next_thought = True
	reasoning_closed_for_current_cycle = False
	in_content_think_block = False
	inline_tool_trace_depth = 0
	inline_protocol_tail = ""
	duration_ms = None
	if tool.tool_call_id and tool.tool_call_id in tool_start_times:
	duration_ms = int((time.time() - tool_start_times[tool.tool_call_id]) * 1000)
	trace_stream(
	"tool_call_completed",
	tool_name=tool.tool_name or "",
	tool_call_id=tool.tool_call_id,
	is_error=bool(tool.tool_call_error),
	)
	tool_result_event, output = _build_tool_result_event(
	tool,
	duration_ms,
	self._normalize_tool_output,
	agent_info=current_agent_info,
	)
	yield tool_result_event
	self._collect_search_sources(output, sources_map)

	case RunEvent.run_completed.value \| TeamRunEvent.run_completed:
	# Extract agent info to check if this is a member or leader
	event_agent_info = _extract_agent_info_from_event(
	run_event,
	leader_id=request.agent_id,
	leader_name=request.agent_name,
	leader_emoji=request.agent_emoji,
	agent_metadata=agent_metadata,
	)
	is_member_completion = event_agent_info.get("agent_role") == "member"

	# In Team Mode, only terminate when the LEADER (no agent_id on event) completes.
	# Member completions should just let the main loop continue.
	if is_team_mode and is_member_completion:
	active_id = event_agent_info.get("agent_id")
	active_name = event_agent_info.get("agent_name")
	active_model = event_agent_info.get("model")
	active_provider = event_agent_info.get("provider")
	logger.info(
	f"[TEAM] Member {active_name} completed "
	f"(Model: {active_model} \| Provider: {active_provider}). "
	"Continuing stream..."
	)
	# Ensure member is marked as ready if not already handled by TeamRunEvent.run_completed
	async for e in update_status_and_yield(active_id, "ready"):
	yield e
	continue

	# Leader completed
	if is_team_mode:
	async for e in update_status_and_yield(request.agent_id, "idle"):
	yield e

	final_content, output = _extract_completed_content_and_output(
	run_event,
	full_content,
	)

	yield DoneEvent(
	content=final_content or "",
	output=output,
	thought=full_thought.strip() or None,
	sources=list(sources_map.values()) or None,
	).model_dump()

	if request:
	asyncio.create_task(self._maybe_optimize_memories(agent, request))

	# 4. Trigger Async Session Summary Update
	# Only if conversation_id exists (Main Chat Flow) AND summary is enabled
	if request.conversation_id and request.enable_session_summary:
	# Prepare summary lines:
	# - Normal flow: incremental update with last user + new assistant
	# - Regenerate/Edit (or single-turn rebuild): rebuild from current request context + new assistant
	if should_rebuild_summary:
	new_lines = [
	m for m in messages
	if m.get("role") in ("user", "assistant")
	]
	new_lines.append({"role": "assistant", "content": final_content})
	else:
	new_lines = []
	last_user = next((m for m in reversed(messages) if m.get("role") == "user"), None)
	if last_user:
	new_lines.append(last_user)
	new_lines.append({"role": "assistant", "content": final_content})

	_log_verbose_info(f"Triggering async summary update for {request.conversation_id} with {len(new_lines)} messages (rebuild: {should_rebuild_summary}, is_editing: {request.is_editing})")
	asyncio.create_task(update_session_summary(
	conversation_id=request.conversation_id,
	old_summary=old_summary_json,
	new_messages=new_lines,
	database_provider=request.database_provider,
	memory_provider=request.memory_provider,
	memory_model=request.memory_model,
	memory_api_key=request.memory_api_key,
	memory_base_url=request.memory_base_url,
	summary_provider=request.summary_provider,
	summary_model=request.summary_model,
	summary_api_key=request.summary_api_key,
	summary_base_url=request.summary_base_url,
	rebuild_from_scratch=should_rebuild_summary,
	))

	return

	case RunEvent.run_error.value \| TeamRunEvent.run_error:
	error_msg = _extract_best_error_message(run_event)
	active_id = current_agent_info.get("agent_id")
	if active_id:
	async for e in update_status_and_yield(active_id, "error"):
	yield e
	yield ErrorEvent(error=error_msg).model_dump()
	return
	else:
	# Simple event Fallback (no detailed event type), just check for content
	raw_content_chunk = _extract_text_chunk(run_event)
	raw_content_chunk, inline_tool_trace_depth, inline_protocol_tail, _ = _strip_inline_tool_protocol(
	raw_content_chunk,
	inline_tool_trace_depth,
	inline_protocol_tail,
	)
	raw_reasoning = _extract_reasoning_chunk(run_event)
	content_segments, in_content_think_block = _split_content_by_think_tags(
	raw_content_chunk,
	in_content_think_block,
	)
	content_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "text"
	)
	inline_thought_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "thought"
	)
	if raw_reasoning:
	for event in emit_thought_part(str(raw_reasoning)):
	yield event
	if inline_thought_chunk:
	for event in emit_thought_part(str(inline_thought_chunk)):
	yield event
	if content_chunk:
	for e in process_text(content_chunk):
	yield e

	except Exception as exc:
	logger.error(f"Stream chat error: {exc}")
	yield ErrorEvent(error=_extract_best_error_message(exc)).model_dump()

	async def _continue_hitl_run(
	self,
	request: StreamChatRequest,
	) -> AsyncGenerator[dict[str, Any], None]:
	"""
	Continue a paused HITL run after user submits form.

	This method:
	1. Retrieves requirements from storage
	2. Rebuilds continuation messages with submitted form values
	3. Runs agent.arun() and streams completion
	4. Cleans up storage record
	"""
	try:
	run_id = request.run_id
	field_values = request.field_values or {}

	_log_verbose_info(
	f"[HITL] Continuing run_id={run_id!r} "
	f"with database_provider={request.database_provider} "
	f"and field_values={list(field_values.keys())}"
	)

	# 1. Fetch Session Summary from DB
	session_summary_text = None
	old_summary_json = None
	if request.conversation_id:
	try:
	from ..models.db import DbFilter, DbQueryRequest
	from .db_service import execute_db_async, get_db_adapter

	adapter = get_db_adapter(request.database_provider)
	if adapter:
	req = DbQueryRequest(
	providerId=adapter.config.id,
	action="select",
	table="conversations",
	columns=["session_summary"],
	filters=[DbFilter(op="eq", column="id", value=request.conversation_id)],
	maybeSingle=True,
	)

	result = await execute_db_async(adapter, req)

	if result.data and isinstance(result.data, dict):
	row = result.data
	raw_summary = row.get("session_summary")

	if raw_summary:
	if isinstance(raw_summary, str):
	try:
	old_summary_json = json.loads(raw_summary)
	except (ValueError, json.JSONDecodeError):
	pass
	elif isinstance(raw_summary, dict):
	old_summary_json = raw_summary

	if old_summary_json:
	session_summary_text = old_summary_json.get("summary")
	except Exception as e:
	logger.warning(f"Failed to fetch session summary in HITL flow: {e}")

	# Retrieve requirements from Supabase
	hitl_storage = get_hitl_storage(request.database_provider)
	pending = await hitl_storage.get_pending_run(run_id)

	requirements = None
	saved_messages = None
	saved_run_output = None
	if isinstance(pending, dict):
	requirements = pending.get("requirements")
	saved_messages = pending.get("messages")
	saved_run_output = pending.get("run_output")
	else:
	requirements = pending

	_log_verbose_info(
	"[HITL] loaded pending payload: "
	f"has_requirements={bool(requirements)}, "
	f"messages_type={type(saved_messages).__name__ if saved_messages is not None else 'None'}, "
	f"has_run_output={saved_run_output is not None}, "
	f"run_output_type={type(saved_run_output).__name__ if saved_run_output is not None else 'None'}"
	)

	if not requirements:
	logger.error(
	f"[HITL] No pending run found for run_id={run_id!r} "
	f"(database_provider={request.database_provider})"
	)
	yield ErrorEvent(error="Form session expired or not found").model_dump()
	return

	# Enable skills for the definitive user-facing chat agent
	request.enable_skills = True
	# Get agent (same provider as original request)
	agent = get_agent_for_provider(request)
	_log_verbose_info(f"[HITL Continue] Agent instructions: {getattr(agent, 'instructions', None)}")

	full_content = ""
	full_thought = ""
	sources_map: dict[str, Any] = {}
	tool_start_times: dict[str, float] = {}
	should_break_next_thought = False
	in_reasoning_phase = False
	reasoning_closed_for_current_cycle = False
	in_content_think_block = False
	inline_tool_trace_depth = 0
	inline_protocol_tail = ""
	stream_trace = _is_stream_trace_enabled()
	paused_again = False # Flag to prevent cleanup when multi-form chaining occurs
	stream_had_error = False
	completed_content_fallback = ""
	saw_terminal_completion = False
	continuation_event_count = 0
	last_event_name: str \| None = None
	last_event_type: str \| None = None
	last_event_run_id: str \| None = None
	# Current agent info for Team mode (updated per event)
	current_agent_info: dict[str, Any] = {"agent_id": request.agent_id, "agent_name": request.agent_name}

	def trace_stream(stage: str, **kwargs: Any) -> None:
	    """Log one stream-trace line for the HITL continuation flow.

	    Does nothing unless ``stream_trace`` is truthy.

	    Args:
	        stage: Short label for the step being traced.
	        **kwargs: Details rendered as comma-separated ``key=value`` pairs.
	    """
	    if not stream_trace:
	        return
	    payload = ", ".join(f"{k}={v}" for k, v in kwargs.items())
	    # Plain "|" separator: the former "\|" was an invalid escape sequence that
	    # wrote a stray backslash into every trace line.
	    logger.info(f"[STREAM_TRACE][hitl] {stage} | {payload}")

	def emit_thought_part(part: str):
	nonlocal full_thought, full_content, should_break_next_thought, in_reasoning_phase, reasoning_closed_for_current_cycle
	text = _strip_internal_tool_trace(str(part or ""))
	if not text or not text.strip():
	return
	should_break_next_thought = False
	in_reasoning_phase = True
	full_thought += text
	trace_stream("emit_reasoning", reasoning_preview=_preview(text))
	current_text_index = len(full_content)
	yield ThoughtEvent(
	content=text,
	text_index=current_text_index,
	agent_id=current_agent_info.get("agent_id"),
	agent_name=current_agent_info.get("agent_name"),
	).model_dump(by_alias=True, exclude_none=True)

	def process_text(text: str):
	"""Clean one answer chunk, append it to ``full_content`` and yield it as a TextEvent.

	NOTE(review): block indentation is not visible in this view. Presumably the
	three flag resets apply only to chunks containing visible (non-whitespace)
	text, while the append and the yield run for every non-empty cleaned chunk;
	confirm against the original nesting.
	"""
	nonlocal full_content, in_reasoning_phase, should_break_next_thought, reasoning_closed_for_current_cycle
	# Drop protocol marker tokens before anything is accumulated or emitted.
	clean_text = _strip_internal_tool_trace(text)
	if clean_text:
	has_visible_text = bool(clean_text.strip())
	if has_visible_text:
	# Visible answer text: leave the reasoning phase and mark it closed for this cycle.
	in_reasoning_phase = False
	should_break_next_thought = True
	reasoning_closed_for_current_cycle = True
	full_content += clean_text
	# NOTE(review): _stream_events rebinds current_agent_info without nonlocal, so this
	# presumably reads the mapping initialised from the request; confirm.
	yield TextEvent(
	content=clean_text,
	agent_id=current_agent_info.get("agent_id"),
	agent_name=current_agent_info.get("agent_name"),
	).model_dump(by_alias=True, exclude_none=True)
	async def _iterate_run_stream(stream: Any):
	"""
	Normalize both async and sync Agno run streams into an async iterator.
	"""
	if hasattr(stream, "__aiter__"):
	async for item in stream:
	yield item
	return

	iterator = iter(stream)
	sentinel = object()
	while True:
	item = await asyncio.to_thread(lambda: next(iterator, sentinel))
	if item is sentinel:
	break
	yield item

	async def _stream_events(stream):
	nonlocal full_content, full_thought, sources_map, tool_start_times, paused_again, stream_had_error
	nonlocal in_reasoning_phase, should_break_next_thought, reasoning_closed_for_current_cycle
	nonlocal in_content_think_block, inline_tool_trace_depth, inline_protocol_tail
	nonlocal completed_content_fallback, saw_terminal_completion
	nonlocal continuation_event_count, last_event_name, last_event_type, last_event_run_id
	async for run_event in _iterate_run_stream(stream):
	_append_raw_event_log(
	phase="hitl_continuation",
	request=request,
	run_id=run_id,
	run_event=run_event,
	)
	continuation_event_count += 1
	last_event_type = type(run_event).__name__
	last_event_name = str(getattr(run_event, "event", None) or last_event_type)
	last_event_run_id = str(raw_event_run_id) if raw_event_run_id else None

	# Extract agent info for Team mode (though Team HITL is currently disabled)
	current_agent_info = _extract_agent_info_from_event(
	run_event,
	leader_id=request.agent_id,
	leader_name=request.agent_name,
	leader_emoji=request.agent_emoji,
	)

	# When yield_run_output=True, acontinue_run may yield the final RunOutput object.
	# Capture its canonical content as a robust fallback for providers that emit sparse events.
	# IMPORTANT: Do NOT re-emit this content via process_text() if we already received
	# streaming chunks (run_content events). Doing so would cause the full answer to be
	# appended a second time, resulting in visible duplication in the UI.
	# Only use RunOutput.content as a fallback when the stream produced nothing.
	if isinstance(run_event, RunOutput):
	saw_terminal_completion = True
	completed_content_fallback, _ = _extract_completed_content_and_output(
	run_event,
	completed_content_fallback or full_content,
	)
	# Only emit if no content was streamed yet (sparse-event provider fallback).
	if not full_content:
	text_from_output = _extract_text_from_message_content(
	getattr(run_event, "content", None)
	).strip()
	if text_from_output:
	for e in process_text(text_from_output):
	yield e
	continue

	# HITL Pause Check
	if hasattr(run_event, 'is_paused') and run_event.is_paused:
	logger.info(f"Agent paused again during continuation (multi-form chain, run_id: {run_id})")

	# Extract new requirements
	new_requirements = getattr(run_event, 'active_requirements', None) or getattr(run_event, 'requirements', None)

	if new_requirements:
	form_requirements = [req for req in new_requirements if _is_interactive_form_requirement(req)]

	if form_requirements:
	# Save new form requirements (overwrites previous in memory)
	hitl_storage_multi = get_hitl_storage(request.database_provider)
	saved = await hitl_storage_multi.save_pending_run(
	run_id=run_id,
	requirements=form_requirements,
	conversation_id=request.conversation_id,
	user_id=request.user_id,
	agent_model=request.model,
	messages=saved_messages, # Reuse saved messages
	run_output=(
	{
	"run_id": getattr(run_event, "run_id", None) or run_id,
	"session_id": getattr(run_event, "session_id", None)
	or request.conversation_id,
	"user_id": request.user_id,
	"messages": saved_messages or [],
	"tools": [
	tool.to_dict() if hasattr(tool, "to_dict") else tool
	for tool in (getattr(run_event, "tools", None) or [])
	if tool is not None
	],
	"requirements": [
	req.to_dict() if hasattr(req, "to_dict") else req
	for req in form_requirements
	],
	"status": "PAUSED",
	}
	),
	)
	if not saved:
	raise RuntimeError("Failed to persist chained HITL pending run")

	# Extract form fields and notify frontend
	for req in form_requirements:
	if (hasattr(req, 'needs_external_execution') and req.needs_external_execution) or \
	(req.tool_execution and req.tool_execution.tool_name == "interactive_form"):
	form_id, title, fields = _extract_interactive_form_payload(
	req,
	default_title="Please provide additional information",
	)

	yield FormRequestEvent(
	run_id=run_id,
	form_id=form_id,
	title=title,
	fields=fields
	).model_dump()

	# Send partial done event
	yield DoneEvent(
	content=full_content or "",
	thought=full_thought.strip() or None,
	sources=list(sources_map.values()) or None,
	).model_dump()

	logger.info(f"Multi-form: saved second form, waiting for user (run_id: {run_id})")
	paused_again = True # Mark as paused again to skip cleanup
	return

	# If no form requirements, just continue
	logger.warning(f"Agent paused again but no interactive_form found (run_id: {run_id})")
	yield ErrorEvent(error="Agent paused unexpectedly").model_dump()
	return

	# Check if this is a detailed event (from stream_events=True or implicit)
	if hasattr(run_event, 'event'):
	match run_event.event:
	case RunEvent.run_content.value:
	raw_content_chunk = _extract_text_chunk(run_event)
	raw_content_chunk, inline_tool_trace_depth, inline_protocol_tail, had_protocol = _strip_inline_tool_protocol(
	raw_content_chunk,
	inline_tool_trace_depth,
	inline_protocol_tail,
	)
	if had_protocol:
	trace_stream(
	"strip_tool_protocol",
	depth=inline_tool_trace_depth,
	tail_len=len(inline_protocol_tail),
	cleaned_preview=_preview(raw_content_chunk),
	)
	raw_reasoning = _extract_reasoning_chunk(run_event, trace_fn=trace_stream)
	content_segments, in_content_think_block = _split_content_by_think_tags(
	raw_content_chunk,
	in_content_think_block,
	)
	content_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "text"
	)
	inline_thought_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "thought"
	)
	reasoning = raw_reasoning
	if reasoning and content_chunk and _is_reasoning_duplicate_of_content(
	str(reasoning),
	str(content_chunk),
	):
	trace_stream(
	"suppress_reasoning_overlap",
	event="run_content",
	reasoning_preview=_preview(reasoning),
	content_preview=_preview(content_chunk),
	)
	reasoning = ""
	has_content_chunk = bool(content_chunk)
	has_inline_thought = bool(inline_thought_chunk)
	trace_stream(
	"run_content",
	has_content=has_content_chunk,
	has_reasoning=bool(reasoning) or has_inline_thought,
	reasoning_closed=reasoning_closed_for_current_cycle,
	content_preview=_preview(content_chunk),
	reasoning_preview=_preview((reasoning or "") + (inline_thought_chunk or "")),
	)

	has_any_thought = bool(reasoning) or has_inline_thought
	if has_any_thought and reasoning_closed_for_current_cycle:
	reasoning_closed_for_current_cycle = False
	in_reasoning_phase = False
	should_break_next_thought = True

	if reasoning:
	for event in emit_thought_part(str(reasoning)):
	yield event
	if has_inline_thought:
	for event in emit_thought_part(str(inline_thought_chunk)):
	yield event

	if content_chunk:
	for e in process_text(content_chunk):
	yield e
	trace_stream(
	"emit_content",
	reasoning_closed=reasoning_closed_for_current_cycle,
	content_preview=_preview(content_chunk),
	)

	case RunEvent.reasoning_content_delta.value:
	raw_content_chunk = _extract_text_chunk(run_event)
	raw_content_chunk, inline_tool_trace_depth, inline_protocol_tail, had_protocol = _strip_inline_tool_protocol(
	raw_content_chunk,
	inline_tool_trace_depth,
	inline_protocol_tail,
	)
	if had_protocol:
	trace_stream(
	"strip_tool_protocol",
	depth=inline_tool_trace_depth,
	tail_len=len(inline_protocol_tail),
	cleaned_preview=_preview(raw_content_chunk),
	)
	raw_reasoning = _extract_reasoning_chunk(run_event)
	content_segments, in_content_think_block = _split_content_by_think_tags(
	raw_content_chunk,
	in_content_think_block,
	)
	content_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "text"
	)
	inline_thought_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "thought"
	)
	reasoning = raw_reasoning
	if reasoning and content_chunk and _is_reasoning_duplicate_of_content(
	str(reasoning),
	str(content_chunk),
	):
	trace_stream(
	"suppress_reasoning_overlap",
	event="reasoning_content_delta",
	reasoning_preview=_preview(reasoning),
	content_preview=_preview(content_chunk),
	)
	reasoning = ""
	has_content_chunk = bool(content_chunk)
	has_inline_thought = bool(inline_thought_chunk)
	trace_stream(
	"reasoning_delta",
	has_content=has_content_chunk,
	has_reasoning=bool(reasoning) or has_inline_thought,
	reasoning_closed=reasoning_closed_for_current_cycle,
	content_preview=_preview(content_chunk),
	reasoning_preview=_preview((reasoning or "") + (inline_thought_chunk or "")),
	)

	has_any_thought = bool(reasoning) or has_inline_thought
	if has_any_thought and reasoning_closed_for_current_cycle:
	reasoning_closed_for_current_cycle = False
	in_reasoning_phase = False
	should_break_next_thought = True

	if reasoning:
	for event in emit_thought_part(str(reasoning)):
	yield event
	if has_inline_thought:
	for event in emit_thought_part(str(inline_thought_chunk)):
	yield event

	if content_chunk:
	for e in process_text(content_chunk):
	yield e
	trace_stream(
	"emit_content",
	reasoning_closed=reasoning_closed_for_current_cycle,
	content_preview=_preview(content_chunk),
	)

	case RunEvent.tool_call_started.value:
	tool_event: ToolCallStartedEvent = run_event # type: ignore[assignment]
	tool = tool_event.tool
	if tool:
	in_reasoning_phase = False
	should_break_next_thought = True
	reasoning_closed_for_current_cycle = False
	in_content_think_block = False
	inline_tool_trace_depth = 0
	inline_protocol_tail = ""
	if tool.tool_call_id:
	tool_start_times[tool.tool_call_id] = time.time()
	trace_stream(
	"tool_call_started",
	tool_name=tool.tool_name or "",
	tool_call_id=tool.tool_call_id,
	)
	current_text_index = len(full_content)
	yield _build_tool_call_event(tool, current_text_index)

	case RunEvent.tool_call_completed.value:
	tool_event: ToolCallCompletedEvent = run_event # type: ignore[assignment]
	tool = tool_event.tool
	if tool:
	in_reasoning_phase = False
	should_break_next_thought = True
	reasoning_closed_for_current_cycle = False
	in_content_think_block = False
	inline_tool_trace_depth = 0
	inline_protocol_tail = ""
	duration_ms = None
	if tool.tool_call_id and tool.tool_call_id in tool_start_times:
	duration_ms = int((time.time() - tool_start_times[tool.tool_call_id]) * 1000)
	trace_stream(
	"tool_call_completed",
	tool_name=tool.tool_name or "",
	tool_call_id=tool.tool_call_id,
	is_error=bool(tool.tool_call_error),
	)
	tool_result_event, output = _build_tool_result_event(
	tool,
	duration_ms,
	self._normalize_tool_output,
	)
	yield tool_result_event
	self._collect_search_sources(output, sources_map)

	case RunEvent.run_completed.value:
	saw_terminal_completion = True
	completed_content_fallback, _ = _extract_completed_content_and_output(
	run_event,
	completed_content_fallback or full_content,
	)

	case RunEvent.run_error.value:
	error_msg = _extract_best_error_message(run_event)
	stream_had_error = True
	yield ErrorEvent(error=error_msg).model_dump()
	return
	else:
	# Simple event Fallback
	raw_content_chunk = _extract_text_chunk(run_event)
	raw_content_chunk, inline_tool_trace_depth, inline_protocol_tail, _ = _strip_inline_tool_protocol(
	raw_content_chunk,
	inline_tool_trace_depth,
	inline_protocol_tail,
	)
	raw_reasoning = _extract_reasoning_chunk(run_event)
	content_segments, in_content_think_block = _split_content_by_think_tags(
	raw_content_chunk,
	in_content_think_block,
	)
	content_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "text"
	)
	inline_thought_chunk = "".join(
	seg_text for seg_type, seg_text in content_segments if seg_type == "thought"
	)
	if raw_reasoning:
	for event in emit_thought_part(str(raw_reasoning)):
	yield event
	if inline_thought_chunk:
	for event in emit_thought_part(str(inline_thought_chunk)):
	yield event
	if content_chunk:
	for e in process_text(content_chunk):
	yield e

	def _build_fallback_messages():
	if not saved_messages:
	return None
	updated_messages = list(saved_messages)
	for req in requirements:
	tool_exec = getattr(req, 'tool_execution', None)
	tool_name = getattr(tool_exec, 'tool_name', None) if tool_exec else None
	if tool_name != "interactive_form":
	continue
	tool_args = getattr(tool_exec, 'tool_args', {}) if tool_exec else {}
	tool_call_id = getattr(req, "id", None) or tool_args.get("id") or f"form-{int(time.time() * 1000)}"
	updated_messages.append({
	"role": "assistant",
	"content": None,
	"tool_calls": [{
	"id": tool_call_id,
	"type": "function",
	"function": {
	"name": "interactive_form",
	"arguments": json.dumps(tool_args or {}),
	}
	}],
	})
	updated_messages.append({
	"role": "tool",
	"content": json.dumps(field_values),
	"tool_call_id": tool_call_id,
	})
	return updated_messages

	def _apply_field_values_to_requirements() -> list[Any]:
	"""
	Resolve pending HITL requirements with the submitted form payload.

	Prefer Agno-native requirement resolution so we can use acontinue_run().
	"""
	resolved_requirements: list[Any] = []
	serialized_values = json.dumps(field_values, ensure_ascii=False)

	for req in requirements or []:
	try:
	if hasattr(req, "needs_external_execution") and req.needs_external_execution:
	req.set_external_execution_result(serialized_values)
	tool_exec = getattr(req, "tool_execution", None)
	if tool_exec is not None and getattr(tool_exec, "result", None) is None:
	tool_exec.result = serialized_values
	elif hasattr(req, "needs_user_input") and req.needs_user_input:
	req.provide_user_input(field_values)
	elif hasattr(req, "needs_confirmation") and req.needs_confirmation:
	req.confirm()
	except Exception as req_err:
	logger.warning(
	f"[HITL] Failed to resolve requirement {getattr(req, 'id', None)} for run_id={run_id}: {req_err}"
	)
	resolved_requirements.append(req)

	return resolved_requirements

	def _build_continuation_agent_input(base_messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
	messages = self._inject_local_time_context(list(base_messages), request, [])
	system_messages = [m for m in messages if m.get("role") == "system"]
	chat_messages = [m for m in messages if m.get("role") != "system"]

	raw_turn_limit = request.context_turn_limit
	turn_limit = (
	max(1, min(50, int(raw_turn_limit)))
	if isinstance(raw_turn_limit, int) and raw_turn_limit > 0
	else 2
	)
	user_indices = [i for i, m in enumerate(chat_messages) if m.get("role") == "user"]
	user_turn_count = len(user_indices)
	if user_turn_count > turn_limit:
	cutoff_index = user_indices[-turn_limit]
	recent_history = chat_messages[cutoff_index:]
	else:
	recent_history = chat_messages

	should_inject_summary = bool(session_summary_text) and (user_turn_count > turn_limit)
	if should_inject_summary:
	summary_prompt = (
	"\n\nSession memory summary:\n"
	"Here is a summary of the conversation so far. Use this to understand long-term context, "
	"but prioritize the details in the recent messages below.\n"
	f"{session_summary_text}\n"
	)
	if system_messages:
	last_sys = system_messages[-1]
	if "Session memory summary:" not in str(last_sys.get("content", "")):
	last_sys["content"] = str(last_sys.get("content", "")) + summary_prompt
	else:
	system_messages.append({"role": "system", "content": summary_prompt})

	return system_messages + recent_history

	resolved_requirements = _apply_field_values_to_requirements()
	stream = None
	try:
	restored_run_output = None
	if isinstance(saved_run_output, dict):
	try:
	restored_run_output = RunOutput.from_dict(dict(saved_run_output))
	# Ensure external-execution tool results are concretely attached to
	# run_response.tools before acontinue_run() processes updates.
	if restored_run_output and isinstance(restored_run_output.tools, list):
	serialized_values = json.dumps(field_values, ensure_ascii=False)
	tool_result_by_id: dict[str, Any] = {}
	for req in resolved_requirements or []:
	tool_exec = getattr(req, "tool_execution", None)
	if not tool_exec:
	continue
	tcid = getattr(tool_exec, "tool_call_id", None)
	if tcid:
	tool_result_by_id[str(tcid)] = getattr(tool_exec, "result", None)

	for tool in restored_run_output.tools:
	if getattr(tool, "result", None) is not None:
	continue
	tcid = getattr(tool, "tool_call_id", None)
	if tcid and str(tcid) in tool_result_by_id:
	tool.result = tool_result_by_id[str(tcid)]
	continue
	if getattr(tool, "tool_name", None) == "interactive_form":
	tool.result = serialized_values

	# OpenAI-compatible tool flow requires an assistant message
	# with matching tool_calls before any tool message can appear.
	if restored_run_output and isinstance(restored_run_output.tools, list):
	existing_messages = (
	list(restored_run_output.messages)
	if isinstance(restored_run_output.messages, list)
	else []
	)
	existing_tool_call_ids: set[str] = set()
	for msg in existing_messages:
	msg_tool_calls = getattr(msg, "tool_calls", None) or []
	for tc in msg_tool_calls:
	if isinstance(tc, dict) and tc.get("id"):
	existing_tool_call_ids.add(str(tc.get("id")))

	for tool in restored_run_output.tools:
	tool_call_id = getattr(tool, "tool_call_id", None)
	tool_name = getattr(tool, "tool_name", None) or "interactive_form"
	if not tool_call_id or str(tool_call_id) in existing_tool_call_ids:
	continue
	tool_args = getattr(tool, "tool_args", None) or {}
	existing_messages.append(
	Message(
	role="assistant",
	content="",
	tool_calls=[
	{
	"id": str(tool_call_id),
	"type": "function",
	"function": {
	"name": str(tool_name),
	"arguments": json.dumps(tool_args, ensure_ascii=False),
	},
	}
	],
	)
	)
	existing_tool_call_ids.add(str(tool_call_id))

	restored_run_output.messages = existing_messages
	if resolved_requirements:
	restored_run_output.requirements = resolved_requirements
	except Exception as restore_err:
	logger.warning(
	f"[HITL] Failed to restore RunOutput for run_id={run_id}, fallback to run_id path: {restore_err}"
	)
	_log_verbose_info(
	f"[HITL] continuation restore mode: {'run_response' if restored_run_output is not None else 'run_id'} "
	f"(has_saved_run_output={isinstance(saved_run_output, dict)})"
	)
	_log_verbose_info(
	f"Running HITL continuation via continue_run (run_id: {run_id}, session_id: {request.conversation_id})"
	)
	if restored_run_output is not None:
	stream = agent.continue_run(
	run_response=restored_run_output,
	stream=True,
	stream_events=True,
	yield_run_output=True,
	user_id=request.user_id,
	session_id=request.conversation_id,
	output_schema=request.output_schema,
	)
	else:
	stream = agent.continue_run(
	run_id=run_id,
	requirements=resolved_requirements,
	stream=True,
	stream_events=True,
	yield_run_output=True,
	user_id=request.user_id,
	session_id=request.conversation_id,
	output_schema=request.output_schema,
	)
	except Exception as continue_err:
	logger.warning(
	f"[HITL] continue_run failed for run_id={run_id}, fallback to rebuilt arun: {continue_err}"
	)
	fallback_messages = _build_fallback_messages()
	if not fallback_messages:
	yield ErrorEvent(error="Form session cannot be resumed (missing state)").model_dump()
	return
	agent_input = _build_continuation_agent_input(fallback_messages)
	stream = agent.arun(
	input=agent_input,
	stream=True,
	stream_events=True,
	user_id=request.user_id,
	session_id=request.conversation_id,
	output_schema=request.output_schema,
	)

	async for event in _stream_events(stream):
	yield event

	_log_verbose_info(
	"[HITL] continuation stream summary: "
	f"run_id={run_id}, "
	f"events={continuation_event_count}, "
	f"last_event={last_event_name}, "
	f"last_event_type={last_event_type}, "
	f"last_event_run_id={last_event_run_id}, "
	f"saw_terminal_completion={saw_terminal_completion}, "
	f"stream_had_error={stream_had_error}, "
	f"paused_again={paused_again}"
	)

	if stream_had_error:
	logger.warning(f"HITL run {run_id} ended with stream error; skipping done/cleanup")
	return
	if not saw_terminal_completion:
	logger.warning(
	f"HITL run {run_id} stream ended without terminal completion event; skipping done/cleanup"
	)
	yield ErrorEvent(error="HITL continuation ended before completion").model_dump()
	return

	# Stream completed, send done event
	yield DoneEvent(
	content=full_content or completed_content_fallback,
	thought=full_thought.strip() or None,
	sources=list(sources_map.values()) or None,
	).model_dump()

	# Clean up Supabase (skip if paused again for multi-form)
	if not paused_again:
	await hitl_storage.delete_pending_run(run_id)
	logger.info(f"HITL run {run_id} completed and cleaned up")
	else:
	logger.info(f"HITL run {run_id} paused again (multi-form), skipping cleanup")

	# 6. Trigger Async Session Summary Update
	if request.conversation_id and not paused_again:
	# For HITL resumption, extract the last turn's context from saved_messages
	# (matching normal flow: only last user + assistant, not full history)
	summary_messages = []

	# Extract only the last user-assistant turn from saved_messages
	if saved_messages:
	last_user_idx = -1
	for i in range(len(saved_messages) - 1, -1, -1):
	if saved_messages[i].get("role") == "user":
	last_user_idx = i
	break

	if last_user_idx >= 0:
	# Include from last user message to end of saved_messages
	# This captures: user question -> assistant form(s) -> any intermediate interactions
	for msg in saved_messages[last_user_idx:]:
	role = msg.get("role")
	content = msg.get("content")
	# Only include user/assistant messages with content for summary
	if role in ("user", "assistant") and content:
	summary_messages.append({"role": role, "content": content})

	# Add the form submission as user input (provides structured data context)
	form_submission_text = f"[Form Submitted] Values: {json.dumps(field_values)}"
	summary_messages.append({"role": "user", "content": form_submission_text})

	# Add the new assistant response (based on form data)
	summary_messages.append({"role": "assistant", "content": full_content})

	_log_verbose_info(f"Triggering async summary update for {request.conversation_id} (Resumed HITL flow, {len(summary_messages)} messages)")
	asyncio.create_task(update_session_summary(
	conversation_id=request.conversation_id,
	old_summary=old_summary_json,
	new_messages=summary_messages,
	database_provider=request.database_provider,
	memory_provider=request.memory_provider,
	memory_model=request.memory_model,
	memory_api_key=request.memory_api_key,
	memory_base_url=request.memory_base_url,
	summary_provider=request.summary_provider,
	summary_model=request.summary_model,
	summary_api_key=request.summary_api_key,
	summary_base_url=request.summary_base_url,
	rebuild_from_scratch=False, # HITL resumption is usually incremental
	))

	except Exception as exc:
	import traceback
	error_details = traceback.format_exc()
	logger.error(f"HITL continuation error: {exc}\n{error_details}")
	yield ErrorEvent(error=_extract_best_error_message(exc)).model_dump()




	def _collect_enabled_tool_names(self, request: StreamChatRequest) -> set[str]:
	"""
	Return the set of tool names enabled by ``request``.

	Names are read from ``request.tool_ids`` and ``request.tools`` (both passed
	through ``resolve_tool_name``) and from ``request.user_tools`` (the ``name``
	attribute is used as-is). Duplicates collapse because the result is a set.
	"""
	# NOTE(review): indentation is not visible in this view, so it is unclear whether
	# only the tool_ids loop or also the loops below are skipped when the provider is
	# "gemini" - confirm against the original file.
	names: list[str] = []
	if request.provider != "gemini":
	for tool_id in request.tool_ids or []:
	names.append(resolve_tool_name(str(tool_id)))
	for tool_def in request.tools or []:
	# Objects exposing model_dump() are converted to a plain dict first.
	if hasattr(tool_def, "model_dump"):
	tool_def = tool_def.model_dump()
	# Anything that is still not a dict cannot be inspected for a name.
	if not isinstance(tool_def, dict):
	continue
	# The name is looked up under "function" first, then at the top level.
	name = tool_def.get("function", {}).get("name") or tool_def.get("name")
	if name:
	names.append(resolve_tool_name(str(name)))
	for user_tool in request.user_tools or []:
	# User tools contribute their name verbatim (no resolve_tool_name call).
	if hasattr(user_tool, "name") and user_tool.name:
	names.append(str(user_tool.name))
	return set(names)

	def _inject_local_time_context(
	self,
	messages: list[dict[str, Any]],
	request: StreamChatRequest,
	pre_events: list[dict[str, Any]],
	) -> list[dict[str, Any]]:
	if not messages:
	return messages

	timezone = request.user_timezone or "UTC"
	locale = request.user_locale or "en-US"
	time_result = self._compute_local_time(timezone, locale)
	try:
	local_date = datetime.fromisoformat(str(time_result.get("iso"))).strftime("%Y-%m-%d")
	except Exception:
	local_date = str(time_result.get("formatted", "")).split(" ")[0] or datetime.now().strftime(
	"%Y-%m-%d"
	)
	tz_label = str(time_result.get("timezone") or timezone)
	note = (
	f"\n\n[Time note for this query]\n"
	f"Note: local date is {local_date} ({tz_label}). "
	"Interpret relative time terms using this date."
	)

	updated: list[dict[str, Any]] = []
	for msg in messages:
	if not isinstance(msg, dict) or msg.get("role") != "user":
	updated.append(msg)
	continue

	content = msg.get("content")
	if isinstance(content, str):
	if "[Time note for this query]" in content:
	updated.append(msg)
	else:
	updated.append({**msg, "content": f"{content}{note}"})
	continue

	if isinstance(content, list):
	has_note = False
	for part in content:
	if isinstance(part, dict):
	if "[Time note for this query]" in str(part.get("text", "")) or "[Time note for this query]" in str(part.get("content", "")):
	has_note = True
	break

	if has_note:
	updated.append(msg)
	else:
	updated.append({*msg, "content": [content, {"type": "text", "text": note}]})
	continue

	updated.append(msg)

	return updated

	def _compute_local_time(self, timezone: str, locale: str) -> dict[str, Any]:
	try:
	tzinfo = ZoneInfo(timezone)
	now = datetime.now(tzinfo)
	except Exception:
	now = datetime.now()
	return {
	"timezone": timezone,
	"locale": locale,
	"formatted": now.strftime("%Y-%m-%d %H:%M:%S"),
	"iso": now.isoformat(),
	"now": now,
	}

	def _inject_tool_guidance(
	self,
	messages: list[dict[str, Any]],
	enabled_tools: set[str],
	request: Any \| None = None,
	) -> list[dict[str, Any]]:
	if not enabled_tools:
	return messages

	updated = list(messages)
	system_index = next((i for i, m in enumerate(updated) if m.get("role") == "system"), -1)

	no_tool_narration_guidance = (
	"\n\n[OUTPUT DIRECTIVES]\n"
	"1. The main text (Answer) must contain ONLY the final helpful content and necessary explanations for the user.\n"
	"2. In the main text, NEVER describe that you are going to, are currently, or have already called any tools, searched, browsed, retrieved memory, or queried databases. These are internal traces (Trace).\n"
	"3. In the main text, do NOT refer to yourself performing actions (e.g., \"Let me check\", \"I will search\", \"I have retrieved\").\n"
	" Instead, directly present results as established information.\n"
	"4. If citing sources, use neutral phrasing such as: \"According to available data\", \"Based on public information\", \"According to the returned data\".\n"
	" Never mention tool names or the calling process.\n"
	"5. If information is insufficient, directly state the missing gap and ask clarifying questions.\n"
	" Do NOT say \"Let me check again\" or similar transitional action phrases.\n"
	"6. Once you start presenting the final answer, do not switch back to planning, searching, or tool-calling language.\n"
	"7. The final answer should begin naturally with the content itself, without meta commentary or transitional phrases.\n"
	)
	updated = self._append_system_message(updated, no_tool_narration_guidance, system_index)
	system_index = next((i for i, m in enumerate(updated) if m.get("role") == "system"), -1)

	if "interactive_form" in enabled_tools:
	form_guidance = (
	"\n[TOOL USE GUIDANCE]\n"
	"When you need to collect structured information from the user (e.g. preferences, requirements, "
	"booking details), use the 'interactive_form' tool.\n"
	"CRITICAL: DO NOT list questions in text or markdown. YOU MUST USE the 'interactive_form' tool to "
	"display fields.\n"
	"CRITICAL: If the user explicitly asks you to confirm something via a form, interactive form, or form tool, "
	"you MUST call 'interactive_form' in this response instead of asking in plain text.\n"
	"CRITICAL: If you need approval before installing a dependency, you MUST use 'interactive_form'. "
	"Do NOT ask yes/no approval questions in normal prose when the tool is available.\n"
	"CRITICAL: For approval forms, ask only for information the model does NOT already know. "
	"If skill_id or package_name are already known from the current tool result or failure context, "
	"do NOT include text inputs for them in the form.\n"
	"CRITICAL: For dependency installation approval, prefer a single required field such as "
	"'approve_install', and place the package name in the form title or description.\n"
	"Keep forms concise (3-6 fields).\n\n"
	"[SIMPLIFIED PAYLOAD]\n"
	"You may use a minimal payload to reduce tool-call size.\n"
	"- 'id' and 'title' are optional.\n"
	"- Each field may be minimal (e.g., {'name':'budget'}) or even a short string label.\n"
	"- Backend will auto-fill missing label/type defaults.\n\n"
	"[MANDATORY TEXT-FIRST RULE]\n"
	"CRITICAL: You MUST output meaningful introductory text BEFORE calling 'interactive_form'.\n"
	"- NEVER call 'interactive_form' as the very first thing in your response\n"
	"- ALWAYS explain the context, acknowledge the user's request, or provide guidance BEFORE the form\n"
	"- Minimum: Output at least 1-2 sentences before the form call\n"
	'- Example: "I can help you with that. To provide the best recommendation, please share some '
	'details below:"\n\n'
	"[SINGLE FORM PER RESPONSE]\n"
	"CRITICAL: You may call 'interactive_form' ONLY ONCE per response. Do NOT call it multiple times in "
	"the same answer.\n"
	"If you need to collect information, design ONE comprehensive form that gathers all necessary "
	"details at once.\n\n"
	"[MULTI-TURN INTERACTIONS]\n"
	"1. If the information from a submitted form is insufficient, you MAY present another "
	"'interactive_form' in your NEXT response (after the user submits the first form).\n"
	"2. LIMIT: Use at most 2-3 forms total across the entire conversation. Excessive questioning "
	"frustrates users.\n"
	"3. INTERLEAVING: You can place the form anywhere in your response. Output introductory text FIRST "
	"(e.g., \"I can help with that. Please provide some details below:\"), then call 'interactive_form' "
	"once.\n"
	"4. If the user has provided enough context through previous forms, proceed directly to the final "
	"answer without requesting more information."
	)
	updated = self._append_system_message(updated, form_guidance, system_index)
	system_index = next((i for i, m in enumerate(updated) if m.get("role") == "system"), -1)

	search_tools_requiring_citations = {
	"Tavily_web_search",
	"Tavily_academic_search",
	"web_search_using_tavily",
	"web_search",
	"search_news",
	"search_exa",
	"search_arxiv_and_return_articles",
	"search_wikipedia",
	}
	if enabled_tools.intersection(search_tools_requiring_citations):
	citation_prompt = (
	"\n\n[IMPORTANT] You have access to search tools. When you use them to answer a question, "
	"you MUST cite the search results in your answer using the format [1], [2], etc., "
	"corresponding to the index of the search result provided in the tool output. Do not fabricate "
	"citations."
	)
	updated = self._append_system_message(updated, citation_prompt, system_index)

	if "local_time" in enabled_tools:
	local_time_guidance = (
	"\n\n[TIME CONTEXT GUIDANCE]\n"
	"A local-date note is already appended to each user query before model execution.\n"
	"Do not call local_time again unless the user explicitly asks to refresh/recheck time."
	)
	updated = self._append_system_message(updated, local_time_guidance, system_index)
	# [REINFORCED] System-level long-term memory guidance for the internal agent-memory skill
	if getattr(request, "enable_long_term_memory", False):
	memory_guidance = (
	"\n\n[SYSTEM-LEVEL META-RULE: LONG-TERM MEMORY]\n"
	"You are equipped with a persistent memory layer ('agent-memory'). This is a core infrastructure skill that overrides situational constraints.\n"
	"1) MANDATORY PREREQUISITE: If a query involves personal history, preferences, or prior facts, searching memory is a mandatory PREREQUISITE. "
	"This MUST be executed BEFORE any other skill logic (e.g., Roleplay, Image Search) to avoid factual inconsistency.\n"
	"2) PERSISTENCE AGAINST OVERRIDES: This meta-rule remains in effect even if other skills (like roleplay) demand immediate responses. "
	"Retrieving ground-truth user context is the first step of all processing.\n"
	"3) USAGE: Call 'execute_skill_script' with skill_id='agent-memory'. Factual alignment via memory retrieval is non-negotiable."
	)
	updated = self._append_system_message(updated, memory_guidance, system_index)

	return updated

	def _append_system_message(
	self,
	messages: list[dict[str, Any]],
	addition: str,
	system_index: int,
	) -> list[dict[str, Any]]:
	updated = list(messages)
	if system_index != -1:
	updated[system_index] = {
	**updated[system_index],
	"content": f"{updated[system_index].get('content', '')}{addition}",
	}
	else:
	updated.insert(0, {"role": "system", "content": addition})
	return updated

	def _normalize_tool_output(self, output: Any) -> Any:
	if hasattr(output, "model_dump"):
	try:
	return output.model_dump()
	except Exception:
	return str(output)
	if isinstance(output, dict):
	return output
	if isinstance(output, list):
	return [self._normalize_tool_output(item) for item in output]
	return output

	def _collect_search_sources(self, result: Any, sources_map: dict[str, Any]) -> None:
	def _extract_results(payload: Any) -> list[dict[str, Any]]:
	if isinstance(payload, list):
	return [item for item in payload if isinstance(item, dict)]
	if isinstance(payload, dict):
	for key in ("results", "items", "data", "sources", "articles", "news", "papers"):
	value = payload.get(key)
	if isinstance(value, list):
	return [item for item in value if isinstance(item, dict)]
	return []

	results = _extract_results(result)
	if not results:
	return

	for item in results:
	url = (
	item.get("url")
	or item.get("link")
	or item.get("uri")
	or item.get("source")
	or item.get("href")
	)
	if not url or url in sources_map:
	continue
	title = (
	item.get("title")
	or item.get("name")
	or item.get("headline")
	or item.get("paper_title")
	or "Unknown Source"
	)
	snippet = (
	item.get("content")
	or item.get("snippet")
	or item.get("summary")
	or item.get("abstract")
	or ""
	)
	sources_map[url] = SourceEvent(
	uri=url,
	title=title,
	snippet=str(snippet)[:200],
	).model_dump()

	async def _maybe_optimize_memories(self, agent: Agent, request: StreamChatRequest) -> None:
	return

	def _map_field_type_to_frontend(self, field_type: Any) -> str:
	"""
	Map Python/Agno field types to frontend form types.

	Args:
	field_type: Python type (class or string)

	Returns:
	Frontend form field type (text, number, checkbox, etc.)
	"""
	# Handle cases where field_type is a class/type instead of a string
	field_type_str = ""
	if isinstance(field_type, type):
	field_type_str = field_type.__name__
	elif not isinstance(field_type, str):
	field_type_str = str(field_type)
	else:
	field_type_str = field_type

	type_mapping = {
	"str": "text",
	"int": "number",
	"float": "number",
	"bool": "checkbox",
	"date": "date",
	"time": "time",
	"datetime": "datetime",
	"list": "text",
	"dict": "textarea",
	}
	return type_mapping.get(field_type_str.lower(), "text")


	# Module-level cache for the shared service instance; filled on first access.
	_stream_chat_service: StreamChatService | None = None


	def get_stream_chat_service() -> StreamChatService:
	    """
	    Return the shared ``StreamChatService`` instance.

	    The instance is created on the first call and cached in the module-level
	    ``_stream_chat_service`` variable; later calls return the same object.
	    """
	    global _stream_chat_service
	    if _stream_chat_service is None:
	        _stream_chat_service = StreamChatService()
	    return _stream_chat_service