Spaces:

ABAO77
/

chatbot-rag-fi

Sleeping

App Files Files Community

chatbot-rag-fi / src /agents /services.py

ABAO77

Upload 147 files

0df80b4 verified about 2 months ago

raw

history blame contribute delete

16 kB

	from __future__ import annotations

	import asyncio
	import logging
	from collections.abc import AsyncGenerator
	from time import perf_counter
	from typing import Any
	import json
	import uuid

	from agents import Agent, ModelSettings, Runner, RunState
	from agents.items import ToolCallItem, ToolCallOutputItem
	from openai.types.responses import ResponseTextDeltaEvent

	from src.agents.state import AgentContext, AgentRunResult
	from src.agents.flow import run_guardrail
	from src.utils.message_builder import MessageBuilder
	from src.agents.prompts import get_prompt_bundle
	from src.utils.tool_event_inspector import ToolEventInspector
	from src.agents.tools import hand_off_ceo, retrieve_brand_context
	from src.utils.agent_utils import (
	insufficiency_fallback,
	input_guardrail_fallback,
	system_error_fallback,
	)
	from src.schemas import ChatHistoryMessage, ChatTextSegment
	from src.services.citations import CitationTagStreamFilter, parse_citation_segments
	from src.services.llm import get_chat_model

	# Module-level logger named after this module; used by AgentService.stream
	# to record streaming failures via logger.exception.
	logger = logging.getLogger(__name__)


	class AgentService:
	    """Run the "brand-assistant" agent and expose its output as a stream of events.

	    The agent itself is built lazily on first access to ``assistant_agent``.
	    ``stream`` is the main entry point: it runs the assistant and an input
	    guardrail concurrently and yields ``(event_name, payload)`` tuples.
	    """

	    def __init__(self) -> None:
	        """Start without an agent; it is created on first use."""
	        self._assistant_agent: Agent[AgentContext] | None = None

	    @property
	    def assistant_agent(self) -> Agent[AgentContext]:
	        """Return the cached assistant agent, constructing it on first access.

	        The agent uses the system prompt from the current prompt bundle, the
	        model returned by ``get_chat_model()``, disables parallel tool calls and
	        registers the ``retrieve_brand_context`` and ``hand_off_ceo`` tools.
	        """
	        if self._assistant_agent is None:
	            bundle = get_prompt_bundle()
	            self._assistant_agent = Agent(
	                name="brand-assistant",
	                instructions=bundle.system_prompt,
	                model=get_chat_model(),
	                model_settings=ModelSettings(
	                    parallel_tool_calls=False,
	                ),
	                tools=[retrieve_brand_context, hand_off_ceo],
	            )
	        return self._assistant_agent

	    def build_context(self, question: str, history: list[ChatHistoryMessage]) -> AgentContext:
	        """Create a fresh run context for ``question``.

	        ``message_count`` is the history length plus one (the current question).
	        """
	        return AgentContext(
	            question=question,
	            message_count=len(history) + 1,
	            prompt_bundle=get_prompt_bundle(),
	        )

	    def build_messages(self, history: list[ChatHistoryMessage], question: str) -> list[dict[str, str]]:
	        """Build the agent input items from the chat history and the new question."""
	        return MessageBuilder.build_input_items(history, question)

	    @staticmethod
	    def _normalize_resume_state_payload(node: Any) -> None:
	        """Normalize persisted message parts in place for SDK resume compatibility.

	        Walks ``node`` recursively through dicts and lists (any other value is
	        left untouched) and applies two rewrites to every dict:

	        * If ``role`` is assistant/user/system and ``content`` is a list, the
	          ``text`` of every ``"text"``/``"output_text"`` part is concatenated
	          into a single string that replaces ``content``, and ``status`` is
	          removed. Nothing changes when no such text part is found.
	        * If ``type`` is ``"output_text"`` and a ``text`` key exists, ``type``
	          becomes ``"text"``.

	        Returns:
	            None. ``node`` is mutated in place.
	        """
	        if isinstance(node, list):
	            for child in node:
	                AgentService._normalize_resume_state_payload(child)
	            return
	        if not isinstance(node, dict):
	            return

	        # Some persisted chat-completions items include a message envelope like:
	        # {"role":"assistant","content":[{"type":"text","text":"..."}],"status":"completed"}
	        # Normalize it into the simpler structure expected by the converter.
	        role = node.get("role")
	        content = node.get("content")
	        if role in {"assistant", "user", "system"} and isinstance(content, list):
	            text_parts = [
	                part["text"]
	                for part in content
	                if isinstance(part, dict)
	                and part.get("type") in {"text", "output_text"}
	                and isinstance(part.get("text"), str)
	            ]
	            if text_parts:
	                # NOTE(review): parts of any other type in the same list are
	                # dropped by this flattening - confirm that is acceptable.
	                node["content"] = "".join(text_parts)
	                node.pop("status", None)

	        if node.get("type") == "output_text" and "text" in node:
	            node["type"] = "text"

	        # All mutations of this dict are finished before iterating its values.
	        for child in node.values():
	            AgentService._normalize_resume_state_payload(child)

	    async def _load_resume_state(self, state_json: str) -> RunState:
	        """Rebuild a ``RunState`` from its JSON string.

	        The parsed payload is normalized first, then handed to
	        ``RunState.from_json`` with a deserializer that rebuilds the
	        ``AgentContext``. ``citation_ids`` is converted back to a set when it
	        was restored as a list (``stream`` serializes sets as lists).

	        Raises:
	            json.JSONDecodeError: if ``state_json`` is not valid JSON.
	        """
	        state_dict = json.loads(state_json)
	        self._normalize_resume_state_payload(state_dict)
	        state = await RunState.from_json(
	            self.assistant_agent,
	            state_dict,
	            context_deserializer=lambda x: AgentContext(**x),
	        )
	        restored_context = state._context.context
	        if isinstance(restored_context.citation_ids, list):
	            restored_context.citation_ids = set(restored_context.citation_ids)
	        return state

	    def _build_result(
	        self,
	        *,
	        question: str,
	        context: AgentContext,
	        raw_output: str,
	        ttft_ms: int | None,
	        latency_ms: int,
	        fallback_answer: str | None = None,
	    ) -> AgentRunResult:
	        """Assemble the final ``AgentRunResult`` for a finished run.

	        Content selection, in order:

	        * a truthy ``fallback_answer`` replaces the output entirely;
	        * otherwise, with ``context.retrieval_status == "insufficient"``, the
	          stripped output is kept, or the insufficiency fallback if it is empty;
	        * otherwise an empty output becomes the system-error fallback.

	        Without a fallback answer, citations are parsed when the context holds
	        citation ids or the content contains ``<doc-ref``; content with no
	        parsed segments is wrapped in a single text segment. With a fallback
	        answer, ``segments`` and ``citations`` are always empty.

	        ``question`` is accepted for interface compatibility and is not read.
	        """
	        content = raw_output.strip()
	        segments = []
	        citations = []

	        if fallback_answer:
	            content = fallback_answer
	        elif context.retrieval_status == "insufficient":
	            content = content or insufficiency_fallback()
	        elif not content:
	            content = system_error_fallback()

	        if not fallback_answer:
	            if context.citation_ids or "<doc-ref" in content:
	                parsed = parse_citation_segments(content, allowed_document_ids=context.citation_ids)
	                content = parsed.content
	                segments = parsed.segments
	                citations = parsed.citations
	            if content and not segments:
	                segments = [ChatTextSegment(text=content)]

	        return AgentRunResult(
	            content=content,
	            segments=segments,
	            citations=citations,
	            should_handoff=context.should_handoff,
	            fallback_answer=fallback_answer,
	            ttft_ms=ttft_ms,
	            latency_ms=latency_ms,
	            contact=context.contact,
	            email_notification=context.email_notification,
	        )

	    async def stream(
	        self,
	        question: str,
	        history: list[ChatHistoryMessage],
	        *,
	        conversation_id: str | None = None,
	        resume_data: dict | None = None,
	    ) -> AsyncGenerator[tuple[str, dict[str, Any]], None]:
	        """Run the assistant and yield ``(event_name, payload)`` tuples.

	        The input guardrail runs concurrently with the assistant. Until the
	        guardrail has resolved, status events and visible tokens are buffered
	        and nothing is shown to the caller; once it resolves as not blocked the
	        buffers are flushed and later events are yielded immediately. If it
	        resolves as blocked, the run is cancelled, buffered output is discarded
	        and the input-guardrail fallback is emitted instead.

	        Events yielded:
	            ``"status"``: ``retrieval_start`` / ``retrieval_end`` around the
	                ``retrieve_brand_context`` tool.
	            ``"ceo_email_sent"``: at most once, after ``hand_off_ceo`` output
	                when the context holds an email notification.
	            ``"perf"``: ``{"ttft_ms": ...}`` right before the first token.
	            ``"token"``: ``{"delta": ...}`` visible text.
	            ``"interrupt"``: the run stopped on interruptions; carries the
	                serialized run state. The generator then ends without "done".
	            ``"done"``: final content, segments, citations and timings.

	        Args:
	            question: The user's current message.
	            history: Previous chat messages.
	            conversation_id: Explicit conversation id. Falls back to the id in
	                ``resume_data`` and then to a new UUID4.
	            resume_data: When truthy, resume a previously interrupted run from
	                ``resume_data["state_json"]``. Pending ``hand_off_ceo``
	                interruptions are approved and the user's email, name and
	                phone are copied onto the restored context.

	        Raises:
	            Errors raised while loading the resume state (including ``KeyError``
	            for a missing ``"state_json"``) propagate, because they occur before
	            the guarded section. Any ``Exception`` during the run itself is
	            logged and converted into the system-error fallback.
	        """
	        resume_conversation_id = resume_data.get("conversation_id") if isinstance(resume_data, dict) else None
	        resolved_conversation_id = conversation_id or resume_conversation_id or str(uuid.uuid4())

	        if resume_data:
	            state = await self._load_resume_state(resume_data["state_json"])
	            context = state._context.context
	            context.user_email = resume_data.get("user_email")
	            context.user_name = resume_data.get("user_name")
	            context.user_phone = resume_data.get("user_phone")
	            for item in state.get_interruptions():
	                if item.name == "hand_off_ceo":
	                    state.approve(item)
	        else:
	            context = self.build_context(question, history)
	        payload = self.build_messages(history, question)

	        started_at = perf_counter()
	        ttft_ms: int | None = None

	        # Fire guardrail and assistant in parallel
	        guardrail_task = asyncio.create_task(
	            run_guardrail(question)
	        )

	        filter_state = CitationTagStreamFilter()
	        tool_calls: dict[str, str] = {}  # tool call id -> tool name
	        token_buffer: list[str] = []
	        pending_events: list[tuple[str, dict[str, Any]]] = []
	        streamed_visible = False
	        ceo_notification_emitted = False
	        guardrail_resolved = False
	        is_blocked = False
	        result = None

	        try:
	            if resume_data:
	                result = Runner.run_streamed(self.assistant_agent, state)
	            else:
	                result = Runner.run_streamed(
	                    self.assistant_agent,
	                    input=payload,
	                    context=context,
	                    max_turns=6,
	                )

	            async for event in result.stream_events():
	                # Non-blocking poll: has guardrail resolved?
	                if not guardrail_resolved and guardrail_task.done():
	                    guardrail_resolved = True
	                    is_blocked = guardrail_task.result()
	                    if is_blocked:
	                        result.cancel()
	                        break
	                    # Flush buffered status events then tokens
	                    for evt_type, evt_data in pending_events:
	                        yield evt_type, evt_data
	                    pending_events.clear()
	                    if token_buffer:
	                        ttft_ms = max(1, round((perf_counter() - started_at) * 1000))
	                        yield "perf", {"ttft_ms": ttft_ms}
	                        for tok in token_buffer:
	                            streamed_visible = True
	                            yield "token", {"delta": tok}
	                        token_buffer.clear()

	                # Process event
	                if event.type == "run_item_stream_event":
	                    if event.name == "tool_called" and isinstance(event.item, ToolCallItem):
	                        tool_name = ToolEventInspector.tool_name(event.item)
	                        tool_call_id = ToolEventInspector.tool_call_id_from_call(event.item)
	                        if tool_name and tool_call_id:
	                            # Remembered so the matching output event can be
	                            # attributed to a tool by its call id.
	                            tool_calls[tool_call_id] = tool_name
	                        if tool_name == "retrieve_brand_context":
	                            evt: tuple[str, dict[str, Any]] = ("status", {"stage": "retrieval_start"})
	                            if guardrail_resolved:
	                                yield evt[0], evt[1]
	                            else:
	                                pending_events.append(evt)
	                        continue

	                    if event.name == "tool_output" and isinstance(event.item, ToolCallOutputItem):
	                        tool_call_id = ToolEventInspector.tool_call_id_from_output(event.item)
	                        tool_name = tool_calls.get(tool_call_id or "")
	                        if tool_name == "retrieve_brand_context":
	                            evt = (
	                                "status",
	                                {
	                                    "stage": "retrieval_end",
	                                    "sources": [item.model_dump() for item in context.citations],
	                                    "handoff": context.should_handoff,
	                                    "contact": context.contact.model_dump() if context.contact else None,
	                                },
	                            )
	                            if guardrail_resolved:
	                                yield evt[0], evt[1]
	                            else:
	                                pending_events.append(evt)
	                        if tool_name == "hand_off_ceo" and context.email_notification and not ceo_notification_emitted:
	                            ceo_notification_emitted = True
	                            evt = (
	                                "ceo_email_sent",
	                                {
	                                    "contact": context.contact.model_dump() if context.contact else None,
	                                    "email_notification": context.email_notification.model_dump(),
	                                },
	                            )
	                            if guardrail_resolved:
	                                yield evt[0], evt[1]
	                            else:
	                                pending_events.append(evt)
	                        continue

	                if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
	                    visible_delta = filter_state.feed(event.data.delta)
	                    if not visible_delta:
	                        continue
	                    if guardrail_resolved:
	                        if ttft_ms is None:
	                            ttft_ms = max(1, round((perf_counter() - started_at) * 1000))
	                            yield "perf", {"ttft_ms": ttft_ms}
	                        streamed_visible = True
	                        yield "token", {"delta": visible_delta}
	                    else:
	                        token_buffer.append(visible_delta)

	            # Assistant finished - if guardrail hasn't resolved yet, await it now
	            if not guardrail_resolved:
	                is_blocked = await guardrail_task
	                guardrail_resolved = True
	                if not is_blocked:
	                    # Flush buffered events and tokens
	                    for evt_type, evt_data in pending_events:
	                        yield evt_type, evt_data
	                    if token_buffer:
	                        if ttft_ms is None:
	                            ttft_ms = max(1, round((perf_counter() - started_at) * 1000))
	                            yield "perf", {"ttft_ms": ttft_ms}
	                        for tok in token_buffer:
	                            streamed_visible = True
	                            yield "token", {"delta": tok}

	            if is_blocked:
	                fallback = input_guardrail_fallback()
	                latency_ms = max(1, round((perf_counter() - started_at) * 1000))
	                if ttft_ms is None:
	                    ttft_ms = latency_ms
	                    yield "perf", {"ttft_ms": ttft_ms}
	                yield "token", {"delta": fallback}
	                run_result = self._build_result(
	                    question=question,
	                    context=context,
	                    raw_output="",
	                    ttft_ms=ttft_ms,
	                    latency_ms=latency_ms,
	                    fallback_answer=fallback,
	                )
	            else:
	                if result and result.interruptions:
	                    state_json_dict = result.to_state().to_json()
	                    yield "interrupt", {
	                        "conversation_id": resolved_conversation_id,
	                        # Sets are not JSON-serializable; they are written as
	                        # lists (see _load_resume_state for the reverse step).
	                        "state_json": json.dumps(state_json_dict, default=lambda x: list(x) if isinstance(x, set) else x),
	                        "interruptions": [
	                            {"name": i.name, "arguments": i.arguments} for i in result.interruptions
	                        ]
	                    }
	                    return

	                # NOTE(review): whatever filter_state.flush() returns is appended
	                # to the final text but never streamed as a token - confirm
	                # this is intended.
	                final_output = str(result.final_output or "") + filter_state.flush()
	                latency_ms = max(1, round((perf_counter() - started_at) * 1000))
	                run_result = self._build_result(
	                    question=question,
	                    context=context,
	                    raw_output=final_output,
	                    ttft_ms=ttft_ms,
	                    latency_ms=latency_ms,
	                )

	        except Exception:
	            logger.exception("Agent streaming failed")
	            guardrail_task.cancel()
	            if result is not None:
	                result.cancel()
	            fallback = system_error_fallback()
	            latency_ms = max(1, round((perf_counter() - started_at) * 1000))
	            # Only show the fallback text when the user has seen nothing yet.
	            if not streamed_visible:
	                if ttft_ms is None:
	                    ttft_ms = latency_ms
	                    yield "perf", {"ttft_ms": ttft_ms}
	                yield "token", {"delta": fallback}
	            run_result = self._build_result(
	                question=question,
	                context=context,
	                raw_output="",
	                ttft_ms=ttft_ms,
	                latency_ms=latency_ms,
	                fallback_answer=fallback,
	            )
	        except BaseException:
	            # GeneratorExit (consumer closed the stream) and
	            # asyncio.CancelledError are not subclasses of Exception. Without
	            # this handler the guardrail task and the streamed run would keep
	            # running after nobody is listening. No yield is allowed here:
	            # just release the background work and propagate.
	            guardrail_task.cancel()
	            if result is not None:
	                result.cancel()
	            raise

	        yield "done", {
	            "conversation_id": resolved_conversation_id,
	            "content": run_result.content,
	            "segments": [segment.model_dump() for segment in run_result.segments],
	            "citations": [item.model_dump() for item in run_result.citations],
	            "handoff": run_result.should_handoff,
	            "contact": run_result.contact.model_dump() if run_result.contact else None,
	            "email_notification": run_result.email_notification.model_dump() if run_result.email_notification else None,
	            "ttft_ms": run_result.ttft_ms,
	            "latency_ms": run_result.latency_ms,
	        }