""" Task functions that wrap the fraud agent for evaluation. """ from typing import List, Tuple from strands_evals import Case from app import query_agent def extract_context_and_tools(agent_result) -> Tuple[str, List[str]]: """Extracts retrieved text and tool names from AgentResult.""" context = [] tool_calls = [] if not hasattr(agent_result, 'trace') or not agent_result.trace: return "", [] for span in agent_result.trace.spans: # Check for tool execution spans if hasattr(span, 'span_type') and str(span.span_type) == 'tool_execution': # Tool Name tool_name = span.tool_call.name tool_calls.append(tool_name) # Context from Search/Load Tools if 'confluence' in tool_name or 'get_application_summary' in tool_name or 'compare' in tool_name: context.append(f"Source ({tool_name}): {span.tool_result.content}") return "\n\n".join(context), tool_calls def get_fraud_explanation(case: Case) -> str: """ Task function for basic output evaluation. Args: case: Test case with input question Returns: Agent's response as string """ result = query_agent(case.input, return_full_result=False) return str(result) def get_fraud_explanation_with_trace(case: Case) -> dict: """ Task function for trajectory and faithfulness evaluation. Args: case: Test case with input question Returns: Dict with output, trajectory, and context """ result = query_agent(case.input, return_full_result=True) # Extract context and tools context, tools = extract_context_and_tools(result) return { "output": str(result), "trajectory": tools, "context": context }