# chrisjcc's picture
# Strands-Agents SDK Evaluators
# d87a140
# Raw
# History Blame Contribute Delete
# 1.85 kB
"""
Task functions that wrap the fraud agent for evaluation.
"""
from typing import List, Tuple
from strands_evals import Case
from app import query_agent
def extract_context_and_tools(agent_result) -> Tuple[str, List[str]]:
    """Extract retrieved context text and tool names from an agent result.

    Walks ``agent_result.trace.spans`` and, for every tool-execution span,
    records the called tool's name. For tools whose name contains
    ``confluence``, ``get_application_summary`` or ``compare``, the tool
    result content is also collected as context.

    Args:
        agent_result: Object returned by the agent. Expected to expose
            ``trace.spans``; each tool span is expected to carry
            ``span_type``, ``tool_call.name`` and ``tool_result.content``.
            NOTE(review): this schema is inferred from the attribute
            accesses below -- confirm against the tracing library.

    Returns:
        A ``(context, tool_calls)`` tuple: the collected context entries
        joined by blank lines, and the tool names in trace order. Returns
        ``("", [])`` when there is no trace or the trace has no spans.
    """
    trace = getattr(agent_result, 'trace', None)
    if not trace:
        return "", []

    # Tool-name fragments that mark a search/load tool whose output counts
    # as retrieved context.
    context_markers = ('confluence', 'get_application_summary', 'compare')

    context = []
    tool_calls = []

    # `spans` may be absent or None on a partially populated trace.
    for span in getattr(trace, 'spans', None) or []:
        span_type = getattr(span, 'span_type', None)
        # str() of a plain Enum member is 'Class.MEMBER', so the member's
        # value is compared as well as its string form.
        if 'tool_execution' not in (str(span_type), getattr(span_type, 'value', None)):
            continue

        tool_name = getattr(getattr(span, 'tool_call', None), 'name', None)
        if not isinstance(tool_name, str):
            # Span carries no usable tool call; nothing to record.
            continue
        tool_calls.append(tool_name)

        # Context from Search/Load Tools
        if any(marker in tool_name for marker in context_markers):
            tool_result = getattr(span, 'tool_result', None)
            # Skip the context entry (but keep the tool name) when the
            # span has no result payload.
            if tool_result is not None and hasattr(tool_result, 'content'):
                context.append(f"Source ({tool_name}): {tool_result.content}")

    return "\n\n".join(context), tool_calls
def get_fraud_explanation(case: Case) -> str:
    """
    Run the agent on a test case and return its answer as plain text.

    Used as the task function for basic output evaluation.

    Args:
        case: Test case whose ``input`` is the question passed to the agent

    Returns:
        The agent's response converted with ``str()``
    """
    # Only the final answer is requested here, not the full result object.
    answer = query_agent(case.input, return_full_result=False)
    return str(answer)
def get_fraud_explanation_with_trace(case: Case) -> dict:
    """
    Run the agent on a test case and return its answer plus trace details.

    Used as the task function for trajectory and faithfulness evaluation.

    Args:
        case: Test case whose ``input`` is the question passed to the agent

    Returns:
        Dict with three keys: ``output`` (the result converted with
        ``str()``), ``trajectory`` (tool names) and ``context`` (retrieved
        text), the latter two as produced by ``extract_context_and_tools``
    """
    full_result = query_agent(case.input, return_full_result=True)

    # The helper returns a (context, tool names) pair.
    retrieved_context, tool_names = extract_context_and_tools(full_result)

    payload = {"output": str(full_result)}
    payload["trajectory"] = tool_names
    payload["context"] = retrieved_context
    return payload