Spaces:
Runtime error
Runtime error
File size: 1,854 Bytes
d87a140 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | """
Task functions that wrap the fraud agent for evaluation.
"""
from typing import List, Tuple
from strands_evals import Case
from app import query_agent
# Substrings identifying the tools whose results are collected as context.
_CONTEXT_TOOL_MARKERS = ("confluence", "get_application_summary", "compare")


def extract_context_and_tools(agent_result) -> Tuple[str, List[str]]:
    """Extract retrieved text and tool names from an agent result.

    Walks ``agent_result.trace.spans`` and, for every span whose
    ``span_type`` stringifies to ``'tool_execution'``, records the tool name
    from ``span.tool_call.name``. For tools whose name contains one of
    ``_CONTEXT_TOOL_MARKERS``, the tool result content is also collected as
    a ``"Source (<tool>): <content>"`` entry.

    NOTE(review): the attribute names used here (``trace``, ``spans``,
    ``tool_call``, ``tool_result``) are assumed to match the object returned
    by ``query_agent`` -- confirm against the agent SDK version in use.

    Args:
        agent_result: Object returned by the agent; only duck-typed
            attribute access is performed on it.

    Returns:
        A ``(context, tool_names)`` tuple: the context entries joined by
        blank lines (``""`` when there are none) and the tool names in the
        order their spans appear.
    """
    trace = getattr(agent_result, "trace", None)
    if not trace:
        return "", []

    context: List[str] = []
    tool_calls: List[str] = []

    # A trace whose ``spans`` is missing or None is treated as empty
    # instead of raising while iterating.
    for span in getattr(trace, "spans", None) or ():
        if not hasattr(span, "span_type") or str(span.span_type) != "tool_execution":
            continue

        # Skip malformed tool spans (no tool call / no name) rather than
        # aborting the whole extraction with an AttributeError/TypeError.
        tool_name = getattr(getattr(span, "tool_call", None), "name", None)
        if tool_name is None:
            continue
        tool_calls.append(tool_name)

        # Only search/load style tools contribute retrieved context.
        if not any(marker in tool_name for marker in _CONTEXT_TOOL_MARKERS):
            continue
        tool_result = getattr(span, "tool_result", None)
        if hasattr(tool_result, "content"):
            context.append(f"Source ({tool_name}): {tool_result.content}")

    return "\n\n".join(context), tool_calls
def get_fraud_explanation(case: Case) -> str:
    """Run the agent on a test case and return its answer as text.

    Used as the task function for basic output evaluation.

    Args:
        case: Test case; its ``input`` attribute is passed to the agent
            as the question.

    Returns:
        ``str()`` of whatever ``query_agent`` returns when called with
        ``return_full_result=False``.
    """
    return str(query_agent(case.input, return_full_result=False))
def get_fraud_explanation_with_trace(case: Case) -> dict:
    """Run the agent on a test case and return output, tools and context.

    Used as the task function for trajectory and faithfulness evaluation.

    Args:
        case: Test case; its ``input`` attribute is passed to the agent
            as the question.

    Returns:
        Dict with three keys: ``"output"`` (``str()`` of the full agent
        result), ``"trajectory"`` (tool names) and ``"context"`` (retrieved
        text), the latter two as produced by ``extract_context_and_tools``.
    """
    full_result = query_agent(case.input, return_full_result=True)

    # Pull the retrieved text and the tool-call sequence out of the result.
    retrieved_context, tool_names = extract_context_and_tools(full_result)

    return dict(
        output=str(full_result),
        trajectory=tool_names,
        context=retrieved_context,
    )
|