File size: 1,321 Bytes
bba4fab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env bash
#
# Preamble: enable strict mode, resolve the repository root, apply defaults to
# the environment-driven settings, and print the effective configuration.
#
# Environment overrides (each falls back to its default when unset or empty):
#   MODELS              default: gpt-oss
#   ROUTER_AGENT        default: hf_hub_community
#   ROUTER_AGENT_CARDS  default: $ROOT/.fast-agent/tool-cards
#   TIMEOUT             default: 240
#   RUN_COMMUNITY       default: 1
#   RUN_ROUTING         default: 1
#   RUN_DESC_AB         default: 1
set -euo pipefail

# ROOT is the parent of the directory that contains this script; all relative
# paths used afterwards are resolved against it.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT"

# ':=' assigns the default when the variable is unset or empty.
: "${MODELS:=gpt-oss}"
: "${ROUTER_AGENT:=hf_hub_community}"
: "${ROUTER_AGENT_CARDS:=$ROOT/.fast-agent/tool-cards}"
: "${TIMEOUT:=240}"

: "${RUN_COMMUNITY:=1}"
: "${RUN_ROUTING:=1}"
: "${RUN_DESC_AB:=1}"

# One format string, applied once per argument, emits one "[info]" line each.
printf '[info] %s\n' \
  "root=$ROOT" \
  "models=$MODELS" \
  "router_agent=$ROUTER_AGENT" \
  "router_agent_cards=$ROUTER_AGENT_CARDS"

#######################################
# Print a section banner: an empty line followed by the arguments wrapped in
# "==========" markers.
# Arguments: any number of words; they are joined into one title via "$*".
# Outputs:   writes the banner to stdout.
#######################################
action() {
  printf '\n========== %s ==========\n' "$*"
}

# Run the evaluation stages in a fixed order. A stage runs only when its
# RUN_* flag is exactly "1"; because the script uses `set -e`, a failing
# Python script aborts the run.

# Succeeds only when the given flag value is exactly "1".
stage_enabled() {
  [[ "$1" == "1" ]]
}

if stage_enabled "$RUN_COMMUNITY"; then
  action "HF Hub community challenge scoring"
  community_cmd=(
    python scripts/score_hf_hub_community_challenges.py
    --timeout "$TIMEOUT"
  )
  "${community_cmd[@]}"
fi

if stage_enabled "$RUN_ROUTING"; then
  action "Tool routing batch"
  routing_cmd=(
    python scripts/run_tool_routing_batch.py
    --models "$MODELS"
    --agent "$ROUTER_AGENT"
    --agent-cards "$ROUTER_AGENT_CARDS"
    --timeout "$TIMEOUT"
  )
  "${routing_cmd[@]}"
fi

if stage_enabled "$RUN_DESC_AB"; then
  action "Tool description A/B"
  desc_ab_cmd=(
    python scripts/eval_tool_description_ab.py
    --models "$MODELS"
    --timeout "$TIMEOUT"
  )
  "${desc_ab_cmd[@]}"

  # The plotting step runs right after the A/B evaluation, under the same flag.
  action "Tool description plots + interpretation"
  python scripts/plot_tool_description_eval.py
fi

action "Done"
printf '%s\n' "See docs/RESULTS.md for report locations."