File size: 14,563 Bytes
bd51d10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b7fb4d
 
 
 
 
 
 
 
 
bd51d10
2b7fb4d
bd51d10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
import { useEffect, useState, useMemo } from "react";

// ── Types ────────────────────────────────────────────────────────────────

/**
 * One record of the `rows` array returned by `/api/selected-tools/`.
 * Field notes describe how this file uses each value; fields not read here
 * are marked as assumptions.
 */
interface Row {
  /** Identifier shown as `#<id>` in the UI, used as the list key and as the search target. */
  query_id: string;
  /** Free text displayed in the "Rationale" box of the header. */
  rationale: string;
  /** Values rendered as `#<n>` chips next to "Selected steps". */
  selected_indices: number[];
  /** Not read in this file — presumably the number of steps that was asked for; TODO confirm. */
  k_requested: number;
  /** Displayed as `k=<n>` next to "selected steps" / "steps selected". */
  k_effective: number;
  /** Reference-trajectory text; parsed by `parseExcerpt` for the left column. */
  excerpt: string;
  /** Generated-trajectory text; parsed by `parseNewTrajectory` for the right column. */
  new_trajectory: string;
  /** Drives the "direct"/"searched" filters and badges; the stats banner counts `true` rows as "no tool calls". */
  direct_answer: boolean;
  /** Not read in this file — presumably a per-tool call count keyed by tool name; TODO confirm. */
  tool_call_counts: Record<string, number>;
  /** Shown as "<n> calls" / "<n> tool calls" when `direct_answer` is false. */
  total_tool_calls: number;
  /** Shown verbatim as a pill; the value "completed" gets neutral styling, anything else amber. */
  status: string;
}

// ── Block parsers ────────────────────────────────────────────────────────

/** The kinds of step a trajectory is broken into; `unknown` covers unrecognised sections. */
type BlockType = "reasoning" | "tool_call" | "tool_result" | "final_answer" | "unknown";

/** A single parsed trajectory step: its kind, an optional display label, and its text. */
interface Block {
  type: BlockType;
  label?: string;
  content: string;
}

/**
 * CSS utility class names applied per block kind: `border` for the left
 * border, `labelColor` for the label text, and `bg` for the background
 * (empty for `unknown`, i.e. no background class).
 */
const BLOCK_STYLES: Record<BlockType, { border: string; labelColor: string; bg: string }> = {
  reasoning: {
    border: "border-purple-700",
    labelColor: "text-purple-400",
    bg: "bg-purple-950/30",
  },
  tool_call: {
    border: "border-blue-700",
    labelColor: "text-blue-400",
    bg: "bg-blue-950/30",
  },
  tool_result: {
    border: "border-gray-600",
    labelColor: "text-gray-400",
    bg: "bg-gray-800/30",
  },
  final_answer: {
    border: "border-green-700",
    labelColor: "text-green-400",
    bg: "bg-green-950/30",
  },
  unknown: {
    border: "border-gray-700",
    labelColor: "text-gray-500",
    bg: "",
  },
};

/** Leading markers recognised in an excerpt section, with the block each one produces. */
const EXCERPT_MARKERS: ReadonlyArray<{ prefix: string; type: Block["type"]; label: string }> = [
  { prefix: "[Reasoning]:",    type: "reasoning",    label: "Reasoning" },
  { prefix: "[Tool call]",     type: "tool_call",    label: "Tool Call" },
  { prefix: "[Tool result]:",  type: "tool_result",  label: "Tool Result" },
  { prefix: "[Final answer]:", type: "final_answer", label: "Final Answer" },
];

/**
 * Parse the `excerpt` column (reference trajectory format) into display blocks.
 *
 * The text is cut wherever a blank line is followed by `[`; each section is
 * then classified by its leading marker:
 *   [Reasoning]: text
 *   [Tool call] tool_name\narguments:\n{...}
 *   [Tool result]:\n[...]
 *   [Final answer]: text
 * The marker is removed and the remainder trimmed. A non-empty section with
 * no recognised marker is kept verbatim as an `unknown` block labelled with
 * an em dash; empty sections are dropped.
 *
 * @param text Raw excerpt string; an empty string yields no blocks.
 * @returns Blocks in the order their sections appear in `text`.
 */
function parseExcerpt(text: string): Block[] {
  if (!text) return [];
  const blocks: Block[] = [];
  for (const part of text.split(/\n\n(?=\[)/)) {
    const section = part.trim();
    if (!section) continue;
    const marker = EXCERPT_MARKERS.find(m => section.startsWith(m.prefix));
    if (marker) {
      blocks.push({
        type: marker.type,
        label: marker.label,
        content: section.slice(marker.prefix.length).trim(),
      });
    } else {
      // Written as an escape so the em dash cannot be corrupted by a wrong file encoding again.
      blocks.push({ type: "unknown", label: "\u2014", content: section });
    }
  }
  return blocks;
}

/** Matches a `[Tool Call: name]` header at the start of a section; group 1 is the raw name. */
const TOOL_CALL_HEADER_RE = /^\[Tool Call:([^\]]*)\](?:\n|$)/;

/** Matches the `[Tool Result]` marker embedded in a tool-call section (followed by text or at the end). */
const TOOL_RESULT_MARKER_RE = /\n\n\[Tool Result\](?:\n|$)/;

/**
 * If `section` is exactly `header`, or starts with `header` followed by a
 * newline, return the trimmed text after the header; otherwise return null.
 */
function stripSectionHeader(section: string, header: string): string | null {
  if (section === header) return "";
  return section.startsWith(header + "\n") ? section.slice(header.length + 1).trim() : null;
}

/**
 * Parse the `new_trajectory` column (our formatted output) into display blocks:
 *   [Reasoning]\ntext
 *   [Tool Call: name]\nArguments:\n{...}\n\n[Tool Result]\n{...}
 *   [Final Answer]\ntext
 *   Sections separated by \n\n---\n\n
 *
 * A tool-call section yields a `tool_call` block and, when it contains a
 * `[Tool Result]` marker, a following `tool_result` block. Sections that
 * consist of a header only (nothing left after trimming) produce a block of
 * the matching kind with empty content rather than an `unknown` block, and a
 * tool-call header is always removed from the call content. Any other
 * non-empty section is kept verbatim as an `unknown` block labelled with an
 * em dash; empty sections are dropped.
 *
 * @param text Raw trajectory string; an empty string yields no blocks.
 * @returns Blocks in the order their sections appear in `text`.
 */
function parseNewTrajectory(text: string): Block[] {
  if (!text) return [];
  const blocks: Block[] = [];

  for (const part of text.split("\n\n---\n\n")) {
    const section = part.trim();
    if (!section) continue;

    const reasoning = stripSectionHeader(section, "[Reasoning]");
    if (reasoning !== null) {
      blocks.push({ type: "reasoning", label: "Reasoning", content: reasoning });
      continue;
    }

    if (section.startsWith("[Tool Call:")) {
      const header = TOOL_CALL_HEADER_RE.exec(section);
      // A malformed header (no closing bracket on the first line) keeps the "unknown" name.
      const toolName = header?.[1]?.trim() ?? "unknown";
      const resultMatch = TOOL_RESULT_MARKER_RE.exec(section);
      const callPart = resultMatch ? section.slice(0, resultMatch.index) : section;
      blocks.push({
        type: "tool_call",
        label: `Tool Call: ${toolName}`,
        content: callPart.replace(TOOL_CALL_HEADER_RE, "").trim(),
      });
      if (resultMatch) {
        blocks.push({
          type: "tool_result",
          label: "Tool Result",
          content: section.slice(resultMatch.index + resultMatch[0].length).trim(),
        });
      }
      continue;
    }

    const finalAnswer = stripSectionHeader(section, "[Final Answer]");
    if (finalAnswer !== null) {
      blocks.push({ type: "final_answer", label: "Final Answer", content: finalAnswer });
      continue;
    }

    // Written as an escape so the em dash cannot be corrupted by a wrong file encoding again.
    blocks.push({ type: "unknown", label: "\u2014", content: section });
  }

  return blocks;
}

// ── Trajectory renderer ──────────────────────────────────────────────────

/**
 * Renders parsed trajectory blocks as a vertical stack. Each block gets a
 * coloured left border and background looked up in `BLOCK_STYLES` by its
 * type, a small uppercase heading (the block's label, or its type when no
 * label is set), and its content in a wrapping monospace `<pre>`.
 * An empty list renders a "No steps." placeholder instead.
 */
function TrajectoryView({ blocks }: { blocks: Block[] }) {
  if (blocks.length === 0) {
    return <div className="text-gray-500 text-xs italic">No steps.</div>;
  }

  const items = blocks.map((block, index) => {
    const { border, bg, labelColor } = BLOCK_STYLES[block.type];
    const heading = block.label ?? block.type;
    return (
      <div key={index} className={`border-l-2 ${border} ${bg} pl-3 py-1.5 rounded-r`}>
        <div className={`text-[10px] font-bold uppercase tracking-widest mb-1 ${labelColor}`}>
          {heading}
        </div>
        <pre className="text-xs text-gray-300 whitespace-pre-wrap font-mono leading-relaxed">{block.content}</pre>
      </div>
    );
  });

  return <div className="space-y-2">{items}</div>;
}

// ── Filter type ──────────────────────────────────────────────────────────

/**
 * Sidebar row filter: "all" keeps every row, "direct" keeps rows whose
 * `direct_answer` is true, and "searched" keeps rows whose `direct_answer` is false.
 */
type FilterMode = "all" | "direct" | "searched";

// ── Main component ───────────────────────────────────────────────────────

/** Filter toggles in the order they are rendered in the sidebar. */
const FILTER_MODES: readonly FilterMode[] = ["all", "direct", "searched"];

/**
 * Browser for the rows served by `/api/selected-tools/`.
 *
 * Layout: a sidebar (direct-answer stats, filter toggles, query-ID search and
 * the list of matching rows) next to a detail pane that shows the selected
 * row's header (badges, selected step indices, rationale) and two columns:
 * the parsed `excerpt` on the left and the parsed `new_trajectory` on the right.
 *
 * Behaviour notes:
 * - Rows are fetched once on mount; the request is aborted on unmount so no
 *   state is set on an unmounted component.
 * - A non-OK response, a network failure, or a payload without a `rows`
 *   array is shown as an error message instead of the main UI.
 * - The search matches `query_id` as a case-insensitive substring, ignoring
 *   leading/trailing whitespace in the search box.
 * - Changing the filter or the search text resets the selection to the first row.
 */
export default function SelectedToolsApp() {
  const [data, setData] = useState<Row[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [selectedIdx, setSelectedIdx] = useState(0);
  const [search, setSearch] = useState("");
  const [filter, setFilter] = useState<FilterMode>("all");

  useEffect(() => {
    const controller = new AbortController();
    setLoading(true);
    fetch("/api/selected-tools/", { signal: controller.signal })
      .then(r => {
        // statusText can be empty, so fall back to the numeric status to keep the message non-empty.
        if (!r.ok) throw new Error(r.statusText || `HTTP ${r.status}`);
        return r.json();
      })
      .then((d: { rows: Row[] } | null) => {
        if (controller.signal.aborted) return;
        if (!d || !Array.isArray(d.rows)) throw new Error("Malformed response: missing rows");
        setData(d.rows);
        setLoading(false);
      })
      .catch((e: unknown) => {
        // An aborted request means the component unmounted; there is nothing to report.
        if (controller.signal.aborted) return;
        setError(e instanceof Error ? e.message : String(e));
        setLoading(false);
      });
    return () => controller.abort();
  }, []);

  const filtered = useMemo(() => {
    let rows = data;
    if (filter === "direct")   rows = rows.filter(r => r.direct_answer);
    if (filter === "searched") rows = rows.filter(r => !r.direct_answer);
    // Normalise both sides so the match ignores case and surrounding whitespace.
    const q = search.trim().toLowerCase();
    if (q) {
      rows = rows.filter(r => r.query_id.toLowerCase().includes(q));
    }
    return rows;
  }, [data, search, filter]);

  const current = filtered[selectedIdx] ?? null;

  const excerptBlocks    = useMemo(() => current ? parseExcerpt(current.excerpt) : [],         [current]);
  const trajectoryBlocks = useMemo(() => current ? parseNewTrajectory(current.new_trajectory) : [], [current]);

  // Stats (computed over all rows, not just the filtered ones)
  const directCount    = data.filter(r => r.direct_answer).length;
  const directPct      = data.length ? Math.round(100 * directCount / data.length) : 0;

  if (loading) return <div className="h-full flex items-center justify-center text-gray-400">Loading from HuggingFace…</div>;
  // Compare against null so an empty error message still shows the error view.
  if (error !== null) return <div className="h-full flex items-center justify-center text-red-400">Error: {error}</div>;

  return (
    <div className="h-full flex overflow-hidden bg-gray-950 text-gray-100">

      {/* ── Sidebar ──────────────────────────────────────────────── */}
      <div className="w-64 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900">

        {/* Stats banner */}
        <div className="px-3 py-2 border-b border-gray-800 bg-gray-900/80">
          <div className="text-[10px] text-gray-500 uppercase tracking-widest mb-1">Direct-answer rate</div>
          <div className="text-lg font-bold text-emerald-400">{directPct}%</div>
          <div className="text-[10px] text-gray-600">{directCount} / {data.length} no tool calls</div>
        </div>

        {/* Filter toggles */}
        <div className="flex gap-1 px-2 py-2 border-b border-gray-800">
          {FILTER_MODES.map(m => (
            <button
              key={m}
              onClick={() => { setFilter(m); setSelectedIdx(0); }}
              className={`flex-1 text-[10px] py-1 rounded border transition-colors capitalize ${
                filter === m
                  ? m === "direct"   ? "bg-emerald-900/60 border-emerald-600 text-emerald-300"
                  : m === "searched" ? "bg-blue-900/60 border-blue-600 text-blue-300"
                  :                   "bg-gray-700 border-gray-500 text-gray-200"
                  : "bg-gray-800/50 border-gray-700 text-gray-500 hover:border-gray-500"
              }`}
            >{m}</button>
          ))}
        </div>

        {/* Search */}
        <div className="px-2 py-1.5 border-b border-gray-800">
          <input
            type="text"
            placeholder="Search query ID…"
            value={search}
            onChange={e => { setSearch(e.target.value); setSelectedIdx(0); }}
            className="w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"
          />
          <div className="text-[10px] text-gray-600 mt-1">{filtered.length} / {data.length}</div>
        </div>

        {/* Query list */}
        <div className="flex-1 overflow-y-auto">
          {filtered.map((row, i) => (
            <button
              key={row.query_id}
              onClick={() => setSelectedIdx(i)}
              className={`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${
                selectedIdx === i
                  ? "bg-blue-900/40 text-blue-200 border-l-2 border-l-blue-500"
                  : "text-gray-400 hover:bg-gray-800"
              }`}
            >
              <div className="flex items-center justify-between">
                <span className="font-medium text-gray-200">#{row.query_id}</span>
                {row.direct_answer
                  ? <span className="text-[9px] px-1.5 py-0.5 rounded-full bg-emerald-900/60 text-emerald-400 border border-emerald-800">direct</span>
                  : <span className="text-[9px] px-1.5 py-0.5 rounded-full bg-blue-900/60 text-blue-400 border border-blue-800">{row.total_tool_calls} calls</span>
                }
              </div>
              <div className="text-[10px] text-gray-600 mt-0.5">k={row.k_effective} selected steps</div>
            </button>
          ))}
        </div>
      </div>

      {/* ── Main: two-column side-by-side ──────────────────────── */}
      {current ? (
        <div className="flex-1 flex flex-col min-w-0 overflow-hidden">

          {/* Header */}
          <div className="px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0">
            <div className="flex items-center gap-3 flex-wrap">
              <span className="text-sm font-medium text-gray-100">Query #{current.query_id}</span>
              {current.direct_answer
                ? <span className="text-xs px-2 py-0.5 rounded-full bg-emerald-900/50 text-emerald-300 border border-emerald-800">Direct answer</span>
                : <span className="text-xs px-2 py-0.5 rounded-full bg-blue-900/50 text-blue-300 border border-blue-800">{current.total_tool_calls} tool calls</span>
              }
              <span className="text-xs text-gray-500">k={current.k_effective} steps selected</span>
              <span className={`text-xs px-2 py-0.5 rounded-full ${current.status === "completed" ? "bg-gray-800 text-gray-400" : "bg-amber-900/50 text-amber-300"}`}>
                {current.status}
              </span>
            </div>
            {/* Selected indices */}
            <div className="mt-1.5 flex items-center gap-1.5 flex-wrap">
              <span className="text-[10px] font-bold uppercase tracking-widest text-gray-500">Selected steps</span>
              {current.selected_indices.map((idx, pos) => (
                // Position is part of the key so a repeated index cannot produce duplicate keys.
                <span key={`${pos}-${idx}`} className="text-[10px] px-1.5 py-0.5 rounded bg-gray-800 border border-gray-700 text-gray-300 font-mono">
                  #{idx}
                </span>
              ))}
            </div>
            {/* Rationale */}
            <div className="mt-1.5 text-xs text-gray-400 leading-snug bg-gray-800/50 rounded px-2 py-1.5 border border-gray-700">
              <span className="text-[10px] font-bold uppercase tracking-widest text-amber-500 mr-2">Rationale</span>
              {current.rationale}
            </div>
          </div>

          {/* Side-by-side columns */}
          <div className="flex-1 flex overflow-hidden min-w-0">

            {/* Left: excerpt (selected tool calls from reference trajectory) */}
            <div className="flex-1 flex flex-col min-w-0 border-r border-gray-800 overflow-hidden">
              <div className="px-3 py-1.5 bg-gray-900/40 border-b border-gray-800 shrink-0">
                <span className="text-[11px] font-semibold text-amber-400 uppercase tracking-widest">Selected Tool Calls</span>
                <span className="text-[10px] text-gray-600 ml-2">reference trajectory &middot; {current.k_effective} steps</span>
              </div>
              <div className="flex-1 overflow-y-auto p-3">
                <TrajectoryView blocks={excerptBlocks} />
              </div>
            </div>

            {/* Right: new trajectory */}
            <div className="flex-1 flex flex-col min-w-0 overflow-hidden">
              <div className="px-3 py-1.5 bg-gray-900/40 border-b border-gray-800 shrink-0">
                <span className="text-[11px] font-semibold text-sky-400 uppercase tracking-widest">New Trajectory</span>
                <span className="text-[10px] text-gray-600 ml-2">gpt-oss-120b &middot; conditioned on selected steps</span>
              </div>
              <div className="flex-1 overflow-y-auto p-3">
                <TrajectoryView blocks={trajectoryBlocks} />
              </div>
            </div>

          </div>
        </div>
      ) : (
        <div className="flex-1 flex items-center justify-center text-gray-500">No query selected.</div>
      )}

    </div>
  );
}