timchen0618 committed on
Commit
30fb9c5
·
1 Parent(s): 8026e0e

Add question/answer/accuracy to Scout Runs tab; fix selected-tools reload cache

Browse files

Scout Runs: accuracy %, ✓/✗ badges in sidebar, question+answer in header.
Selected Tools: same pattern already deployed; reload now deletes HF cache first.
Both backends load question/correct_answer/correct from HF datasets.
Patch scripts for adding eval columns to existing scout-run and orig-analysis datasets.

backend/api/scout_runs.py CHANGED
@@ -100,6 +100,9 @@ def _load(variant: str) -> list:
100
  "new_tool_calls": new_counts,
101
  "scout_total_calls": sum(scout_counts.values()),
102
  "new_total_calls": sum(new_counts.values()),
 
 
 
103
  })
104
  _cache[variant] = rows
105
  return rows
@@ -122,10 +125,19 @@ def get_variants():
122
 
123
  @bp.post("/reload")
124
  def reload_data():
 
125
  variant = request.args.get("variant", DEFAULT_VARIANT)
126
  if variant in _cache:
127
  del _cache[variant]
 
 
 
128
  try:
 
 
 
 
 
129
  rows = _load(variant)
130
  return jsonify({"status": "ok", "count": len(rows), "variant": variant})
131
  except Exception as e:
 
100
  "new_tool_calls": new_counts,
101
  "scout_total_calls": sum(scout_counts.values()),
102
  "new_total_calls": sum(new_counts.values()),
103
+ "question": row.get("question") or "",
104
+ "correct_answer": row.get("correct_answer") or "",
105
+ "correct": row.get("correct"),
106
  })
107
  _cache[variant] = rows
108
  return rows
 
125
 
126
  @bp.post("/reload")
127
  def reload_data():
128
+ import shutil, os
129
  variant = request.args.get("variant", DEFAULT_VARIANT)
130
  if variant in _cache:
131
  del _cache[variant]
132
+ if variant not in VARIANTS:
133
+ return jsonify({"error": f"Unknown variant: {variant!r}"}), 400
134
+ repo = VARIANTS[variant]["repo"]
135
  try:
136
+ cache_base = os.path.expanduser("~/.cache/huggingface/hub")
137
+ dataset_cache_name = "datasets--" + repo.replace("/", "--")
138
+ dataset_cache_path = os.path.join(cache_base, dataset_cache_name)
139
+ if os.path.exists(dataset_cache_path):
140
+ shutil.rmtree(dataset_cache_path)
141
  rows = _load(variant)
142
  return jsonify({"status": "ok", "count": len(rows), "variant": variant})
143
  except Exception as e:
build_scout_runs_with_eval.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Patch an existing scout-runs HF dataset to add question/correct_answer/correct columns
4
+ by joining with eval result files (*_eval.json).
5
+
6
+ Usage:
7
+ $PYTHON build_scout_runs_with_eval.py \
8
+ --repo timchen0618/browsecomp-plus-scout-runs-test300-gpt-oss-120b-v1 \
9
+ --eval-dir /path/to/evals/.../traj_budget_orig_ext_gpt-oss-120b_seed0
10
+
11
+ Python env: /scratch/hc3337/envs/raca-py312/bin/python
12
+ """
13
+ from __future__ import annotations
14
+ import argparse, json, sys, os
15
+ from pathlib import Path
16
+
17
+ os.environ.setdefault("HF_HOME", "/scratch/hc3337/.cache/huggingface")
18
+
19
+
20
def load_eval_data(eval_dir: Path) -> dict:
    """Load eval files and return query_id -> {question, correct_answer, correct}.

    Every ``*_eval.json`` file directly inside *eval_dir* is parsed.  Files
    that cannot be read or parsed are skipped with a warning on stderr, so one
    bad file does not abort the whole load.

    Args:
        eval_dir: Directory containing the ``*_eval.json`` files.

    Returns:
        A dict keyed by query id.  Ids made only of digits are converted to
        ``int``; anything else is kept as the stripped string.  Each value is
        a dict with ``question`` (str), ``correct_answer`` (str) and
        ``correct`` (``bool``, or ``None`` when the judge result has no
        ``correct`` field).
    """
    eval_map: dict = {}
    # Sorted so that, if two files share a query_id, the winner is
    # deterministic (last file in name order) instead of filesystem-dependent.
    for p in sorted(eval_dir.glob("*_eval.json")):
        try:
            # Context manager guarantees the handle is closed even when
            # json.load raises on a malformed file.
            with p.open("r", encoding="utf-8") as fh:
                d = json.load(fh)
            qid_raw = str(d.get("query_id", "")).strip()
            qid = int(qid_raw) if qid_raw.isdigit() else qid_raw
            jr = d.get("judge_result") or {}
            correct_val = jr.get("correct")
            eval_map[qid] = {
                "question": str(d.get("question") or ""),
                "correct_answer": str(d.get("correct_answer") or ""),
                # Keep "not judged" (None) distinct from "judged wrong" (False).
                "correct": bool(correct_val) if correct_val is not None else None,
            }
        except Exception as e:
            # Deliberate best-effort: report the bad file and keep going.
            print(f"warning: skipping {p.name}: {e}", file=sys.stderr)
    print(f"Loaded {len(eval_map)} eval entries from {eval_dir}", file=sys.stderr)
    return eval_map
39
+
40
+
41
def main():
    """Join eval results onto a scout-runs dataset and push it back to the Hub.

    Parses ``--repo``, ``--eval-dir`` and ``--commit-message``, loads the
    eval entries from the directory, loads the ``train`` split of the repo,
    and adds ``question``, ``correct_answer`` and ``correct`` to every row by
    matching on ``query_id``.  Rows without an eval entry get ``""``, ``""``
    and ``None``.  Progress is reported on stderr; the final confirmation is
    printed to stdout.

    Exits with an error instead of pushing when ``--eval-dir`` is not a
    directory or when no row matched any eval entry, because pushing in those
    cases would rewrite the dataset with only blank eval columns.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--repo", required=True, help="HF repo ID to patch.")
    ap.add_argument("--eval-dir", type=Path, required=True,
                    help="Directory of *_eval.json files.")
    ap.add_argument("--commit-message", default="Add question/correct_answer/correct columns")
    args = ap.parse_args()

    # Fail fast on a bad path, before the dataset download starts.
    eval_dir = args.eval_dir.resolve()
    if not eval_dir.is_dir():
        ap.error(f"--eval-dir is not a directory: {eval_dir}")

    from datasets import load_dataset, Dataset

    eval_map = load_eval_data(eval_dir)

    print(f"Loading {args.repo}...", file=sys.stderr)
    ds = load_dataset(args.repo, split="train")
    print(f"Loaded {len(ds)} rows. Columns: {ds.column_names}", file=sys.stderr)

    rows = []
    matched = 0
    for row in ds:
        # Normalise the id the same way load_eval_data does so keys line up.
        qid_raw = str(row["query_id"]).strip()
        qid = int(qid_raw) if qid_raw.isdigit() else qid_raw
        ev = eval_map.get(qid, {})
        if ev:
            matched += 1
        r = dict(row)
        r["question"] = ev.get("question", "")
        r["correct_answer"] = ev.get("correct_answer", "")
        r["correct"] = ev.get("correct", None)
        rows.append(r)

    print(f"Matched {matched}/{len(rows)} rows with eval data.", file=sys.stderr)
    if not matched:
        # Nothing to add: pushing now would only write blank eval columns.
        sys.exit("error: no dataset rows matched any eval entry; refusing to push.")

    correct_count = sum(1 for r in rows if r.get("correct") is True)
    print(f"Accuracy: {correct_count}/{matched} ({100*correct_count//matched}%)", file=sys.stderr)

    ds_new = Dataset.from_list(rows)
    ds_new.push_to_hub(args.repo, split="train", commit_message=args.commit_message)
    print(f"Pushed {len(rows)} rows to {args.repo}.")
79
+
80
+
81
+ if __name__ == "__main__":
82
+ main()
frontend/dist/assets/{ExperimentsApp-MhV_BmUC.js → ExperimentsApp-BXbEFU8h.js} RENAMED
The diff for this file is too large to render. See raw diff
 
frontend/dist/assets/{ModelApp-ByP0hVWF.js → ModelApp-D9tEU6of.js} RENAMED
@@ -1,4 +1,4 @@
1
- import{r as i,p as re,a as le,j as e}from"./index-BFtcA7GQ.js";const U="/api/model",X="/api/presets/model";async function L(t,l){const n=await fetch(t,{headers:{"Content-Type":"application/json"},...l});if(!n.ok){const c=await n.json().catch(()=>({error:n.statusText}));throw new Error(c.error||n.statusText)}return n.json()}const A={loadDataset(t,l,n,c){return L(`${U}/datasets/load`,{method:"POST",body:JSON.stringify({repo:t,column:l,split:n,prompt_column:c})})},listDatasets(){return L(`${U}/datasets/`)},getQuestion(t,l){return L(`${U}/datasets/${t}/question/${l}`)},getSummary(t){return L(`${U}/datasets/${t}/summary`)},unloadDataset(t){return L(`${U}/datasets/${t}`,{method:"DELETE"})},listPresets(){return L(`${X}`)},createPreset(t,l,n,c){return L(`${X}`,{method:"POST",body:JSON.stringify({name:t,repo:l,column:n,split:c})})},updatePreset(t,l){return L(`${X}/${t}`,{method:"PUT",body:JSON.stringify(l)})},deletePreset(t){return L(`${X}/${t}`,{method:"DELETE"})}};function ie(){const t=i.useRef(re().params),[l,n]=i.useState([]),[c,p]=i.useState([]),[s,x]=i.useState("all"),[b,g]=i.useState({}),[h,S]=i.useState({}),[I,j]=i.useState(null),[w,k]=i.useState({}),[o,m]=i.useState(null);i.useEffect(()=>{A.listPresets().then(p).catch(()=>{})},[]),i.useEffect(()=>{const a=re().params,d=parseInt(a.get("q")||"0"),f=parseInt(a.get("s")||"0"),u=a.get("filter")||"all";x(u),(!isNaN(d)||!isNaN(f))&&(window.__initialQ=isNaN(d)?0:d,window.__initialS=isNaN(f)?0:f)},[]);const y=i.useMemo(()=>{const r={};for(const a of l){const d=a.questionFingerprint;r[d]||(r[d]=[]),r[d].push(a)}return r},[l]),P=i.useMemo(()=>Object.keys(y).sort(),[y]);i.useEffect(()=>{if(o&&y[o])return;const r=P.find(a=>y[a].some(d=>d.active));r?m(r):P.length>0?m(P[0]):m(null)},[P,y,o]);const N=i.useMemo(()=>l.filter(r=>r.active&&r.questionFingerprint===o),[l,o]),[F,W]=i.useState([]);i.useEffect(()=>{const r=new Set(N.map(a=>a.id));W(a=>{const 
d=a.filter(v=>r.has(v)),f=N.map(v=>v.id).filter(v=>!a.includes(v)),u=[...d,...f];return u.length===a.length&&u.every((v,C)=>v===a[C])?a:u})},[N]);const H=i.useMemo(()=>{const r=new Map(N.map(a=>[a.id,a]));return F.map(a=>r.get(a)).filter(a=>a!==void 0)},[N,F]),Y=i.useCallback((r,a)=>{r!==a&&W(d=>{const f=[...d],u=f.indexOf(r),v=f.indexOf(a);return u===-1||v===-1?d:(f.splice(u,1),f.splice(v,0,r),f)})},[]),D=o?w[o]:void 0,E=(D==null?void 0:D.questionIdx)??0,M=(D==null?void 0:D.sampleIdx)??0,G=i.useCallback(r=>{o&&k(a=>{const d=a[o]??{questionIdx:0,sampleIdx:0},f=typeof r=="function"?r(d.questionIdx):r;return{...a,[o]:{...d,questionIdx:f}}})},[o]),B=i.useCallback(r=>{o&&k(a=>{const d=a[o]??{questionIdx:0,sampleIdx:0},f=typeof r=="function"?r(d.sampleIdx):r;return{...a,[o]:{...d,sampleIdx:f}}})},[o]);i.useEffect(()=>{const r=new URLSearchParams,a=l.filter(d=>d.active);a.length>0&&(r.set("repos",a.map(d=>d.repo).join(",")),r.set("cols",a.map(d=>d.column).join(",")),r.set("pcols",a.map(d=>d.promptColumn||"formatted_prompt").join(","))),r.set("q",String(E)),r.set("s",String(M)),s!=="all"&&r.set("filter",s),o&&r.set("group",o),le({params:r})},[l,E,M,s,o]),i.useEffect(()=>{N.forEach(r=>{const a=`${r.id}:${E}`;b[a]||A.getQuestion(r.id,E).then(d=>{g(f=>({...f,[a]:d}))}).catch(()=>{})})},[E,N]);const O=i.useCallback(async(r,a,d,f,u,v)=>{S(C=>({...C,[r]:!0})),j(null);try{const{question_fingerprint:C,...Q}=await A.loadDataset(r,a,d,f),V=C??"",ee={...Q,questionFingerprint:V,active:!0,presetId:u,presetName:v};n(T=>T.some(q=>q.id===ee.id)?T:[...T,ee]),k(T=>{if(T[V])return T;const q=window,ne=typeof q.__initialQ=="number"?q.__initialQ:0,oe=typeof q.__initialS=="number"?q.__initialS:0,te=Object.keys(T).length===0;return{...T,[V]:{questionIdx:te?ne:0,sampleIdx:te?oe:0}}}),m(V)}catch(C){j(C instanceof Error?C.message:"Failed to load dataset")}finally{S(C=>({...C,[r]:!1}))}},[]);i.useEffect(()=>{var u,v,C;const r=t.current,a=((u=r.get("repos"))==null?void 
0:u.split(",").filter(Boolean))||[],d=((v=r.get("cols"))==null?void 0:v.split(","))||[],f=((C=r.get("pcols"))==null?void 0:C.split(","))||[];for(let Q=0;Q<a.length;Q++)O(a[Q],d[Q]||void 0,void 0,f[Q]||void 0)},[O]);const R=i.useCallback(async r=>{await A.unloadDataset(r).catch(()=>{}),n(a=>a.filter(d=>d.id!==r))},[]),$=i.useCallback(r=>{n(a=>{const d=a.map(u=>u.id===r?{...u,active:!u.active}:u),f=d.find(u=>u.id===r);return f&&f.active&&m(f.questionFingerprint),d})},[]),_=i.useCallback((r,a)=>{n(d=>d.map(f=>f.id===r?{...f,presetName:a}:f))},[]),J=i.useCallback(r=>{n(a=>a.map(d=>d.id===r?{...d,presetId:void 0,presetName:void 0}:d))},[]),K=Math.min(...N.map(r=>r.n_rows),1/0),z=Math.max(...N.map(r=>r.n_samples),0);return{datasets:l,presets:c,setPresets:p,questionIdx:E,setQuestionIdx:G,sampleIdx:M,setSampleIdx:B,filter:s,setFilter:x,loading:h,error:I,setError:j,activeDatasets:N,orderedActiveDatasets:H,maxQuestions:K,maxSamples:z,addDataset:O,removeDataset:R,toggleDataset:$,updateDatasetPresetName:_,clearDatasetPreset:J,getQuestionData:r=>b[`${r}:${E}`],reorderPanels:Y,groups:y,groupIds:P,currentGroupId:o,setCurrentGroupId:m}}const se=[{bg:"bg-blue-500",border:"border-blue-500",text:"text-blue-400",label:"text-blue-300"},{bg:"bg-emerald-500",border:"border-emerald-500",text:"text-emerald-400",label:"text-emerald-300"},{bg:"bg-amber-500",border:"border-amber-500",text:"text-amber-400",label:"text-amber-300"},{bg:"bg-purple-500",border:"border-purple-500",text:"text-purple-400",label:"text-purple-300"},{bg:"bg-rose-500",border:"border-rose-500",text:"text-rose-400",label:"text-rose-300"},{bg:"bg-cyan-500",border:"border-cyan-500",text:"text-cyan-400",label:"text-cyan-300"}];function 
ce({datasets:t,presets:l,loading:n,groups:c,groupIds:p,currentGroupId:s,onAddDataset:x,onRemoveDataset:b,onToggleDataset:g,onSetCurrentGroup:h,onLoadPreset:S,onSavePreset:I,onDeletePreset:j,onUpdatePreset:w}){const[k,o]=i.useState(!1),[m,y]=i.useState(""),[P,N]=i.useState(""),[F,W]=i.useState("train"),[H,Y]=i.useState(""),[D,E]=i.useState(""),[M,G]=i.useState(null),[B,O]=i.useState(""),[R,$]=i.useState(null),[_,J]=i.useState(""),K=()=>{m.trim()&&(x(m.trim(),P.trim()||void 0,F.trim()||void 0,H.trim()||void 0),y(""),o(!1))},z=r=>{B.trim()&&(I(B.trim(),r.repo,r.column,r.split),O(""),G(null))},Z=r=>{const a=p.indexOf(r);return se[a%se.length]};return e.jsxs("div",{className:"w-72 min-w-72 bg-gray-900 border-r border-gray-700 flex flex-col h-full",children:[e.jsxs("div",{className:"p-3 border-b border-gray-700",children:[e.jsx("div",{className:"flex items-center justify-between mb-2",children:e.jsx("h3",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wider",children:"Presets"})}),l.length===0?e.jsx("p",{className:"text-xs text-gray-500 italic",children:"No presets saved"}):e.jsxs(e.Fragment,{children:[l.length>6&&e.jsx("input",{type:"text",value:D,onChange:r=>E(r.target.value),placeholder:"Search presets...",className:"w-full px-2 py-1 mb-2 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"}),e.jsx("div",{className:"flex flex-wrap gap-1 max-h-32 overflow-y-auto",children:l.filter(r=>!D||r.name.toLowerCase().includes(D.toLowerCase())||r.repo.toLowerCase().includes(D.toLowerCase())).map(r=>e.jsxs("div",{className:"group relative",children:[e.jsx("button",{onClick:()=>S(r),className:"px-2 py-1 text-xs bg-gray-800 hover:bg-gray-700 rounded border border-gray-600 text-gray-300 transition-colors",title:`${r.repo} (${r.column}, ${r.split??"train"})`,children:r.name}),e.jsx("div",{className:"hidden group-hover:flex absolute top-full left-0 mt-1 z-10 
gap-1",children:e.jsx("button",{onClick:()=>j(r.id),className:"px-1.5 py-0.5 text-[10px] bg-red-900 hover:bg-red-800 rounded text-red-300",children:"Delete"})})]},r.id))})]})]}),e.jsxs("div",{className:"flex-1 overflow-y-auto p-3",children:[e.jsx("h3",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wider mb-2",children:"Loaded Repos"}),t.length===0?e.jsx("p",{className:"text-xs text-gray-500 italic",children:"No repos loaded. Add one below."}):e.jsx("div",{className:"space-y-3",children:p.map(r=>{const a=Z(r),d=c[r],f=r===s;return e.jsxs("div",{children:[e.jsxs("button",{onClick:()=>h(r),className:`w-full flex items-center gap-1.5 mb-1 px-1 py-0.5 rounded transition-colors ${f?"bg-gray-800":"hover:bg-gray-800/50"}`,children:[e.jsx("span",{className:`inline-block w-2 h-2 rounded-full ${a.bg} shrink-0`}),e.jsxs("span",{className:`text-[10px] font-semibold uppercase tracking-wider ${f?a.label:"text-gray-500"}`,children:["Group ",p.indexOf(r)+1,e.jsxs("span",{className:"normal-case font-normal ml-1 text-gray-600",children:["(",d.length," repo",d.length!==1?"s":"",")"]})]}),f&&e.jsx("span",{className:"text-[9px] text-gray-600 ml-auto",children:"viewing"})]}),e.jsx("div",{className:`space-y-1 border-l-2 ml-1 pl-2 ${f?a.border:"border-gray-700"}`,children:d.map(u=>e.jsxs("div",{children:[e.jsxs("div",{onClick:()=>{u.presetId&&($(R===u.id?null:u.id),J(u.presetName||""),o(!1))},className:`flex items-center gap-2 px-2 py-1.5 rounded text-sm transition-colors ${u.active?"bg-gray-800":"bg-gray-900 opacity-60"} ${R===u.id?"ring-1 ring-blue-500":""} ${u.presetId?"cursor-pointer":""}`,children:[e.jsx("input",{type:"checkbox",checked:u.active,onChange:()=>g(u.id),onClick:v=>v.stopPropagation(),className:"rounded border-gray-600 bg-gray-800 text-blue-500 focus:ring-blue-500 focus:ring-offset-0"}),e.jsxs("div",{className:"flex-1 min-w-0",children:[e.jsx("div",{className:"text-gray-200 truncate text-xs font-medium",title:u.presetName?`${u.presetName}
2
  ${u.repo}`:u.repo,children:u.presetName||u.name}),e.jsxs("div",{className:"text-[10px] text-gray-500",children:[u.column," | ",u.n_rows," rows | ",u.n_samples," samples"]})]}),e.jsx("button",{onClick:v=>{v.stopPropagation(),G(M===u.id?null:u.id),O("")},className:`transition-colors shrink-0 ${M===u.id?"text-blue-400":"text-gray-600 hover:text-blue-400"}`,title:"Save as preset",children:e.jsx("svg",{className:"w-3.5 h-3.5",fill:"none",viewBox:"0 0 24 24",stroke:"currentColor",children:e.jsx("path",{strokeLinecap:"round",strokeLinejoin:"round",strokeWidth:2,d:"M5 5a2 2 0 012-2h10a2 2 0 012 2v16l-7-3.5L5 21V5z"})})}),e.jsx("button",{onClick:v=>{v.stopPropagation(),b(u.id)},className:"text-gray-600 hover:text-red-400 transition-colors shrink-0",title:"Remove",children:e.jsx("svg",{className:"w-3.5 h-3.5",fill:"none",viewBox:"0 0 24 24",stroke:"currentColor",children:e.jsx("path",{strokeLinecap:"round",strokeLinejoin:"round",strokeWidth:2,d:"M6 18L18 6M6 6l12 12"})})})]}),M===u.id&&e.jsxs("div",{className:"flex gap-1 mt-1 ml-6",children:[e.jsx("input",{type:"text",value:B,onChange:v=>O(v.target.value),onKeyDown:v=>{v.key==="Enter"&&z(u),v.key==="Escape"&&G(null)},placeholder:"Preset name...",className:"flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none",autoFocus:!0}),e.jsx("button",{onClick:()=>z(u),className:"px-2 py-1 text-xs bg-blue-600 hover:bg-blue-500 rounded text-white",children:"Save"})]})]},u.id))})]},r)})})]}),R&&(()=>{const r=t.find(a=>a.id===R);return r!=null&&r.presetId?e.jsxs("div",{className:"p-3 border-t border-gray-700 space-y-2",children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase font-semibold tracking-wider",children:"Edit Preset"}),e.jsx("input",{type:"text",value:_,onChange:a=>J(a.target.value),onKeyDown:a=>{a.key==="Enter"&&_.trim()&&(w(r.presetId,r.id,{name:_.trim()}),$(null)),a.key==="Escape"&&$(null)},placeholder:"Preset 
name...",className:"w-full px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none",autoFocus:!0}),e.jsxs("div",{className:"flex gap-2",children:[e.jsx("button",{onClick:()=>{_.trim()&&(w(r.presetId,r.id,{name:_.trim()}),$(null))},disabled:!_.trim(),className:"flex-1 px-2 py-1 text-xs bg-blue-600 hover:bg-blue-500 disabled:bg-gray-700 disabled:text-gray-500 rounded text-white transition-colors",children:"Save"}),e.jsx("button",{onClick:()=>{j(r.presetId,r.id),$(null)},className:"px-2 py-1 text-xs bg-red-900 hover:bg-red-800 rounded text-red-300 transition-colors",children:"Delete"}),e.jsx("button",{onClick:()=>$(null),className:"px-2 py-1 text-xs bg-gray-700 hover:bg-gray-600 rounded text-gray-300 transition-colors",children:"Cancel"})]})]}):null})(),e.jsx("div",{className:"p-3 border-t border-gray-700",children:k?e.jsxs("div",{className:"space-y-2",children:[e.jsx("input",{type:"text",value:m,onChange:r=>y(r.target.value),onKeyDown:r=>r.key==="Enter"&&K(),placeholder:"org/dataset-name",className:"w-full px-2 py-1.5 text-sm bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none",autoFocus:!0}),e.jsxs("div",{className:"flex gap-2",children:[e.jsx("input",{type:"text",value:P,onChange:r=>N(r.target.value),placeholder:"Response col (auto-detect)",className:"flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"}),e.jsx("input",{type:"text",value:F,onChange:r=>W(r.target.value),placeholder:"Split",className:"w-16 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"})]}),e.jsx("div",{className:"flex gap-2",children:e.jsx("input",{type:"text",value:H,onChange:r=>Y(r.target.value),placeholder:"Prompt col (auto-detect)",className:"flex-1 px-2 py-1 text-xs 
bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"})}),e.jsxs("div",{className:"flex gap-2",children:[e.jsx("button",{onClick:K,disabled:!m.trim()||n[m.trim()],className:"flex-1 px-2 py-1.5 text-sm bg-blue-600 hover:bg-blue-500 disabled:bg-gray-700 disabled:text-gray-500 rounded text-white transition-colors",children:n[m.trim()]?"Loading...":"Load"}),e.jsx("button",{onClick:()=>o(!1),className:"px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded text-gray-300 transition-colors",children:"Cancel"})]})]}):e.jsx("button",{onClick:()=>{$(null),o(!0),y(""),N(""),W("train"),Y("")},className:"w-full px-3 py-2 text-sm bg-blue-600 hover:bg-blue-500 rounded text-white font-medium transition-colors",children:"+ Add Repo"})})]})}function de(t){if(!t)return[{text:"(no response)",className:"text-gray-500 italic"}];const l=[],n=t.split(`
3
  `);for(let c=0;c<n.length;c++){const p=n[c],s=p.toLowerCase().trim();let x="text-gray-300";s.startsWith("wait")||s.startsWith("hmm")||s.startsWith("but wait")?x="text-yellow-400":s.startsWith("let me try")||s.startsWith("let me reconsider")||s.startsWith("let me think")?x="text-cyan-400":s.startsWith("so the answer")||s.startsWith("so the expression")||s.startsWith("therefore")||s.startsWith("the final")?x="text-green-400 font-bold":s.startsWith("i give up")||s.startsWith("i can't find")||s.startsWith("i'm stuck")||s.startsWith("i'm sorry")?x="text-red-400 font-bold":p.includes("=")&&/[+\-*/]/.test(p)&&(x="text-gray-100"),l.push({text:p,className:x}),c<n.length-1&&l.push({text:`
4
  `,className:""})}return l}function ue(t){if(!t||!t.trim())return[];try{const l=JSON.parse(t);if(Array.isArray(l)&&l.length>0&&l[0].role!==void 0)return l.map(n=>({role:String(n.role||"unknown"),content:String(n.content??"")}))}catch{}if(t.includes("<|im_start|>"))return t.split("<|im_start|>").filter(Boolean).map(n=>{const c=n.indexOf(`
 
1
+ import{r as i,p as re,a as le,j as e}from"./index-BcY8Ufng.js";const U="/api/model",X="/api/presets/model";async function L(t,l){const n=await fetch(t,{headers:{"Content-Type":"application/json"},...l});if(!n.ok){const c=await n.json().catch(()=>({error:n.statusText}));throw new Error(c.error||n.statusText)}return n.json()}const A={loadDataset(t,l,n,c){return L(`${U}/datasets/load`,{method:"POST",body:JSON.stringify({repo:t,column:l,split:n,prompt_column:c})})},listDatasets(){return L(`${U}/datasets/`)},getQuestion(t,l){return L(`${U}/datasets/${t}/question/${l}`)},getSummary(t){return L(`${U}/datasets/${t}/summary`)},unloadDataset(t){return L(`${U}/datasets/${t}`,{method:"DELETE"})},listPresets(){return L(`${X}`)},createPreset(t,l,n,c){return L(`${X}`,{method:"POST",body:JSON.stringify({name:t,repo:l,column:n,split:c})})},updatePreset(t,l){return L(`${X}/${t}`,{method:"PUT",body:JSON.stringify(l)})},deletePreset(t){return L(`${X}/${t}`,{method:"DELETE"})}};function ie(){const t=i.useRef(re().params),[l,n]=i.useState([]),[c,p]=i.useState([]),[s,x]=i.useState("all"),[b,g]=i.useState({}),[h,S]=i.useState({}),[I,j]=i.useState(null),[w,k]=i.useState({}),[o,m]=i.useState(null);i.useEffect(()=>{A.listPresets().then(p).catch(()=>{})},[]),i.useEffect(()=>{const a=re().params,d=parseInt(a.get("q")||"0"),f=parseInt(a.get("s")||"0"),u=a.get("filter")||"all";x(u),(!isNaN(d)||!isNaN(f))&&(window.__initialQ=isNaN(d)?0:d,window.__initialS=isNaN(f)?0:f)},[]);const y=i.useMemo(()=>{const r={};for(const a of l){const d=a.questionFingerprint;r[d]||(r[d]=[]),r[d].push(a)}return r},[l]),P=i.useMemo(()=>Object.keys(y).sort(),[y]);i.useEffect(()=>{if(o&&y[o])return;const r=P.find(a=>y[a].some(d=>d.active));r?m(r):P.length>0?m(P[0]):m(null)},[P,y,o]);const N=i.useMemo(()=>l.filter(r=>r.active&&r.questionFingerprint===o),[l,o]),[F,W]=i.useState([]);i.useEffect(()=>{const r=new Set(N.map(a=>a.id));W(a=>{const 
d=a.filter(v=>r.has(v)),f=N.map(v=>v.id).filter(v=>!a.includes(v)),u=[...d,...f];return u.length===a.length&&u.every((v,C)=>v===a[C])?a:u})},[N]);const H=i.useMemo(()=>{const r=new Map(N.map(a=>[a.id,a]));return F.map(a=>r.get(a)).filter(a=>a!==void 0)},[N,F]),Y=i.useCallback((r,a)=>{r!==a&&W(d=>{const f=[...d],u=f.indexOf(r),v=f.indexOf(a);return u===-1||v===-1?d:(f.splice(u,1),f.splice(v,0,r),f)})},[]),D=o?w[o]:void 0,E=(D==null?void 0:D.questionIdx)??0,M=(D==null?void 0:D.sampleIdx)??0,G=i.useCallback(r=>{o&&k(a=>{const d=a[o]??{questionIdx:0,sampleIdx:0},f=typeof r=="function"?r(d.questionIdx):r;return{...a,[o]:{...d,questionIdx:f}}})},[o]),B=i.useCallback(r=>{o&&k(a=>{const d=a[o]??{questionIdx:0,sampleIdx:0},f=typeof r=="function"?r(d.sampleIdx):r;return{...a,[o]:{...d,sampleIdx:f}}})},[o]);i.useEffect(()=>{const r=new URLSearchParams,a=l.filter(d=>d.active);a.length>0&&(r.set("repos",a.map(d=>d.repo).join(",")),r.set("cols",a.map(d=>d.column).join(",")),r.set("pcols",a.map(d=>d.promptColumn||"formatted_prompt").join(","))),r.set("q",String(E)),r.set("s",String(M)),s!=="all"&&r.set("filter",s),o&&r.set("group",o),le({params:r})},[l,E,M,s,o]),i.useEffect(()=>{N.forEach(r=>{const a=`${r.id}:${E}`;b[a]||A.getQuestion(r.id,E).then(d=>{g(f=>({...f,[a]:d}))}).catch(()=>{})})},[E,N]);const O=i.useCallback(async(r,a,d,f,u,v)=>{S(C=>({...C,[r]:!0})),j(null);try{const{question_fingerprint:C,...Q}=await A.loadDataset(r,a,d,f),V=C??"",ee={...Q,questionFingerprint:V,active:!0,presetId:u,presetName:v};n(T=>T.some(q=>q.id===ee.id)?T:[...T,ee]),k(T=>{if(T[V])return T;const q=window,ne=typeof q.__initialQ=="number"?q.__initialQ:0,oe=typeof q.__initialS=="number"?q.__initialS:0,te=Object.keys(T).length===0;return{...T,[V]:{questionIdx:te?ne:0,sampleIdx:te?oe:0}}}),m(V)}catch(C){j(C instanceof Error?C.message:"Failed to load dataset")}finally{S(C=>({...C,[r]:!1}))}},[]);i.useEffect(()=>{var u,v,C;const r=t.current,a=((u=r.get("repos"))==null?void 
0:u.split(",").filter(Boolean))||[],d=((v=r.get("cols"))==null?void 0:v.split(","))||[],f=((C=r.get("pcols"))==null?void 0:C.split(","))||[];for(let Q=0;Q<a.length;Q++)O(a[Q],d[Q]||void 0,void 0,f[Q]||void 0)},[O]);const R=i.useCallback(async r=>{await A.unloadDataset(r).catch(()=>{}),n(a=>a.filter(d=>d.id!==r))},[]),$=i.useCallback(r=>{n(a=>{const d=a.map(u=>u.id===r?{...u,active:!u.active}:u),f=d.find(u=>u.id===r);return f&&f.active&&m(f.questionFingerprint),d})},[]),_=i.useCallback((r,a)=>{n(d=>d.map(f=>f.id===r?{...f,presetName:a}:f))},[]),J=i.useCallback(r=>{n(a=>a.map(d=>d.id===r?{...d,presetId:void 0,presetName:void 0}:d))},[]),K=Math.min(...N.map(r=>r.n_rows),1/0),z=Math.max(...N.map(r=>r.n_samples),0);return{datasets:l,presets:c,setPresets:p,questionIdx:E,setQuestionIdx:G,sampleIdx:M,setSampleIdx:B,filter:s,setFilter:x,loading:h,error:I,setError:j,activeDatasets:N,orderedActiveDatasets:H,maxQuestions:K,maxSamples:z,addDataset:O,removeDataset:R,toggleDataset:$,updateDatasetPresetName:_,clearDatasetPreset:J,getQuestionData:r=>b[`${r}:${E}`],reorderPanels:Y,groups:y,groupIds:P,currentGroupId:o,setCurrentGroupId:m}}const se=[{bg:"bg-blue-500",border:"border-blue-500",text:"text-blue-400",label:"text-blue-300"},{bg:"bg-emerald-500",border:"border-emerald-500",text:"text-emerald-400",label:"text-emerald-300"},{bg:"bg-amber-500",border:"border-amber-500",text:"text-amber-400",label:"text-amber-300"},{bg:"bg-purple-500",border:"border-purple-500",text:"text-purple-400",label:"text-purple-300"},{bg:"bg-rose-500",border:"border-rose-500",text:"text-rose-400",label:"text-rose-300"},{bg:"bg-cyan-500",border:"border-cyan-500",text:"text-cyan-400",label:"text-cyan-300"}];function 
ce({datasets:t,presets:l,loading:n,groups:c,groupIds:p,currentGroupId:s,onAddDataset:x,onRemoveDataset:b,onToggleDataset:g,onSetCurrentGroup:h,onLoadPreset:S,onSavePreset:I,onDeletePreset:j,onUpdatePreset:w}){const[k,o]=i.useState(!1),[m,y]=i.useState(""),[P,N]=i.useState(""),[F,W]=i.useState("train"),[H,Y]=i.useState(""),[D,E]=i.useState(""),[M,G]=i.useState(null),[B,O]=i.useState(""),[R,$]=i.useState(null),[_,J]=i.useState(""),K=()=>{m.trim()&&(x(m.trim(),P.trim()||void 0,F.trim()||void 0,H.trim()||void 0),y(""),o(!1))},z=r=>{B.trim()&&(I(B.trim(),r.repo,r.column,r.split),O(""),G(null))},Z=r=>{const a=p.indexOf(r);return se[a%se.length]};return e.jsxs("div",{className:"w-72 min-w-72 bg-gray-900 border-r border-gray-700 flex flex-col h-full",children:[e.jsxs("div",{className:"p-3 border-b border-gray-700",children:[e.jsx("div",{className:"flex items-center justify-between mb-2",children:e.jsx("h3",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wider",children:"Presets"})}),l.length===0?e.jsx("p",{className:"text-xs text-gray-500 italic",children:"No presets saved"}):e.jsxs(e.Fragment,{children:[l.length>6&&e.jsx("input",{type:"text",value:D,onChange:r=>E(r.target.value),placeholder:"Search presets...",className:"w-full px-2 py-1 mb-2 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"}),e.jsx("div",{className:"flex flex-wrap gap-1 max-h-32 overflow-y-auto",children:l.filter(r=>!D||r.name.toLowerCase().includes(D.toLowerCase())||r.repo.toLowerCase().includes(D.toLowerCase())).map(r=>e.jsxs("div",{className:"group relative",children:[e.jsx("button",{onClick:()=>S(r),className:"px-2 py-1 text-xs bg-gray-800 hover:bg-gray-700 rounded border border-gray-600 text-gray-300 transition-colors",title:`${r.repo} (${r.column}, ${r.split??"train"})`,children:r.name}),e.jsx("div",{className:"hidden group-hover:flex absolute top-full left-0 mt-1 z-10 
gap-1",children:e.jsx("button",{onClick:()=>j(r.id),className:"px-1.5 py-0.5 text-[10px] bg-red-900 hover:bg-red-800 rounded text-red-300",children:"Delete"})})]},r.id))})]})]}),e.jsxs("div",{className:"flex-1 overflow-y-auto p-3",children:[e.jsx("h3",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wider mb-2",children:"Loaded Repos"}),t.length===0?e.jsx("p",{className:"text-xs text-gray-500 italic",children:"No repos loaded. Add one below."}):e.jsx("div",{className:"space-y-3",children:p.map(r=>{const a=Z(r),d=c[r],f=r===s;return e.jsxs("div",{children:[e.jsxs("button",{onClick:()=>h(r),className:`w-full flex items-center gap-1.5 mb-1 px-1 py-0.5 rounded transition-colors ${f?"bg-gray-800":"hover:bg-gray-800/50"}`,children:[e.jsx("span",{className:`inline-block w-2 h-2 rounded-full ${a.bg} shrink-0`}),e.jsxs("span",{className:`text-[10px] font-semibold uppercase tracking-wider ${f?a.label:"text-gray-500"}`,children:["Group ",p.indexOf(r)+1,e.jsxs("span",{className:"normal-case font-normal ml-1 text-gray-600",children:["(",d.length," repo",d.length!==1?"s":"",")"]})]}),f&&e.jsx("span",{className:"text-[9px] text-gray-600 ml-auto",children:"viewing"})]}),e.jsx("div",{className:`space-y-1 border-l-2 ml-1 pl-2 ${f?a.border:"border-gray-700"}`,children:d.map(u=>e.jsxs("div",{children:[e.jsxs("div",{onClick:()=>{u.presetId&&($(R===u.id?null:u.id),J(u.presetName||""),o(!1))},className:`flex items-center gap-2 px-2 py-1.5 rounded text-sm transition-colors ${u.active?"bg-gray-800":"bg-gray-900 opacity-60"} ${R===u.id?"ring-1 ring-blue-500":""} ${u.presetId?"cursor-pointer":""}`,children:[e.jsx("input",{type:"checkbox",checked:u.active,onChange:()=>g(u.id),onClick:v=>v.stopPropagation(),className:"rounded border-gray-600 bg-gray-800 text-blue-500 focus:ring-blue-500 focus:ring-offset-0"}),e.jsxs("div",{className:"flex-1 min-w-0",children:[e.jsx("div",{className:"text-gray-200 truncate text-xs font-medium",title:u.presetName?`${u.presetName}
2
  ${u.repo}`:u.repo,children:u.presetName||u.name}),e.jsxs("div",{className:"text-[10px] text-gray-500",children:[u.column," | ",u.n_rows," rows | ",u.n_samples," samples"]})]}),e.jsx("button",{onClick:v=>{v.stopPropagation(),G(M===u.id?null:u.id),O("")},className:`transition-colors shrink-0 ${M===u.id?"text-blue-400":"text-gray-600 hover:text-blue-400"}`,title:"Save as preset",children:e.jsx("svg",{className:"w-3.5 h-3.5",fill:"none",viewBox:"0 0 24 24",stroke:"currentColor",children:e.jsx("path",{strokeLinecap:"round",strokeLinejoin:"round",strokeWidth:2,d:"M5 5a2 2 0 012-2h10a2 2 0 012 2v16l-7-3.5L5 21V5z"})})}),e.jsx("button",{onClick:v=>{v.stopPropagation(),b(u.id)},className:"text-gray-600 hover:text-red-400 transition-colors shrink-0",title:"Remove",children:e.jsx("svg",{className:"w-3.5 h-3.5",fill:"none",viewBox:"0 0 24 24",stroke:"currentColor",children:e.jsx("path",{strokeLinecap:"round",strokeLinejoin:"round",strokeWidth:2,d:"M6 18L18 6M6 6l12 12"})})})]}),M===u.id&&e.jsxs("div",{className:"flex gap-1 mt-1 ml-6",children:[e.jsx("input",{type:"text",value:B,onChange:v=>O(v.target.value),onKeyDown:v=>{v.key==="Enter"&&z(u),v.key==="Escape"&&G(null)},placeholder:"Preset name...",className:"flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none",autoFocus:!0}),e.jsx("button",{onClick:()=>z(u),className:"px-2 py-1 text-xs bg-blue-600 hover:bg-blue-500 rounded text-white",children:"Save"})]})]},u.id))})]},r)})})]}),R&&(()=>{const r=t.find(a=>a.id===R);return r!=null&&r.presetId?e.jsxs("div",{className:"p-3 border-t border-gray-700 space-y-2",children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase font-semibold tracking-wider",children:"Edit Preset"}),e.jsx("input",{type:"text",value:_,onChange:a=>J(a.target.value),onKeyDown:a=>{a.key==="Enter"&&_.trim()&&(w(r.presetId,r.id,{name:_.trim()}),$(null)),a.key==="Escape"&&$(null)},placeholder:"Preset 
name...",className:"w-full px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none",autoFocus:!0}),e.jsxs("div",{className:"flex gap-2",children:[e.jsx("button",{onClick:()=>{_.trim()&&(w(r.presetId,r.id,{name:_.trim()}),$(null))},disabled:!_.trim(),className:"flex-1 px-2 py-1 text-xs bg-blue-600 hover:bg-blue-500 disabled:bg-gray-700 disabled:text-gray-500 rounded text-white transition-colors",children:"Save"}),e.jsx("button",{onClick:()=>{j(r.presetId,r.id),$(null)},className:"px-2 py-1 text-xs bg-red-900 hover:bg-red-800 rounded text-red-300 transition-colors",children:"Delete"}),e.jsx("button",{onClick:()=>$(null),className:"px-2 py-1 text-xs bg-gray-700 hover:bg-gray-600 rounded text-gray-300 transition-colors",children:"Cancel"})]})]}):null})(),e.jsx("div",{className:"p-3 border-t border-gray-700",children:k?e.jsxs("div",{className:"space-y-2",children:[e.jsx("input",{type:"text",value:m,onChange:r=>y(r.target.value),onKeyDown:r=>r.key==="Enter"&&K(),placeholder:"org/dataset-name",className:"w-full px-2 py-1.5 text-sm bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none",autoFocus:!0}),e.jsxs("div",{className:"flex gap-2",children:[e.jsx("input",{type:"text",value:P,onChange:r=>N(r.target.value),placeholder:"Response col (auto-detect)",className:"flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"}),e.jsx("input",{type:"text",value:F,onChange:r=>W(r.target.value),placeholder:"Split",className:"w-16 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"})]}),e.jsx("div",{className:"flex gap-2",children:e.jsx("input",{type:"text",value:H,onChange:r=>Y(r.target.value),placeholder:"Prompt col (auto-detect)",className:"flex-1 px-2 py-1 text-xs 
bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"})}),e.jsxs("div",{className:"flex gap-2",children:[e.jsx("button",{onClick:K,disabled:!m.trim()||n[m.trim()],className:"flex-1 px-2 py-1.5 text-sm bg-blue-600 hover:bg-blue-500 disabled:bg-gray-700 disabled:text-gray-500 rounded text-white transition-colors",children:n[m.trim()]?"Loading...":"Load"}),e.jsx("button",{onClick:()=>o(!1),className:"px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded text-gray-300 transition-colors",children:"Cancel"})]})]}):e.jsx("button",{onClick:()=>{$(null),o(!0),y(""),N(""),W("train"),Y("")},className:"w-full px-3 py-2 text-sm bg-blue-600 hover:bg-blue-500 rounded text-white font-medium transition-colors",children:"+ Add Repo"})})]})}function de(t){if(!t)return[{text:"(no response)",className:"text-gray-500 italic"}];const l=[],n=t.split(`
3
  `);for(let c=0;c<n.length;c++){const p=n[c],s=p.toLowerCase().trim();let x="text-gray-300";s.startsWith("wait")||s.startsWith("hmm")||s.startsWith("but wait")?x="text-yellow-400":s.startsWith("let me try")||s.startsWith("let me reconsider")||s.startsWith("let me think")?x="text-cyan-400":s.startsWith("so the answer")||s.startsWith("so the expression")||s.startsWith("therefore")||s.startsWith("the final")?x="text-green-400 font-bold":s.startsWith("i give up")||s.startsWith("i can't find")||s.startsWith("i'm stuck")||s.startsWith("i'm sorry")?x="text-red-400 font-bold":p.includes("=")&&/[+\-*/]/.test(p)&&(x="text-gray-100"),l.push({text:p,className:x}),c<n.length-1&&l.push({text:`
4
  `,className:""})}return l}function ue(t){if(!t||!t.trim())return[];try{const l=JSON.parse(t);if(Array.isArray(l)&&l.length>0&&l[0].role!==void 0)return l.map(n=>({role:String(n.role||"unknown"),content:String(n.content??"")}))}catch{}if(t.includes("<|im_start|>"))return t.split("<|im_start|>").filter(Boolean).map(n=>{const c=n.indexOf(`
frontend/dist/assets/{PlanRevisionsApp-Dm4VU5KS.js → PlanRevisionsApp-Dr-ACMnp.js} RENAMED
@@ -1,4 +1,4 @@
1
- import{r as i,j as t}from"./index-BFtcA7GQ.js";function S(r,p){const x=r.split(`
2
  `),n=p.split(`
3
  `),d=new Set(x),m=new Set(n),s=[];let o=0,a=0;for(;o<x.length||a<n.length;){const u=x[o],l=n[a];o>=x.length?(s.push({text:n[a],type:"add"}),a++):a>=n.length?(s.push({text:x[o],type:"del"}),o++):u===l?(s.push({text:u,type:"same"}),o++,a++):m.has(u)?d.has(l)?(s.push({text:u,type:"del"}),s.push({text:l,type:"add"}),o++,a++):(s.push({text:l,type:"add"}),a++):(s.push({text:u,type:"del"}),o++)}return s}function _({prev:r,curr:p}){const x=i.useMemo(()=>S(r,p),[r,p]);return t.jsx("pre",{className:"text-xs font-mono whitespace-pre-wrap leading-relaxed",children:x.map((n,d)=>n.type==="same"?t.jsx("span",{className:"text-gray-300",children:n.text+`
4
  `},d):n.type==="add"?t.jsx("span",{className:"bg-green-900/40 text-green-300",children:"+ "+n.text+`
 
1
+ import{r as i,j as t}from"./index-BcY8Ufng.js";function S(r,p){const x=r.split(`
2
  `),n=p.split(`
3
  `),d=new Set(x),m=new Set(n),s=[];let o=0,a=0;for(;o<x.length||a<n.length;){const u=x[o],l=n[a];o>=x.length?(s.push({text:n[a],type:"add"}),a++):a>=n.length?(s.push({text:x[o],type:"del"}),o++):u===l?(s.push({text:u,type:"same"}),o++,a++):m.has(u)?d.has(l)?(s.push({text:u,type:"del"}),s.push({text:l,type:"add"}),o++,a++):(s.push({text:l,type:"add"}),a++):(s.push({text:u,type:"del"}),o++)}return s}function _({prev:r,curr:p}){const x=i.useMemo(()=>S(r,p),[r,p]);return t.jsx("pre",{className:"text-xs font-mono whitespace-pre-wrap leading-relaxed",children:x.map((n,d)=>n.type==="same"?t.jsx("span",{className:"text-gray-300",children:n.text+`
4
  `},d):n.type==="add"?t.jsx("span",{className:"bg-green-900/40 text-green-300",children:"+ "+n.text+`
frontend/dist/assets/ScoutRunsApp-BqSa3vbC.js DELETED
@@ -1,11 +0,0 @@
1
- import{r as n,j as e}from"./index-BFtcA7GQ.js";const _={reasoning:{border:"border-purple-700",labelColor:"text-purple-400",bg:"bg-purple-950/30"},tool_call:{border:"border-blue-700",labelColor:"text-blue-400",bg:"bg-blue-950/30"},tool_result:{border:"border-gray-600",labelColor:"text-gray-400",bg:"bg-gray-800/30"},final_answer:{border:"border-green-700",labelColor:"text-green-400",bg:"bg-green-950/30"},unknown:{border:"border-gray-700",labelColor:"text-gray-500",bg:""}};function h(a){if(!a)return[];const l=[],g=a.split(`
2
-
3
- ---
4
-
5
- `);for(const o of g){const s=o.trim();if(s.startsWith(`[Reasoning]
6
- `))l.push({type:"reasoning",label:"Reasoning",content:s.slice(12).trim()});else if(s.startsWith("[Tool Call:")){const b=`
7
-
8
- [Tool Result]
9
- `,i=s.indexOf(b),d=s.indexOf(`]
10
- `),c=d>=0?s.slice(11,d).trim():"unknown";if(i>=0){const x=s.slice(0,i).replace(/^\[Tool Call:[^\]]*\]\n/,"").trim(),p=s.slice(i+b.length).trim();l.push({type:"tool_call",label:`Tool Call: ${c}`,content:x}),l.push({type:"tool_result",label:"Tool Result",content:p})}else{const x=s.replace(/^\[Tool Call:[^\]]*\]\n/,"").trim();l.push({type:"tool_call",label:`Tool Call: ${c}`,content:x})}}else s.startsWith(`[Final Answer]
11
- `)?l.push({type:"final_answer",label:"Final Answer",content:s.slice(15).trim()}):s&&l.push({type:"unknown",label:"—",content:s})}return l}function f({blocks:a}){return a.length===0?e.jsx("div",{className:"text-gray-500 text-xs italic",children:"No steps."}):e.jsx("div",{className:"space-y-2",children:a.map((l,g)=>{const o=_[l.type];return e.jsxs("div",{className:`border-l-2 ${o.border} ${o.bg} pl-3 py-1.5 rounded-r`,children:[e.jsx("div",{className:`text-[10px] font-bold uppercase tracking-widest mb-1 ${o.labelColor}`,children:l.label??l.type}),e.jsx("pre",{className:"text-xs text-gray-300 whitespace-pre-wrap font-mono leading-relaxed",children:l.content})]},g)})})}function k(){const[a,l]=n.useState([]),[g,o]=n.useState(!0),[s,b]=n.useState(null),[i,d]=n.useState(0),[c,x]=n.useState(""),[p,j]=n.useState("test300-gpt-oss-120b"),[y,v]=n.useState({});n.useEffect(()=>{o(!0),l([]),d(0),fetch(`/api/scout-runs/?variant=${encodeURIComponent(p)}`).then(t=>{if(!t.ok)throw new Error(t.statusText);return t.json()}).then(t=>{l(t.rows),t.variants&&v(t.variants),o(!1)}).catch(t=>{b(t.message),o(!1)})},[p]);const m=n.useMemo(()=>{if(!c.trim())return a;const t=c.toLowerCase();return a.filter(u=>u.query_id.includes(t))},[a,c]),r=m[i]??null,N=n.useMemo(()=>r?h(r.scout_trajectory):[],[r]),w=n.useMemo(()=>r?h(r.new_trajectory):[],[r]);return g?e.jsx("div",{className:"h-full flex items-center justify-center text-gray-400",children:"Loading from HuggingFace…"}):s?e.jsxs("div",{className:"h-full flex items-center justify-center text-red-400",children:["Error: ",s]}):e.jsxs("div",{className:"h-full flex overflow-hidden bg-gray-950 text-gray-100",children:[e.jsxs("div",{className:"w-56 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900",children:[Object.keys(y).length>1&&e.jsxs("div",{className:"px-2 py-2 border-b border-gray-800",children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase tracking-widest 
mb-1",children:"Variant"}),e.jsx("select",{value:p,onChange:t=>{j(t.target.value),x("")},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5",children:Object.entries(y).map(([t,u])=>e.jsx("option",{value:t,children:u.label},t))})]}),e.jsxs("div",{className:"px-3 py-2 border-b border-gray-800 bg-gray-900/80",children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase tracking-widest mb-1",children:"Scout incomplete"}),e.jsxs("div",{className:"text-lg font-bold text-amber-400",children:[a.filter(t=>t.scout_status==="incomplete").length,e.jsxs("span",{className:"text-xs text-gray-500 font-normal ml-1",children:["/ ",a.length]})]})]}),e.jsxs("div",{className:"px-2 py-1.5 border-b border-gray-800",children:[e.jsx("input",{type:"text",placeholder:"Search query ID…",value:c,onChange:t=>{x(t.target.value),d(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"}),e.jsxs("div",{className:"text-[10px] text-gray-600 mt-1",children:[m.length," / ",a.length]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto",children:m.map((t,u)=>e.jsxs("button",{onClick:()=>d(u),className:`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${i===u?"bg-cyan-900/40 text-cyan-200 border-l-2 border-l-cyan-500":"text-gray-400 hover:bg-gray-800"}`,children:[e.jsxs("div",{className:"flex items-center justify-between",children:[e.jsxs("span",{className:"font-medium text-gray-200",children:["#",t.query_id]}),e.jsx("span",{className:`text-[9px] px-1.5 py-0.5 rounded-full border ${t.scout_status==="incomplete"?"bg-amber-900/60 text-amber-400 border-amber-800":"bg-gray-800 text-gray-400 border-gray-700"}`,children:t.scout_status})]}),e.jsxs("div",{className:"text-[10px] text-gray-600 mt-0.5",children:["scout ",t.scout_total_calls," calls · new ",t.new_total_calls," calls"]})]},t.query_id))})]}),r?e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 
overflow-hidden",children:[e.jsx("div",{className:"px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0",children:e.jsxs("div",{className:"flex items-center gap-3 flex-wrap",children:[e.jsxs("span",{className:"text-sm font-medium text-gray-100",children:["Query #",r.query_id]}),e.jsxs("span",{className:`text-xs px-2 py-0.5 rounded-full border ${r.scout_status==="incomplete"?"bg-amber-900/50 text-amber-300 border-amber-800":"bg-gray-800 text-gray-400 border-gray-700"}`,children:["scout: ",r.scout_status]}),e.jsxs("span",{className:"text-xs text-gray-500",children:["scout ",r.scout_total_calls," calls · new ",r.new_total_calls," calls"]})]})}),e.jsxs("div",{className:"flex-1 flex overflow-hidden min-w-0",children:[e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 border-r border-gray-800 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/40 border-b border-gray-800 shrink-0",children:[e.jsx("span",{className:"text-[11px] font-semibold text-amber-400 uppercase tracking-widest",children:"Scout Run"}),e.jsxs("span",{className:"text-[10px] text-gray-600 ml-2",children:[r.scout_total_calls," tool calls · ",r.scout_status]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto p-3",children:e.jsx(f,{blocks:N})})]}),e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/40 border-b border-gray-800 shrink-0",children:[e.jsx("span",{className:"text-[11px] font-semibold text-cyan-400 uppercase tracking-widest",children:"New Trajectory"}),e.jsxs("span",{className:"text-[10px] text-gray-600 ml-2",children:[r.new_total_calls," tool calls · ",r.new_status]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto p-3",children:e.jsx(f,{blocks:w})})]})]})]}):e.jsx("div",{className:"flex-1 flex items-center justify-center text-gray-500",children:"No query selected."})]})}export{k as default};
 
 
 
 
 
 
 
 
 
 
 
 
frontend/dist/assets/ScoutRunsApp-UEZw1rLk.js ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import{r as n,j as e}from"./index-BcY8Ufng.js";const S={reasoning:{border:"border-purple-700",labelColor:"text-purple-400",bg:"bg-purple-950/30"},tool_call:{border:"border-blue-700",labelColor:"text-blue-400",bg:"bg-blue-950/30"},tool_result:{border:"border-gray-600",labelColor:"text-gray-400",bg:"bg-gray-800/30"},final_answer:{border:"border-green-700",labelColor:"text-green-400",bg:"bg-green-950/30"},unknown:{border:"border-gray-700",labelColor:"text-gray-500",bg:""}};function N(a){if(!a)return[];const l=[],g=a.split(`
2
+
3
+ ---
4
+
5
+ `);for(const c of g){const s=c.trim();if(s.startsWith(`[Reasoning]
6
+ `))l.push({type:"reasoning",label:"Reasoning",content:s.slice(12).trim()});else if(s.startsWith("[Tool Call:")){const b=`
7
+
8
+ [Tool Result]
9
+ `,d=s.indexOf(b),i=s.indexOf(`]
10
+ `),o=i>=0?s.slice(11,i).trim():"unknown";if(d>=0){const x=s.slice(0,d).replace(/^\[Tool Call:[^\]]*\]\n/,"").trim(),p=s.slice(d+b.length).trim();l.push({type:"tool_call",label:`Tool Call: ${o}`,content:x}),l.push({type:"tool_result",label:"Tool Result",content:p})}else{const x=s.replace(/^\[Tool Call:[^\]]*\]\n/,"").trim();l.push({type:"tool_call",label:`Tool Call: ${o}`,content:x})}}else s.startsWith(`[Final Answer]
11
+ `)?l.push({type:"final_answer",label:"Final Answer",content:s.slice(15).trim()}):s&&l.push({type:"unknown",label:"—",content:s})}return l}function v({blocks:a}){return a.length===0?e.jsx("div",{className:"text-gray-500 text-xs italic",children:"No steps."}):e.jsx("div",{className:"space-y-2",children:a.map((l,g)=>{const c=S[l.type];return e.jsxs("div",{className:`border-l-2 ${c.border} ${c.bg} pl-3 py-1.5 rounded-r`,children:[e.jsx("div",{className:`text-[10px] font-bold uppercase tracking-widest mb-1 ${c.labelColor}`,children:l.label??l.type}),e.jsx("pre",{className:"text-xs text-gray-300 whitespace-pre-wrap font-mono leading-relaxed",children:l.content})]},g)})})}function R(){const[a,l]=n.useState([]),[g,c]=n.useState(!0),[s,b]=n.useState(null),[d,i]=n.useState(0),[o,x]=n.useState(""),[p,w]=n.useState("test300-gpt-oss-120b"),[h,_]=n.useState({});n.useEffect(()=>{c(!0),l([]),i(0),fetch(`/api/scout-runs/?variant=${encodeURIComponent(p)}`).then(t=>{if(!t.ok)throw new Error(t.statusText);return t.json()}).then(t=>{l(t.rows),t.variants&&_(t.variants),c(!1)}).catch(t=>{b(t.message),c(!1)})},[p]);const y=n.useMemo(()=>{if(!o.trim())return a;const t=o.toLowerCase();return a.filter(u=>u.query_id.includes(t))},[a,o]),r=y[d]??null,k=n.useMemo(()=>r?N(r.scout_trajectory):[],[r]),C=n.useMemo(()=>r?N(r.new_trajectory):[],[r]),m=a.filter(t=>t.correct!==null&&t.correct!==void 0),f=m.filter(t=>t.correct===!0).length,j=m.length?Math.round(100*f/m.length):null;return g?e.jsx("div",{className:"h-full flex items-center justify-center text-gray-400",children:"Loading from HuggingFace…"}):s?e.jsxs("div",{className:"h-full flex items-center justify-center text-red-400",children:["Error: ",s]}):e.jsxs("div",{className:"h-full flex overflow-hidden bg-gray-950 text-gray-100",children:[e.jsxs("div",{className:"w-56 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900",children:[Object.keys(h).length>1&&e.jsxs("div",{className:"px-2 py-2 border-b 
border-gray-800",children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase tracking-widest mb-1",children:"Variant"}),e.jsx("select",{value:p,onChange:t=>{w(t.target.value),x("")},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5",children:Object.entries(h).map(([t,u])=>e.jsx("option",{value:t,children:u.label},t))})]}),e.jsxs("div",{className:"px-3 py-2 border-b border-gray-800 bg-gray-900/80 space-y-1.5",children:[j!==null&&e.jsxs("div",{children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase tracking-widest mb-0.5",children:"Accuracy"}),e.jsxs("div",{className:"text-lg font-bold text-sky-400",children:[j,"%"]}),e.jsxs("div",{className:"text-[10px] text-gray-600",children:[f," / ",m.length," correct"]})]}),e.jsxs("div",{children:[e.jsx("div",{className:"text-[10px] text-gray-500 uppercase tracking-widest mb-0.5",children:"Scout incomplete"}),e.jsxs("div",{className:"text-base font-bold text-amber-400",children:[a.filter(t=>t.scout_status==="incomplete").length,e.jsxs("span",{className:"text-xs text-gray-500 font-normal ml-1",children:["/ ",a.length]})]})]})]}),e.jsxs("div",{className:"px-2 py-1.5 border-b border-gray-800",children:[e.jsx("input",{type:"text",placeholder:"Search query ID…",value:o,onChange:t=>{x(t.target.value),i(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"}),e.jsxs("div",{className:"text-[10px] text-gray-600 mt-1",children:[y.length," / ",a.length]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto",children:y.map((t,u)=>e.jsxs("button",{onClick:()=>i(u),className:`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${d===u?"bg-cyan-900/40 text-cyan-200 border-l-2 border-l-cyan-500":"text-gray-400 hover:bg-gray-800"}`,children:[e.jsxs("div",{className:"flex items-center justify-between",children:[e.jsxs("span",{className:"font-medium 
text-gray-200",children:["#",t.query_id]}),e.jsxs("div",{className:"flex items-center gap-1",children:[t.correct===!0&&e.jsx("span",{className:"text-[9px] px-1.5 py-0.5 rounded-full bg-green-900/60 text-green-400 border border-green-800",children:"✓"}),t.correct===!1&&e.jsx("span",{className:"text-[9px] px-1.5 py-0.5 rounded-full bg-red-900/60 text-red-400 border border-red-800",children:"✗"}),e.jsx("span",{className:`text-[9px] px-1.5 py-0.5 rounded-full border ${t.scout_status==="incomplete"?"bg-amber-900/60 text-amber-400 border-amber-800":"bg-gray-800 text-gray-400 border-gray-700"}`,children:t.scout_status})]})]}),e.jsxs("div",{className:"text-[10px] text-gray-600 mt-0.5",children:["scout ",t.scout_total_calls," · new ",t.new_total_calls," calls"]})]},t.query_id))})]}),r?e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0",children:[e.jsxs("div",{className:"flex items-center gap-3 flex-wrap",children:[e.jsxs("span",{className:"text-sm font-medium text-gray-100",children:["Query #",r.query_id]}),r.correct===!0&&e.jsx("span",{className:"text-xs px-2 py-0.5 rounded-full bg-green-900/50 text-green-300 border border-green-800 font-semibold",children:"✓ Correct"}),r.correct===!1&&e.jsx("span",{className:"text-xs px-2 py-0.5 rounded-full bg-red-900/50 text-red-300 border border-red-800 font-semibold",children:"✗ Incorrect"}),e.jsxs("span",{className:`text-xs px-2 py-0.5 rounded-full border ${r.scout_status==="incomplete"?"bg-amber-900/50 text-amber-300 border-amber-800":"bg-gray-800 text-gray-400 border-gray-700"}`,children:["scout: ",r.scout_status]}),e.jsxs("span",{className:"text-xs text-gray-500",children:["scout ",r.scout_total_calls," calls · new ",r.new_total_calls," calls"]})]}),r.question&&e.jsxs("div",{className:"mt-1.5 text-xs text-gray-200 leading-snug bg-gray-800/40 rounded px-2 py-1.5 border 
border-gray-700",children:[e.jsx("span",{className:"text-[10px] font-bold uppercase tracking-widest text-violet-400 mr-2",children:"Question"}),r.question]}),r.correct_answer&&e.jsxs("div",{className:"mt-1 text-xs leading-snug bg-gray-800/40 rounded px-2 py-1.5 border border-gray-700 flex items-start gap-2",children:[e.jsx("span",{className:"text-[10px] font-bold uppercase tracking-widest text-green-400 shrink-0 mt-0.5",children:"Answer"}),e.jsx("span",{className:r.correct===!0?"text-green-300":r.correct===!1?"text-red-300":"text-gray-300",children:r.correct_answer})]})]}),e.jsxs("div",{className:"flex-1 flex overflow-hidden min-w-0",children:[e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 border-r border-gray-800 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/40 border-b border-gray-800 shrink-0",children:[e.jsx("span",{className:"text-[11px] font-semibold text-amber-400 uppercase tracking-widest",children:"Scout Run"}),e.jsxs("span",{className:"text-[10px] text-gray-600 ml-2",children:[r.scout_total_calls," tool calls · ",r.scout_status]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto p-3",children:e.jsx(v,{blocks:k})})]}),e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/40 border-b border-gray-800 shrink-0",children:[e.jsx("span",{className:"text-[11px] font-semibold text-cyan-400 uppercase tracking-widest",children:"New Trajectory"}),e.jsxs("span",{className:"text-[10px] text-gray-600 ml-2",children:[r.new_total_calls," tool calls · ",r.new_status]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto p-3",children:e.jsx(v,{blocks:C})})]})]})]}):e.jsx("div",{className:"flex-1 flex items-center justify-center text-gray-500",children:"No query selected."})]})}export{R as default};
frontend/dist/assets/{SelectedToolsApp-BXaKmpYg.js → SelectedToolsApp-i9MHUxW6.js} RENAMED
@@ -1,4 +1,4 @@
1
- import{r as n,j as e}from"./index-BFtcA7GQ.js";const E={reasoning:{border:"border-purple-700",labelColor:"text-purple-400",bg:"bg-purple-950/30"},tool_call:{border:"border-blue-700",labelColor:"text-blue-400",bg:"bg-blue-950/30"},tool_result:{border:"border-gray-600",labelColor:"text-gray-400",bg:"bg-gray-800/30"},final_answer:{border:"border-green-700",labelColor:"text-green-400",bg:"bg-green-950/30"},unknown:{border:"border-gray-700",labelColor:"text-gray-500",bg:""}};function $(a){if(!a)return[];const l=[],d=a.split(/\n\n(?=\[)/);for(const o of d){const r=o.trim();r.startsWith("[Reasoning]:")?l.push({type:"reasoning",label:"Reasoning",content:r.slice(12).trim()}):r.startsWith("[Tool call]")?l.push({type:"tool_call",label:"Tool Call",content:r.slice(11).trim()}):r.startsWith("[Tool result]:")?l.push({type:"tool_result",label:"Tool Result",content:r.slice(14).trim()}):r.startsWith("[Final answer]:")?l.push({type:"final_answer",label:"Final Answer",content:r.slice(15).trim()}):r&&l.push({type:"unknown",label:"—",content:r})}return l}function F(a){if(!a)return[];const l=[],d=a.split(`
2
 
3
  ---
4
 
 
1
+ import{r as n,j as e}from"./index-BcY8Ufng.js";const E={reasoning:{border:"border-purple-700",labelColor:"text-purple-400",bg:"bg-purple-950/30"},tool_call:{border:"border-blue-700",labelColor:"text-blue-400",bg:"bg-blue-950/30"},tool_result:{border:"border-gray-600",labelColor:"text-gray-400",bg:"bg-gray-800/30"},final_answer:{border:"border-green-700",labelColor:"text-green-400",bg:"bg-green-950/30"},unknown:{border:"border-gray-700",labelColor:"text-gray-500",bg:""}};function $(a){if(!a)return[];const l=[],d=a.split(/\n\n(?=\[)/);for(const o of d){const r=o.trim();r.startsWith("[Reasoning]:")?l.push({type:"reasoning",label:"Reasoning",content:r.slice(12).trim()}):r.startsWith("[Tool call]")?l.push({type:"tool_call",label:"Tool Call",content:r.slice(11).trim()}):r.startsWith("[Tool result]:")?l.push({type:"tool_result",label:"Tool Result",content:r.slice(14).trim()}):r.startsWith("[Final answer]:")?l.push({type:"final_answer",label:"Final Answer",content:r.slice(15).trim()}):r&&l.push({type:"unknown",label:"—",content:r})}return l}function F(a){if(!a)return[];const l=[],d=a.split(`
2
 
3
  ---
4
 
frontend/dist/assets/{SftDiffApp-RlJJgAZ7.js → SftDiffApp-l1Fcp8rf.js} RENAMED
@@ -1 +1 @@
1
- import{r as d,j as e}from"./index-BFtcA7GQ.js";const N={user:{bg:"bg-amber-950/40",border:"border-amber-600",label:"text-amber-400",tag:"USER MESSAGE"},reasoning:{bg:"bg-purple-950/40",border:"border-purple-600",label:"text-purple-400",tag:"REASONING"},tool_call:{bg:"bg-blue-950/40",border:"border-blue-600",label:"text-blue-400",tag:"TOOL CALL"},tool_resp:{bg:"bg-gray-800/60",border:"border-gray-600",label:"text-gray-400",tag:"TOOL RESPONSE"}};function g({kind:r,label:t,children:l}){const s=N[r];return e.jsxs("div",{className:`border-l-2 ${s.border} ${s.bg} pl-3 py-2 rounded-r mb-2`,children:[e.jsx("div",{className:`text-[9px] font-bold uppercase tracking-widest mb-1.5 ${s.label}`,children:t??s.tag}),l]})}function A(r){return r.split(/\n\n/).map(t=>t.trim()).filter(Boolean).flatMap(t=>{try{const l=JSON.parse(t);return typeof l=="object"&&l!==null&&"type"in l?[l]:[]}catch{return[]}})}function T({excerpt:r,userContent:t}){const l=d.useMemo(()=>A(r),[r]);return e.jsxs("div",{className:"space-y-0",children:[e.jsx(g,{kind:"user",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:t})}),l.map((s,a)=>{const n=JSON.stringify(s,null,2);return s.type==="reasoning"?e.jsx(g,{kind:"reasoning",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a):s.type==="function_call"?e.jsx(g,{kind:"tool_call",label:`TOOL CALL${s.name?`: ${s.name}`:""}`,children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a):s.type==="function_call_output"?e.jsx(g,{kind:"tool_resp",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a):e.jsx("div",{className:"border-l-2 border-gray-700 pl-3 py-2 mb-2",children:e.jsx("pre",{className:"text-xs text-gray-400 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a)})]})}function I(r){const 
t=[],l=/(<tool_call>[\s\S]*?<\/tool_call>)/g;let s=0,a;for(;(a=l.exec(r))!==null;){const i=r.slice(s,a.index).trim();i&&t.push({kind:"reasoning",text:i}),t.push({kind:"tool_call",text:a[1]}),s=a.index+a[1].length}const n=r.slice(s).trim();return n&&t.push({kind:"reasoning",text:n}),t}function M(r){const t=[],l=/(<think>[\s\S]*?<\/think>|<tool_call>[\s\S]*?<\/tool_call>)/g;let s=0,a;for(;(a=l.exec(r))!==null;){const i=r.slice(s,a.index).trim();i&&t.push({kind:"reasoning",text:i}),a[1].startsWith("<think>")?t.push({kind:"reasoning",text:a[1]}):t.push({kind:"tool_call",text:a[1]}),s=a.index+a[1].length}const n=r.slice(s).trim();return n&&t.push({kind:"reasoning",text:n}),t}function R({content:r,template:t}){const l=t==="qwen"?M(r):I(r),s=l.some(n=>n.kind==="tool_call"),a=N.reasoning;return e.jsxs("div",{className:`border-l-2 ${a.border} ${a.bg} pl-3 py-2 rounded-r mb-2`,children:[e.jsx("div",{className:`text-[9px] font-bold uppercase tracking-widest mb-1.5 ${a.label}`,children:s?"REASONING + TOOL CALL":"REASONING"}),l.map((n,i)=>{if(n.kind==="tool_call"){const p=N.tool_call;return e.jsxs("div",{className:`border-l-2 ${p.border} ${p.bg} pl-2 py-1.5 rounded-r mb-1.5`,children:[e.jsx("div",{className:`text-[9px] font-bold uppercase tracking-widest mb-1 ${p.label}`,children:"TOOL CALL"}),e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n.text})]},i)}return e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed mb-1.5",children:n.text},i)})]})}function G({messages:r,template:t}){return e.jsx("div",{className:"space-y-0",children:r.map((l,s)=>l.role==="user"&&s===0?e.jsx(g,{kind:"user",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:l.content})},s):l.role==="user"?e.jsx(g,{kind:"tool_resp",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono 
leading-relaxed",children:l.content})},s):e.jsx(R,{content:l.content,template:t},s))})}function D(){return e.jsxs("div",{className:"flex items-center gap-4 px-4 py-1.5 bg-gray-900/80 border-b border-gray-800 shrink-0 flex-wrap",children:[e.jsx("span",{className:"text-[9px] font-bold uppercase tracking-widest text-gray-500 mr-1",children:"Legend"}),Object.entries(N).map(([r,t])=>e.jsxs("span",{className:`flex items-center gap-1.5 text-[10px] ${t.label}`,children:[e.jsx("span",{className:`inline-block w-2.5 h-2.5 rounded-sm border ${t.border} ${t.bg}`}),t.tag]},r))]})}function B({value:r,onChange:t}){return e.jsxs("select",{value:r,onChange:l=>t(l.target.value),className:"text-xs bg-gray-800 border border-gray-600 text-gray-200 rounded px-2 py-1 ml-2 cursor-pointer",children:[e.jsx("option",{value:"gpt-oss",children:"gpt-oss"}),e.jsx("option",{value:"qwen",children:"qwen"})]})}function P(){var v,k,_,S;const[r,t]=d.useState([]),[l,s]=d.useState(!0),[a,n]=d.useState(null),[i,p]=d.useState(0),[f,E]=d.useState(""),[b,q]=d.useState("gpt-oss");d.useEffect(()=>{s(!0),fetch("/api/sft-diff/").then(o=>{if(!o.ok)throw new Error(o.statusText);return o.json()}).then(o=>{t(o.rows),s(!1)}).catch(o=>{n(o.message),s(!1)})},[]);const w=d.useMemo(()=>{if(!f.trim())return r;const o=f.toLowerCase();return r.filter(x=>{var m,u,j,h;return x.query_id.toLowerCase().includes(o)||(((u=(m=x.messages_gpt)==null?void 0:m[0])==null?void 0:u.content)??((h=(j=x.messages_qwen)==null?void 0:j[0])==null?void 0:h.content)??"").toLowerCase().includes(o)})},[r,f]),c=w[i]??null,y=c?b==="qwen"?c.messages_qwen:c.messages_gpt:null,$=((k=(v=c==null?void 0:c.messages_gpt)==null?void 0:v[0])==null?void 0:k.content)??((S=(_=c==null?void 0:c.messages_qwen)==null?void 0:_[0])==null?void 0:S.content)??"";return l?e.jsx("div",{className:"h-full flex items-center justify-center text-gray-400",children:"Loading…"}):a?e.jsxs("div",{className:"h-full flex items-center justify-center text-red-400",children:["Error: 
",a]}):e.jsxs("div",{className:"h-full flex overflow-hidden bg-gray-950 text-gray-100",children:[e.jsxs("div",{className:"w-60 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900",children:[e.jsxs("div",{className:"px-2 py-1.5 border-b border-gray-800",children:[e.jsx("input",{type:"text",placeholder:"Search query ID or question…",value:f,onChange:o=>{E(o.target.value),p(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"}),e.jsxs("div",{className:"text-[10px] text-gray-600 mt-1",children:[w.length," / ",r.length]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto",children:w.map((o,x)=>{var h,L,O,C;const m=((L=(h=o.messages_gpt)==null?void 0:h[0])==null?void 0:L.content)??((C=(O=o.messages_qwen)==null?void 0:O[0])==null?void 0:C.content)??"",u=m.match(/Question:\s*([\s\S]{0,120})/),j=u?u[1].trim().replace(/\n/g," "):m.slice(0,80);return e.jsxs("button",{onClick:()=>p(x),className:`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${i===x?"bg-blue-900/40 text-blue-200 border-l-2 border-l-blue-500":"text-gray-400 hover:bg-gray-800"}`,children:[e.jsxs("div",{className:"font-medium text-gray-200 mb-0.5",children:["#",o.query_id]}),e.jsx("div",{className:"text-[10px] text-gray-500 leading-snug line-clamp-2",children:j})]},o.query_id+x)})})]}),c?e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsx(D,{}),e.jsxs("div",{className:"flex-1 flex overflow-hidden min-w-0",children:[e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 border-r border-gray-800 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/60 border-b border-gray-800 shrink-0",children:[e.jsx("span",{className:"text-[11px] font-semibold text-amber-400 uppercase tracking-widest",children:"Original"}),e.jsx("span",{className:"text-[10px] text-gray-500 ml-2",children:"excerpt field — raw JSON items"})]}),e.jsx("div",{className:"flex-1 
overflow-y-auto p-3",children:e.jsx(T,{excerpt:c.excerpt,userContent:$})})]}),e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/60 border-b border-gray-800 shrink-0 flex items-center",children:[e.jsx("span",{className:"text-[11px] font-semibold text-sky-400 uppercase tracking-widest",children:"Converted"}),e.jsx(B,{value:b,onChange:q}),y&&e.jsxs("span",{className:"text-[10px] text-gray-500 ml-2",children:[y.length," messages"]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto p-3",children:y?e.jsx(G,{messages:y,template:b}):e.jsxs("div",{className:"text-gray-500 text-xs italic mt-4",children:["Not available for ",b," template."]})})]})]})]}):e.jsx("div",{className:"flex-1 flex items-center justify-center text-gray-500",children:"No record selected."})]})}export{P as default};
 
1
+ import{r as d,j as e}from"./index-BcY8Ufng.js";const N={user:{bg:"bg-amber-950/40",border:"border-amber-600",label:"text-amber-400",tag:"USER MESSAGE"},reasoning:{bg:"bg-purple-950/40",border:"border-purple-600",label:"text-purple-400",tag:"REASONING"},tool_call:{bg:"bg-blue-950/40",border:"border-blue-600",label:"text-blue-400",tag:"TOOL CALL"},tool_resp:{bg:"bg-gray-800/60",border:"border-gray-600",label:"text-gray-400",tag:"TOOL RESPONSE"}};function g({kind:r,label:t,children:l}){const s=N[r];return e.jsxs("div",{className:`border-l-2 ${s.border} ${s.bg} pl-3 py-2 rounded-r mb-2`,children:[e.jsx("div",{className:`text-[9px] font-bold uppercase tracking-widest mb-1.5 ${s.label}`,children:t??s.tag}),l]})}function A(r){return r.split(/\n\n/).map(t=>t.trim()).filter(Boolean).flatMap(t=>{try{const l=JSON.parse(t);return typeof l=="object"&&l!==null&&"type"in l?[l]:[]}catch{return[]}})}function T({excerpt:r,userContent:t}){const l=d.useMemo(()=>A(r),[r]);return e.jsxs("div",{className:"space-y-0",children:[e.jsx(g,{kind:"user",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:t})}),l.map((s,a)=>{const n=JSON.stringify(s,null,2);return s.type==="reasoning"?e.jsx(g,{kind:"reasoning",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a):s.type==="function_call"?e.jsx(g,{kind:"tool_call",label:`TOOL CALL${s.name?`: ${s.name}`:""}`,children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a):s.type==="function_call_output"?e.jsx(g,{kind:"tool_resp",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a):e.jsx("div",{className:"border-l-2 border-gray-700 pl-3 py-2 mb-2",children:e.jsx("pre",{className:"text-xs text-gray-400 whitespace-pre-wrap font-mono leading-relaxed",children:n})},a)})]})}function I(r){const 
t=[],l=/(<tool_call>[\s\S]*?<\/tool_call>)/g;let s=0,a;for(;(a=l.exec(r))!==null;){const i=r.slice(s,a.index).trim();i&&t.push({kind:"reasoning",text:i}),t.push({kind:"tool_call",text:a[1]}),s=a.index+a[1].length}const n=r.slice(s).trim();return n&&t.push({kind:"reasoning",text:n}),t}function M(r){const t=[],l=/(<think>[\s\S]*?<\/think>|<tool_call>[\s\S]*?<\/tool_call>)/g;let s=0,a;for(;(a=l.exec(r))!==null;){const i=r.slice(s,a.index).trim();i&&t.push({kind:"reasoning",text:i}),a[1].startsWith("<think>")?t.push({kind:"reasoning",text:a[1]}):t.push({kind:"tool_call",text:a[1]}),s=a.index+a[1].length}const n=r.slice(s).trim();return n&&t.push({kind:"reasoning",text:n}),t}function R({content:r,template:t}){const l=t==="qwen"?M(r):I(r),s=l.some(n=>n.kind==="tool_call"),a=N.reasoning;return e.jsxs("div",{className:`border-l-2 ${a.border} ${a.bg} pl-3 py-2 rounded-r mb-2`,children:[e.jsx("div",{className:`text-[9px] font-bold uppercase tracking-widest mb-1.5 ${a.label}`,children:s?"REASONING + TOOL CALL":"REASONING"}),l.map((n,i)=>{if(n.kind==="tool_call"){const p=N.tool_call;return e.jsxs("div",{className:`border-l-2 ${p.border} ${p.bg} pl-2 py-1.5 rounded-r mb-1.5`,children:[e.jsx("div",{className:`text-[9px] font-bold uppercase tracking-widest mb-1 ${p.label}`,children:"TOOL CALL"}),e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:n.text})]},i)}return e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed mb-1.5",children:n.text},i)})]})}function G({messages:r,template:t}){return e.jsx("div",{className:"space-y-0",children:r.map((l,s)=>l.role==="user"&&s===0?e.jsx(g,{kind:"user",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono leading-relaxed",children:l.content})},s):l.role==="user"?e.jsx(g,{kind:"tool_resp",children:e.jsx("pre",{className:"text-xs text-gray-200 whitespace-pre-wrap font-mono 
leading-relaxed",children:l.content})},s):e.jsx(R,{content:l.content,template:t},s))})}function D(){return e.jsxs("div",{className:"flex items-center gap-4 px-4 py-1.5 bg-gray-900/80 border-b border-gray-800 shrink-0 flex-wrap",children:[e.jsx("span",{className:"text-[9px] font-bold uppercase tracking-widest text-gray-500 mr-1",children:"Legend"}),Object.entries(N).map(([r,t])=>e.jsxs("span",{className:`flex items-center gap-1.5 text-[10px] ${t.label}`,children:[e.jsx("span",{className:`inline-block w-2.5 h-2.5 rounded-sm border ${t.border} ${t.bg}`}),t.tag]},r))]})}function B({value:r,onChange:t}){return e.jsxs("select",{value:r,onChange:l=>t(l.target.value),className:"text-xs bg-gray-800 border border-gray-600 text-gray-200 rounded px-2 py-1 ml-2 cursor-pointer",children:[e.jsx("option",{value:"gpt-oss",children:"gpt-oss"}),e.jsx("option",{value:"qwen",children:"qwen"})]})}function P(){var v,k,_,S;const[r,t]=d.useState([]),[l,s]=d.useState(!0),[a,n]=d.useState(null),[i,p]=d.useState(0),[f,E]=d.useState(""),[b,q]=d.useState("gpt-oss");d.useEffect(()=>{s(!0),fetch("/api/sft-diff/").then(o=>{if(!o.ok)throw new Error(o.statusText);return o.json()}).then(o=>{t(o.rows),s(!1)}).catch(o=>{n(o.message),s(!1)})},[]);const w=d.useMemo(()=>{if(!f.trim())return r;const o=f.toLowerCase();return r.filter(x=>{var m,u,j,h;return x.query_id.toLowerCase().includes(o)||(((u=(m=x.messages_gpt)==null?void 0:m[0])==null?void 0:u.content)??((h=(j=x.messages_qwen)==null?void 0:j[0])==null?void 0:h.content)??"").toLowerCase().includes(o)})},[r,f]),c=w[i]??null,y=c?b==="qwen"?c.messages_qwen:c.messages_gpt:null,$=((k=(v=c==null?void 0:c.messages_gpt)==null?void 0:v[0])==null?void 0:k.content)??((S=(_=c==null?void 0:c.messages_qwen)==null?void 0:_[0])==null?void 0:S.content)??"";return l?e.jsx("div",{className:"h-full flex items-center justify-center text-gray-400",children:"Loading…"}):a?e.jsxs("div",{className:"h-full flex items-center justify-center text-red-400",children:["Error: 
",a]}):e.jsxs("div",{className:"h-full flex overflow-hidden bg-gray-950 text-gray-100",children:[e.jsxs("div",{className:"w-60 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900",children:[e.jsxs("div",{className:"px-2 py-1.5 border-b border-gray-800",children:[e.jsx("input",{type:"text",placeholder:"Search query ID or question…",value:f,onChange:o=>{E(o.target.value),p(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"}),e.jsxs("div",{className:"text-[10px] text-gray-600 mt-1",children:[w.length," / ",r.length]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto",children:w.map((o,x)=>{var h,L,O,C;const m=((L=(h=o.messages_gpt)==null?void 0:h[0])==null?void 0:L.content)??((C=(O=o.messages_qwen)==null?void 0:O[0])==null?void 0:C.content)??"",u=m.match(/Question:\s*([\s\S]{0,120})/),j=u?u[1].trim().replace(/\n/g," "):m.slice(0,80);return e.jsxs("button",{onClick:()=>p(x),className:`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${i===x?"bg-blue-900/40 text-blue-200 border-l-2 border-l-blue-500":"text-gray-400 hover:bg-gray-800"}`,children:[e.jsxs("div",{className:"font-medium text-gray-200 mb-0.5",children:["#",o.query_id]}),e.jsx("div",{className:"text-[10px] text-gray-500 leading-snug line-clamp-2",children:j})]},o.query_id+x)})})]}),c?e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsx(D,{}),e.jsxs("div",{className:"flex-1 flex overflow-hidden min-w-0",children:[e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 border-r border-gray-800 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/60 border-b border-gray-800 shrink-0",children:[e.jsx("span",{className:"text-[11px] font-semibold text-amber-400 uppercase tracking-widest",children:"Original"}),e.jsx("span",{className:"text-[10px] text-gray-500 ml-2",children:"excerpt field — raw JSON items"})]}),e.jsx("div",{className:"flex-1 
overflow-y-auto p-3",children:e.jsx(T,{excerpt:c.excerpt,userContent:$})})]}),e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-3 py-1.5 bg-gray-900/60 border-b border-gray-800 shrink-0 flex items-center",children:[e.jsx("span",{className:"text-[11px] font-semibold text-sky-400 uppercase tracking-widest",children:"Converted"}),e.jsx(B,{value:b,onChange:q}),y&&e.jsxs("span",{className:"text-[10px] text-gray-500 ml-2",children:[y.length," messages"]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto p-3",children:y?e.jsx(G,{messages:y,template:b}):e.jsxs("div",{className:"text-gray-500 text-xs italic mt-4",children:["Not available for ",b," template."]})})]})]})]}):e.jsx("div",{className:"flex-1 flex items-center justify-center text-gray-500",children:"No record selected."})]})}export{P as default};
frontend/dist/assets/{TrajExtApp-DjSZ_Kko.js → TrajExtApp-FHTMLig-.js} RENAMED
@@ -1 +1 @@
1
- import{r as l,j as e}from"./index-BFtcA7GQ.js";function S(r){try{return JSON.parse(r).map(o=>({type:["reasoning","tool_call","tool_result","final_answer"].includes(o.type)?o.type:"unknown",content:o.content}))}catch{return[]}}function T(r){if(!r||r==="(no trajectory steps)")return[];const n=[],o=r.split(/\n\n(?=\[(?:Reasoning\]:|Tool call\]|Tool result\]:|Final answer\]:))/);for(const d of o){const s=d.trim();s.startsWith("[Reasoning]:")?n.push({type:"reasoning",content:s.slice(12).trim()}):s.startsWith("[Tool call]")?n.push({type:"tool_call",content:s.slice(11).trim()}):s.startsWith("[Tool result]:")?n.push({type:"tool_result",content:s.slice(14).trim()}):s.startsWith("[Final answer]:")?n.push({type:"final_answer",content:s.slice(15).trim()}):s&&n.push({type:"unknown",content:s})}return n}const k={reasoning:{border:"border-purple-700",label:"Reasoning",labelColor:"text-purple-400",bg:"bg-purple-950/30"},tool_call:{border:"border-blue-700",label:"Tool Call",labelColor:"text-blue-400",bg:"bg-blue-950/30"},tool_result:{border:"border-gray-600",label:"Tool Result",labelColor:"text-gray-400",bg:"bg-gray-800/30"},final_answer:{border:"border-green-700",label:"Final Answer",labelColor:"text-green-400",bg:"bg-green-950/30"},unknown:{border:"border-gray-700",label:"—",labelColor:"text-gray-500",bg:""}};function C({row:r}){const n=l.useMemo(()=>r.trajectory_blocks?S(r.trajectory_blocks):T(r.trajectory_text),[r.trajectory_blocks,r.trajectory_text]);return n.length===0?e.jsx("div",{className:"text-gray-500 text-xs",children:"No trajectory steps."}):e.jsx("div",{className:"space-y-2",children:n.map((o,d)=>{const s=k[o.type];return e.jsxs("div",{className:`border-l-2 ${s.border} ${s.bg} pl-3 py-1 rounded-r`,children:[e.jsx("div",{className:`text-[10px] font-bold uppercase tracking-widest mb-1 ${s.labelColor}`,children:s.label}),e.jsx("pre",{className:"text-xs text-gray-300 whitespace-pre-wrap font-mono leading-relaxed",children:o.content})]},d)})})}const E={traj_ext:"Full 
Trajectory (Tags)",traj_orig_ext:"Full Trajectory (Original Messages)",traj_summary_ext:"Summary (Tags)",traj_summary_orig_ext:"Summary (Original Messages)",traj_summary_ext_selected_tools:"Selected Tools (Tags)",traj_summary_orig_ext_selected_tools:"Selected Tools (Original Messages)"},f=r=>E[r]??r,M=["trajectory","prompt","both"];function R(){const[r,n]=l.useState([]),[o,d]=l.useState(!0),[s,j]=l.useState(null),[b,p]=l.useState(0),[c,_]=l.useState("trajectory"),[u,N]=l.useState(""),[x,v]=l.useState("all"),[m,w]=l.useState({});l.useEffect(()=>{d(!0),fetch("/api/traj-ext/").then(t=>{if(!t.ok)throw new Error(t.statusText);return t.json()}).then(t=>{n(t.rows),d(!1)}).catch(t=>{j(t.message),d(!1)})},[]);const h=l.useMemo(()=>Array.from(new Set(r.map(i=>i.run_name??""))).filter(Boolean).sort(),[r]),y=l.useMemo(()=>{let t=r;if(x!=="all"&&(t=t.filter(g=>g.run_name===x)),!u.trim())return t;const i=u.toLowerCase();return t.filter(g=>g.query_id.includes(i)||g.question.toLowerCase().includes(i))},[r,u,x]),a=y[b]??null;return l.useEffect(()=>{a&&(c!=="prompt"&&c!=="both"||m[a.run_id]||fetch(`/api/traj-ext/${encodeURIComponent(a.run_id)}`).then(t=>t.json()).then(t=>w(i=>({...i,[a.run_id]:t.formatted_prompt??""}))).catch(()=>{}))},[a==null?void 0:a.run_id,c]),o?e.jsx("div",{className:"h-full flex items-center justify-center text-gray-400",children:"Loading traj-ext data from HuggingFace…"}):s?e.jsxs("div",{className:"h-full flex items-center justify-center text-red-400",children:["Error: ",s]}):e.jsxs("div",{className:"h-full flex overflow-hidden bg-gray-950 text-gray-100",children:[e.jsxs("div",{className:"w-64 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900",children:[e.jsxs("div",{className:"p-2 border-b border-gray-800 space-y-1.5",children:[h.length>0&&e.jsxs("select",{value:x,onChange:t=>{v(t.target.value),p(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5",children:[e.jsxs("option",{value:"all",children:["All 
runs (",r.length,")"]}),h.map(t=>e.jsx("option",{value:t,children:f(t)},t))]}),e.jsx("input",{type:"text",placeholder:"Search queries…",value:u,onChange:t=>{N(t.target.value),p(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"}),e.jsxs("div",{className:"text-[10px] text-gray-600",children:[y.length,x!=="all"||u?` / ${r.length}`:""," trajectories"]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto",children:y.map((t,i)=>e.jsxs("button",{onClick:()=>p(i),className:`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${b===i?"bg-blue-900/40 text-blue-200 border-l-2 border-l-blue-500":"text-gray-400 hover:bg-gray-800"}`,children:[e.jsxs("div",{className:"font-medium text-gray-200",children:["#",t.query_id]}),e.jsxs("div",{className:"text-gray-500 truncate mt-0.5",children:[t.question.slice(0,60),"…"]}),e.jsxs("div",{className:"flex gap-2 mt-1 text-[10px] text-gray-600 flex-wrap items-center",children:[e.jsxs("span",{children:[t.n_tool_calls," tools"]}),e.jsxs("span",{children:[t.n_reasoning_steps," reasoning"]}),e.jsx("span",{className:t.status==="completed"?"text-green-600":"text-amber-600",children:t.status}),t.n_tool_calls===0&&e.jsx("span",{className:"px-1 py-0.5 rounded bg-yellow-900/50 text-yellow-300 font-semibold",children:"direct"})]}),t.run_name&&x==="all"&&e.jsx("div",{className:"mt-0.5 text-[9px] text-indigo-400 truncate",children:f(t.run_name)})]},t.run_id))})]}),a?e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0",children:[e.jsxs("div",{className:"flex items-center gap-3 flex-wrap",children:[e.jsxs("span",{className:"text-sm font-medium text-gray-100",children:["Query #",a.query_id]}),e.jsx("span",{className:`text-xs px-2 py-0.5 rounded-full ${a.status==="completed"?"bg-green-900/50 text-green-300":"bg-amber-900/50 
text-amber-300"}`,children:a.status}),e.jsxs("span",{className:"text-xs text-gray-500",children:[a.n_tool_calls," tool calls · ",a.n_reasoning_steps," reasoning"]}),a.n_tool_calls===0&&e.jsx("span",{className:"px-1.5 py-0.5 rounded bg-yellow-900/50 text-yellow-300 text-xs font-semibold",children:"direct"}),e.jsx("div",{className:"ml-auto flex gap-1",children:M.map(t=>e.jsx("button",{onClick:()=>_(t),className:`px-2 py-0.5 text-xs rounded border transition-colors capitalize ${c===t?"bg-blue-700 border-blue-500 text-white":"bg-gray-800 border-gray-700 text-gray-400 hover:border-gray-500"}`,children:t},t))})]}),e.jsx("div",{className:"mt-1 text-xs text-gray-300 leading-snug",children:a.question})]}),e.jsxs("div",{className:"flex-1 overflow-y-auto p-4 space-y-4 min-w-0",children:[(c==="prompt"||c==="both")&&e.jsxs("div",{children:[c==="both"&&e.jsx("div",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wide mb-2",children:"Prompt"}),m[a.run_id]?e.jsx("pre",{className:"text-xs font-mono whitespace-pre-wrap text-gray-200 bg-gray-900 border border-gray-700 rounded p-3 leading-relaxed",children:m[a.run_id]}):e.jsx("div",{className:"text-gray-500 text-xs p-3",children:"Loading prompt…"})]}),(c==="trajectory"||c==="both")&&e.jsxs("div",{children:[c==="both"&&e.jsx("div",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wide mb-2 mt-4",children:"Trajectory"}),e.jsx(C,{row:a})]})]})]}):e.jsx("div",{className:"flex-1 flex items-center justify-center text-gray-500",children:"No trajectory selected."})]})}export{R as default};
 
1
+ import{r as l,j as e}from"./index-BcY8Ufng.js";function S(r){try{return JSON.parse(r).map(o=>({type:["reasoning","tool_call","tool_result","final_answer"].includes(o.type)?o.type:"unknown",content:o.content}))}catch{return[]}}function T(r){if(!r||r==="(no trajectory steps)")return[];const n=[],o=r.split(/\n\n(?=\[(?:Reasoning\]:|Tool call\]|Tool result\]:|Final answer\]:))/);for(const d of o){const s=d.trim();s.startsWith("[Reasoning]:")?n.push({type:"reasoning",content:s.slice(12).trim()}):s.startsWith("[Tool call]")?n.push({type:"tool_call",content:s.slice(11).trim()}):s.startsWith("[Tool result]:")?n.push({type:"tool_result",content:s.slice(14).trim()}):s.startsWith("[Final answer]:")?n.push({type:"final_answer",content:s.slice(15).trim()}):s&&n.push({type:"unknown",content:s})}return n}const k={reasoning:{border:"border-purple-700",label:"Reasoning",labelColor:"text-purple-400",bg:"bg-purple-950/30"},tool_call:{border:"border-blue-700",label:"Tool Call",labelColor:"text-blue-400",bg:"bg-blue-950/30"},tool_result:{border:"border-gray-600",label:"Tool Result",labelColor:"text-gray-400",bg:"bg-gray-800/30"},final_answer:{border:"border-green-700",label:"Final Answer",labelColor:"text-green-400",bg:"bg-green-950/30"},unknown:{border:"border-gray-700",label:"—",labelColor:"text-gray-500",bg:""}};function C({row:r}){const n=l.useMemo(()=>r.trajectory_blocks?S(r.trajectory_blocks):T(r.trajectory_text),[r.trajectory_blocks,r.trajectory_text]);return n.length===0?e.jsx("div",{className:"text-gray-500 text-xs",children:"No trajectory steps."}):e.jsx("div",{className:"space-y-2",children:n.map((o,d)=>{const s=k[o.type];return e.jsxs("div",{className:`border-l-2 ${s.border} ${s.bg} pl-3 py-1 rounded-r`,children:[e.jsx("div",{className:`text-[10px] font-bold uppercase tracking-widest mb-1 ${s.labelColor}`,children:s.label}),e.jsx("pre",{className:"text-xs text-gray-300 whitespace-pre-wrap font-mono leading-relaxed",children:o.content})]},d)})})}const E={traj_ext:"Full 
Trajectory (Tags)",traj_orig_ext:"Full Trajectory (Original Messages)",traj_summary_ext:"Summary (Tags)",traj_summary_orig_ext:"Summary (Original Messages)",traj_summary_ext_selected_tools:"Selected Tools (Tags)",traj_summary_orig_ext_selected_tools:"Selected Tools (Original Messages)"},f=r=>E[r]??r,M=["trajectory","prompt","both"];function R(){const[r,n]=l.useState([]),[o,d]=l.useState(!0),[s,j]=l.useState(null),[b,p]=l.useState(0),[c,_]=l.useState("trajectory"),[u,N]=l.useState(""),[x,v]=l.useState("all"),[m,w]=l.useState({});l.useEffect(()=>{d(!0),fetch("/api/traj-ext/").then(t=>{if(!t.ok)throw new Error(t.statusText);return t.json()}).then(t=>{n(t.rows),d(!1)}).catch(t=>{j(t.message),d(!1)})},[]);const h=l.useMemo(()=>Array.from(new Set(r.map(i=>i.run_name??""))).filter(Boolean).sort(),[r]),y=l.useMemo(()=>{let t=r;if(x!=="all"&&(t=t.filter(g=>g.run_name===x)),!u.trim())return t;const i=u.toLowerCase();return t.filter(g=>g.query_id.includes(i)||g.question.toLowerCase().includes(i))},[r,u,x]),a=y[b]??null;return l.useEffect(()=>{a&&(c!=="prompt"&&c!=="both"||m[a.run_id]||fetch(`/api/traj-ext/${encodeURIComponent(a.run_id)}`).then(t=>t.json()).then(t=>w(i=>({...i,[a.run_id]:t.formatted_prompt??""}))).catch(()=>{}))},[a==null?void 0:a.run_id,c]),o?e.jsx("div",{className:"h-full flex items-center justify-center text-gray-400",children:"Loading traj-ext data from HuggingFace…"}):s?e.jsxs("div",{className:"h-full flex items-center justify-center text-red-400",children:["Error: ",s]}):e.jsxs("div",{className:"h-full flex overflow-hidden bg-gray-950 text-gray-100",children:[e.jsxs("div",{className:"w-64 shrink-0 flex flex-col border-r border-gray-800 bg-gray-900",children:[e.jsxs("div",{className:"p-2 border-b border-gray-800 space-y-1.5",children:[h.length>0&&e.jsxs("select",{value:x,onChange:t=>{v(t.target.value),p(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5",children:[e.jsxs("option",{value:"all",children:["All 
runs (",r.length,")"]}),h.map(t=>e.jsx("option",{value:t,children:f(t)},t))]}),e.jsx("input",{type:"text",placeholder:"Search queries…",value:u,onChange:t=>{N(t.target.value),p(0)},className:"w-full bg-gray-800 border border-gray-700 text-gray-200 text-xs rounded px-2 py-1.5 placeholder-gray-600"}),e.jsxs("div",{className:"text-[10px] text-gray-600",children:[y.length,x!=="all"||u?` / ${r.length}`:""," trajectories"]})]}),e.jsx("div",{className:"flex-1 overflow-y-auto",children:y.map((t,i)=>e.jsxs("button",{onClick:()=>p(i),className:`w-full text-left px-3 py-2 border-b border-gray-800/50 text-xs transition-colors ${b===i?"bg-blue-900/40 text-blue-200 border-l-2 border-l-blue-500":"text-gray-400 hover:bg-gray-800"}`,children:[e.jsxs("div",{className:"font-medium text-gray-200",children:["#",t.query_id]}),e.jsxs("div",{className:"text-gray-500 truncate mt-0.5",children:[t.question.slice(0,60),"…"]}),e.jsxs("div",{className:"flex gap-2 mt-1 text-[10px] text-gray-600 flex-wrap items-center",children:[e.jsxs("span",{children:[t.n_tool_calls," tools"]}),e.jsxs("span",{children:[t.n_reasoning_steps," reasoning"]}),e.jsx("span",{className:t.status==="completed"?"text-green-600":"text-amber-600",children:t.status}),t.n_tool_calls===0&&e.jsx("span",{className:"px-1 py-0.5 rounded bg-yellow-900/50 text-yellow-300 font-semibold",children:"direct"})]}),t.run_name&&x==="all"&&e.jsx("div",{className:"mt-0.5 text-[9px] text-indigo-400 truncate",children:f(t.run_name)})]},t.run_id))})]}),a?e.jsxs("div",{className:"flex-1 flex flex-col min-w-0 overflow-hidden",children:[e.jsxs("div",{className:"px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0",children:[e.jsxs("div",{className:"flex items-center gap-3 flex-wrap",children:[e.jsxs("span",{className:"text-sm font-medium text-gray-100",children:["Query #",a.query_id]}),e.jsx("span",{className:`text-xs px-2 py-0.5 rounded-full ${a.status==="completed"?"bg-green-900/50 text-green-300":"bg-amber-900/50 
text-amber-300"}`,children:a.status}),e.jsxs("span",{className:"text-xs text-gray-500",children:[a.n_tool_calls," tool calls · ",a.n_reasoning_steps," reasoning"]}),a.n_tool_calls===0&&e.jsx("span",{className:"px-1.5 py-0.5 rounded bg-yellow-900/50 text-yellow-300 text-xs font-semibold",children:"direct"}),e.jsx("div",{className:"ml-auto flex gap-1",children:M.map(t=>e.jsx("button",{onClick:()=>_(t),className:`px-2 py-0.5 text-xs rounded border transition-colors capitalize ${c===t?"bg-blue-700 border-blue-500 text-white":"bg-gray-800 border-gray-700 text-gray-400 hover:border-gray-500"}`,children:t},t))})]}),e.jsx("div",{className:"mt-1 text-xs text-gray-300 leading-snug",children:a.question})]}),e.jsxs("div",{className:"flex-1 overflow-y-auto p-4 space-y-4 min-w-0",children:[(c==="prompt"||c==="both")&&e.jsxs("div",{children:[c==="both"&&e.jsx("div",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wide mb-2",children:"Prompt"}),m[a.run_id]?e.jsx("pre",{className:"text-xs font-mono whitespace-pre-wrap text-gray-200 bg-gray-900 border border-gray-700 rounded p-3 leading-relaxed",children:m[a.run_id]}):e.jsx("div",{className:"text-gray-500 text-xs p-3",children:"Loading prompt…"})]}),(c==="trajectory"||c==="both")&&e.jsxs("div",{children:[c==="both"&&e.jsx("div",{className:"text-xs font-semibold text-gray-400 uppercase tracking-wide mb-2 mt-4",children:"Trajectory"}),e.jsx(C,{row:a})]})]})]}):e.jsx("div",{className:"flex-1 flex items-center justify-center text-gray-500",children:"No trajectory selected."})]})}export{R as default};
frontend/dist/assets/{VisualizerApp-Bo-eQZ6n.js → VisualizerApp-D7DKZrnc.js} RENAMED
@@ -1,2 +1,2 @@
1
- const __vite__mapDeps=(i,m=__vite__mapDeps,d=(m.f||(m.f=["assets/ModelApp-ByP0hVWF.js","assets/index-BFtcA7GQ.js","assets/index-CxBbYLho.css","assets/PlanRevisionsApp-Dm4VU5KS.js","assets/TrajExtApp-DjSZ_Kko.js","assets/SelectedToolsApp-BXaKmpYg.js","assets/ScoutRunsApp-BqSa3vbC.js"])))=>i.map(i=>d[i]);
2
- import{u as o,j as e,n as d,r as l,_ as t}from"./index-BFtcA7GQ.js";const c=l.lazy(()=>t(()=>import("./ModelApp-ByP0hVWF.js"),__vite__mapDeps([0,1,2]))),n=l.lazy(()=>t(()=>import("./PlanRevisionsApp-Dm4VU5KS.js"),__vite__mapDeps([3,1,2]))),x=l.lazy(()=>t(()=>import("./TrajExtApp-DjSZ_Kko.js"),__vite__mapDeps([4,1,2]))),m=l.lazy(()=>t(()=>import("./SelectedToolsApp-BXaKmpYg.js"),__vite__mapDeps([5,1,2]))),u=l.lazy(()=>t(()=>import("./ScoutRunsApp-BqSa3vbC.js"),__vite__mapDeps([6,1,2]))),i=[{id:"model",label:"Model Trace",activeClass:"border-blue-500 text-blue-400"},{id:"plan-revisions",label:"Plan Revisions",activeClass:"border-amber-500 text-amber-400"},{id:"traj-ext",label:"Traj Ext",activeClass:"border-emerald-500 text-emerald-400"},{id:"selected-tools",label:"Selected Tools",activeClass:"border-amber-400 text-amber-300"},{id:"scout-runs",label:"Scout Runs",activeClass:"border-cyan-500 text-cyan-400"}],p=new Set(i.map(r=>r.id));function _(){const r=o(),s=p.has(r.tab)?r.tab:"model";return e.jsxs("div",{className:"h-full flex flex-col",children:[e.jsx("div",{className:"flex items-center border-b border-gray-800 bg-gray-900/50 px-2 shrink-0",children:i.map(a=>e.jsx("button",{onClick:()=>d({page:"viz",tab:a.id}),className:`px-5 py-2 text-sm font-medium border-b-2 transition-colors ${s===a.id?a.activeClass:"border-transparent text-gray-500 hover:text-gray-300"}`,children:a.label},a.id))}),e.jsx("div",{className:"flex-1 overflow-hidden",children:e.jsxs(l.Suspense,{fallback:e.jsx("div",{className:"flex items-center justify-center h-full text-gray-500",children:"Loading..."}),children:[s==="model"&&e.jsx("div",{className:"theme-model 
h-full",children:e.jsx(c,{})}),s==="plan-revisions"&&e.jsx("div",{className:"h-full",children:e.jsx(n,{})}),s==="traj-ext"&&e.jsx("div",{className:"h-full",children:e.jsx(x,{})}),s==="selected-tools"&&e.jsx("div",{className:"h-full",children:e.jsx(m,{})}),s==="scout-runs"&&e.jsx("div",{className:"h-full",children:e.jsx(u,{})})]})})]})}export{_ as default};
 
1
+ const __vite__mapDeps=(i,m=__vite__mapDeps,d=(m.f||(m.f=["assets/ModelApp-D9tEU6of.js","assets/index-BcY8Ufng.js","assets/index-CxBbYLho.css","assets/PlanRevisionsApp-Dr-ACMnp.js","assets/TrajExtApp-FHTMLig-.js","assets/SelectedToolsApp-i9MHUxW6.js","assets/ScoutRunsApp-UEZw1rLk.js"])))=>i.map(i=>d[i]);
2
+ import{u as o,j as e,n as d,r as l,_ as t}from"./index-BcY8Ufng.js";const c=l.lazy(()=>t(()=>import("./ModelApp-D9tEU6of.js"),__vite__mapDeps([0,1,2]))),n=l.lazy(()=>t(()=>import("./PlanRevisionsApp-Dr-ACMnp.js"),__vite__mapDeps([3,1,2]))),x=l.lazy(()=>t(()=>import("./TrajExtApp-FHTMLig-.js"),__vite__mapDeps([4,1,2]))),m=l.lazy(()=>t(()=>import("./SelectedToolsApp-i9MHUxW6.js"),__vite__mapDeps([5,1,2]))),u=l.lazy(()=>t(()=>import("./ScoutRunsApp-UEZw1rLk.js"),__vite__mapDeps([6,1,2]))),i=[{id:"model",label:"Model Trace",activeClass:"border-blue-500 text-blue-400"},{id:"plan-revisions",label:"Plan Revisions",activeClass:"border-amber-500 text-amber-400"},{id:"traj-ext",label:"Traj Ext",activeClass:"border-emerald-500 text-emerald-400"},{id:"selected-tools",label:"Selected Tools",activeClass:"border-amber-400 text-amber-300"},{id:"scout-runs",label:"Scout Runs",activeClass:"border-cyan-500 text-cyan-400"}],p=new Set(i.map(r=>r.id));function _(){const r=o(),s=p.has(r.tab)?r.tab:"model";return e.jsxs("div",{className:"h-full flex flex-col",children:[e.jsx("div",{className:"flex items-center border-b border-gray-800 bg-gray-900/50 px-2 shrink-0",children:i.map(a=>e.jsx("button",{onClick:()=>d({page:"viz",tab:a.id}),className:`px-5 py-2 text-sm font-medium border-b-2 transition-colors ${s===a.id?a.activeClass:"border-transparent text-gray-500 hover:text-gray-300"}`,children:a.label},a.id))}),e.jsx("div",{className:"flex-1 overflow-hidden",children:e.jsxs(l.Suspense,{fallback:e.jsx("div",{className:"flex items-center justify-center h-full text-gray-500",children:"Loading..."}),children:[s==="model"&&e.jsx("div",{className:"theme-model 
h-full",children:e.jsx(c,{})}),s==="plan-revisions"&&e.jsx("div",{className:"h-full",children:e.jsx(n,{})}),s==="traj-ext"&&e.jsx("div",{className:"h-full",children:e.jsx(x,{})}),s==="selected-tools"&&e.jsx("div",{className:"h-full",children:e.jsx(m,{})}),s==="scout-runs"&&e.jsx("div",{className:"h-full",children:e.jsx(u,{})})]})})]})}export{_ as default};
frontend/dist/assets/{index-BFtcA7GQ.js → index-BcY8Ufng.js} RENAMED
The diff for this file is too large to render. See raw diff
 
frontend/dist/index.html CHANGED
@@ -4,7 +4,7 @@
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
  <title>RACA Dashboard</title>
7
- <script type="module" crossorigin src="/assets/index-BFtcA7GQ.js"></script>
8
  <link rel="stylesheet" crossorigin href="/assets/index-CxBbYLho.css">
9
  </head>
10
  <body class="bg-gray-950 text-gray-100">
 
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
  <title>RACA Dashboard</title>
7
+ <script type="module" crossorigin src="/assets/index-BcY8Ufng.js"></script>
8
  <link rel="stylesheet" crossorigin href="/assets/index-CxBbYLho.css">
9
  </head>
10
  <body class="bg-gray-950 text-gray-100">
frontend/src/scout_runs/ScoutRunsApp.tsx CHANGED
@@ -12,6 +12,9 @@ interface Row {
12
  new_tool_calls: Record<string, number>;
13
  scout_total_calls: number;
14
  new_total_calls: number;
 
 
 
15
  }
16
 
17
  interface VariantMeta { label: string; description: string; repo: string }
@@ -117,6 +120,10 @@ export default function ScoutRunsApp() {
117
  const scoutBlocks = useMemo(() => current ? parseTrajectory(current.scout_trajectory) : [], [current]);
118
  const newBlocks = useMemo(() => current ? parseTrajectory(current.new_trajectory) : [], [current]);
119
 
 
 
 
 
120
  if (loading) return <div className="h-full flex items-center justify-center text-gray-400">Loading from HuggingFace…</div>;
121
  if (error) return <div className="h-full flex items-center justify-center text-red-400">Error: {error}</div>;
122
 
@@ -143,11 +150,20 @@ export default function ScoutRunsApp() {
143
  )}
144
 
145
  {/* Stats */}
146
- <div className="px-3 py-2 border-b border-gray-800 bg-gray-900/80">
147
- <div className="text-[10px] text-gray-500 uppercase tracking-widest mb-1">Scout incomplete</div>
148
- <div className="text-lg font-bold text-amber-400">
149
- {data.filter(r => r.scout_status === "incomplete").length}
150
- <span className="text-xs text-gray-500 font-normal ml-1">/ {data.length}</span>
 
 
 
 
 
 
 
 
 
151
  </div>
152
  </div>
153
 
@@ -177,14 +193,22 @@ export default function ScoutRunsApp() {
177
  >
178
  <div className="flex items-center justify-between">
179
  <span className="font-medium text-gray-200">#{row.query_id}</span>
180
- <span className={`text-[9px] px-1.5 py-0.5 rounded-full border ${
181
- row.scout_status === "incomplete"
182
- ? "bg-amber-900/60 text-amber-400 border-amber-800"
183
- : "bg-gray-800 text-gray-400 border-gray-700"
184
- }`}>{row.scout_status}</span>
 
 
 
 
 
 
 
 
185
  </div>
186
  <div className="text-[10px] text-gray-600 mt-0.5">
187
- scout {row.scout_total_calls} calls · new {row.new_total_calls} calls
188
  </div>
189
  </button>
190
  ))}
@@ -199,6 +223,12 @@ export default function ScoutRunsApp() {
199
  <div className="px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0">
200
  <div className="flex items-center gap-3 flex-wrap">
201
  <span className="text-sm font-medium text-gray-100">Query #{current.query_id}</span>
 
 
 
 
 
 
202
  <span className={`text-xs px-2 py-0.5 rounded-full border ${
203
  current.scout_status === "incomplete"
204
  ? "bg-amber-900/50 text-amber-300 border-amber-800"
@@ -208,6 +238,20 @@ export default function ScoutRunsApp() {
208
  scout {current.scout_total_calls} calls · new {current.new_total_calls} calls
209
  </span>
210
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  </div>
212
 
213
  {/* Side-by-side */}
 
12
  new_tool_calls: Record<string, number>;
13
  scout_total_calls: number;
14
  new_total_calls: number;
15
+ question: string;
16
+ correct_answer: string;
17
+ correct: boolean | null;
18
  }
19
 
20
  interface VariantMeta { label: string; description: string; repo: string }
 
120
  const scoutBlocks = useMemo(() => current ? parseTrajectory(current.scout_trajectory) : [], [current]);
121
  const newBlocks = useMemo(() => current ? parseTrajectory(current.new_trajectory) : [], [current]);
122
 
123
+ const evalRows = data.filter(r => r.correct !== null && r.correct !== undefined);
124
+ const correctCount = evalRows.filter(r => r.correct === true).length;
125
+ const accuracyPct = evalRows.length ? Math.round(100 * correctCount / evalRows.length) : null;
126
+
127
  if (loading) return <div className="h-full flex items-center justify-center text-gray-400">Loading from HuggingFace…</div>;
128
  if (error) return <div className="h-full flex items-center justify-center text-red-400">Error: {error}</div>;
129
 
 
150
  )}
151
 
152
  {/* Stats */}
153
+ <div className="px-3 py-2 border-b border-gray-800 bg-gray-900/80 space-y-1.5">
154
+ {accuracyPct !== null && (
155
+ <div>
156
+ <div className="text-[10px] text-gray-500 uppercase tracking-widest mb-0.5">Accuracy</div>
157
+ <div className="text-lg font-bold text-sky-400">{accuracyPct}%</div>
158
+ <div className="text-[10px] text-gray-600">{correctCount} / {evalRows.length} correct</div>
159
+ </div>
160
+ )}
161
+ <div>
162
+ <div className="text-[10px] text-gray-500 uppercase tracking-widest mb-0.5">Scout incomplete</div>
163
+ <div className="text-base font-bold text-amber-400">
164
+ {data.filter(r => r.scout_status === "incomplete").length}
165
+ <span className="text-xs text-gray-500 font-normal ml-1">/ {data.length}</span>
166
+ </div>
167
  </div>
168
  </div>
169
 
 
193
  >
194
  <div className="flex items-center justify-between">
195
  <span className="font-medium text-gray-200">#{row.query_id}</span>
196
+ <div className="flex items-center gap-1">
197
+ {row.correct === true && (
198
+ <span className="text-[9px] px-1.5 py-0.5 rounded-full bg-green-900/60 text-green-400 border border-green-800">✓</span>
199
+ )}
200
+ {row.correct === false && (
201
+ <span className="text-[9px] px-1.5 py-0.5 rounded-full bg-red-900/60 text-red-400 border border-red-800">✗</span>
202
+ )}
203
+ <span className={`text-[9px] px-1.5 py-0.5 rounded-full border ${
204
+ row.scout_status === "incomplete"
205
+ ? "bg-amber-900/60 text-amber-400 border-amber-800"
206
+ : "bg-gray-800 text-gray-400 border-gray-700"
207
+ }`}>{row.scout_status}</span>
208
+ </div>
209
  </div>
210
  <div className="text-[10px] text-gray-600 mt-0.5">
211
+ scout {row.scout_total_calls} · new {row.new_total_calls} calls
212
  </div>
213
  </button>
214
  ))}
 
223
  <div className="px-4 py-2 bg-gray-900/60 border-b border-gray-800 shrink-0">
224
  <div className="flex items-center gap-3 flex-wrap">
225
  <span className="text-sm font-medium text-gray-100">Query #{current.query_id}</span>
226
+ {current.correct === true && (
227
+ <span className="text-xs px-2 py-0.5 rounded-full bg-green-900/50 text-green-300 border border-green-800 font-semibold">✓ Correct</span>
228
+ )}
229
+ {current.correct === false && (
230
+ <span className="text-xs px-2 py-0.5 rounded-full bg-red-900/50 text-red-300 border border-red-800 font-semibold">✗ Incorrect</span>
231
+ )}
232
  <span className={`text-xs px-2 py-0.5 rounded-full border ${
233
  current.scout_status === "incomplete"
234
  ? "bg-amber-900/50 text-amber-300 border-amber-800"
 
238
  scout {current.scout_total_calls} calls · new {current.new_total_calls} calls
239
  </span>
240
  </div>
241
+ {current.question && (
242
+ <div className="mt-1.5 text-xs text-gray-200 leading-snug bg-gray-800/40 rounded px-2 py-1.5 border border-gray-700">
243
+ <span className="text-[10px] font-bold uppercase tracking-widest text-violet-400 mr-2">Question</span>
244
+ {current.question}
245
+ </div>
246
+ )}
247
+ {current.correct_answer && (
248
+ <div className="mt-1 text-xs leading-snug bg-gray-800/40 rounded px-2 py-1.5 border border-gray-700 flex items-start gap-2">
249
+ <span className="text-[10px] font-bold uppercase tracking-widest text-green-400 shrink-0 mt-0.5">Answer</span>
250
+ <span className={current.correct === true ? "text-green-300" : current.correct === false ? "text-red-300" : "text-gray-300"}>
251
+ {current.correct_answer}
252
+ </span>
253
+ </div>
254
+ )}
255
  </div>
256
 
257
  {/* Side-by-side */}
patch_orig_analysis_with_eval.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Patch the traj_summary_orig_ext (orig-analysis) HF dataset to add
4
+ question/correct_answer/correct by joining with eval result files.
5
+
6
+ Dataset: timchen0618/browsecomp-plus-selected-tools-orig-analysis-v1 (826 rows)
7
+ Eval dir: evals/bcp/Qwen3-Embedding-8B/full/gpt-oss-120b/
8
+ traj_summary_orig_ext_selected_tools_gpt-oss-120b_seed0 (832 eval files)
9
+
10
+ Python env: /scratch/hc3337/envs/raca-py312/bin/python
11
+ """
12
+ from __future__ import annotations
13
+ import argparse, json, sys, os
14
+ from pathlib import Path
15
+
16
+ os.environ.setdefault("HF_HOME", "/scratch/hc3337/.cache/huggingface")
17
+
18
+ REPO = "timchen0618/browsecomp-plus-selected-tools-orig-analysis-v1"
19
+ EVAL_DIR = Path("/scratch/hc3337/projects/BrowseComp-Plus/evals/bcp/Qwen3-Embedding-8B/full/gpt-oss-120b/traj_summary_orig_ext_selected_tools_gpt-oss-120b_seed0")
20
+
21
+
22
def load_eval_data(eval_dir: Path) -> dict:
    """Build a ``query_id -> eval fields`` lookup from ``*_eval.json`` files.

    Every readable file directly inside *eval_dir* whose name matches
    ``*_eval.json`` contributes one entry. Files that cannot be opened,
    decoded, or that do not have the expected dict shape are reported on
    stderr and skipped, so one bad file never aborts the whole load.

    Args:
        eval_dir: Directory to scan (non-recursively) for eval result files.

    Returns:
        A dict keyed by query id. The key is an ``int`` when the id consists
        only of digits, otherwise the stripped string. Each value is a dict
        with ``question`` (str), ``correct_answer`` (str) and ``correct``
        (``bool``, or ``None`` when the judge result carries no verdict).
    """
    eval_map: dict = {}
    # Sorted so that if two files carry the same query_id the surviving entry
    # is deterministic (last path in lexicographic order) instead of depending
    # on filesystem enumeration order.
    for path in sorted(eval_dir.glob("*_eval.json")):
        try:
            # Context manager closes the handle promptly; the directory may
            # hold hundreds of files.
            with path.open("r", encoding="utf-8") as fh:
                record = json.load(fh)
            qid_raw = str(record.get("query_id", "")).strip()
            qid = int(qid_raw) if qid_raw.isdigit() else qid_raw
            judge = record.get("judge_result") or {}
            verdict = judge.get("correct")
            # NOTE(review): bool() on a non-empty string such as "false" is
            # True -- confirm judge_result["correct"] is always a real boolean.
            eval_map[qid] = {
                "question": str(record.get("question") or ""),
                "correct_answer": str(record.get("correct_answer") or ""),
                "correct": bool(verdict) if verdict is not None else None,
            }
        except Exception as exc:
            # Deliberately broad: this is a best-effort bulk load and every
            # failure is logged with the offending file name.
            print(f"warning: skipping {path.name}: {exc}", file=sys.stderr)
    print(f"Loaded {len(eval_map)} eval entries from {eval_dir}", file=sys.stderr)
    return eval_map
40
+
41
+
42
def main():
    """Join eval results onto the Hub dataset and push the patched copy.

    Loads the eval lookup from ``EVAL_DIR``, downloads the ``train`` split of
    ``REPO``, sets ``question`` / ``correct_answer`` / ``correct`` on every
    row (empty string / empty string / ``None`` when the row has no eval
    entry), reports match and accuracy statistics on stderr, and pushes the
    result back to ``REPO``.
    """
    # Imported lazily so the module can be imported without `datasets`.
    from datasets import load_dataset, Dataset

    eval_map = load_eval_data(EVAL_DIR)

    print(f"Loading {REPO}...", file=sys.stderr)
    ds = load_dataset(REPO, split="train")
    print(f"Loaded {len(ds)} rows. Columns: {ds.column_names}", file=sys.stderr)

    rows = []
    matched = 0
    for row in ds:
        # Same id normalisation as load_eval_data so the keys line up.
        qid_raw = str(row["query_id"]).strip()
        qid = int(qid_raw) if qid_raw.isdigit() else qid_raw
        ev = eval_map.get(qid) or {}
        if ev:
            matched += 1
        patched = dict(row)
        patched["question"] = ev.get("question", "")
        patched["correct_answer"] = ev.get("correct_answer", "")
        patched["correct"] = ev.get("correct")
        rows.append(patched)

    print(f"Matched {matched}/{len(rows)} rows with eval data.", file=sys.stderr)
    if rows and not matched:
        print("warning: no dataset row matched any eval entry; "
              "the pushed columns will be empty.", file=sys.stderr)

    # Accuracy is taken over rows that actually carry a verdict. A matched row
    # whose judge result has no `correct` value is unknown, not wrong, so it
    # must not inflate the denominator.
    evaluated = sum(1 for r in rows if r["correct"] is not None)
    correct_count = sum(1 for r in rows if r["correct"] is True)
    if evaluated:
        print(f"Accuracy: {correct_count}/{evaluated} "
              f"({100 * correct_count // evaluated}%)", file=sys.stderr)

    ds_new = Dataset.from_list(rows)
    ds_new.push_to_hub(REPO, split="train",
                       commit_message="Add question/correct_answer/correct columns")
    print(f"Pushed {len(rows)} rows to {REPO}.")
74
+
75
+
76
+ if __name__ == "__main__":
77
+ main()
rebuild_all_scout_runs_with_eval.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ export HF_HOME=/scratch/hc3337/.cache/huggingface
5
+
6
+ PYTHON=/scratch/hc3337/envs/raca-py312/bin/python
7
+ SCRIPT=/scratch/hc3337/tmp/build_scout_runs_with_eval.py
8
+ EVAL_BASE=/scratch/hc3337/projects/BrowseComp-Plus/evals/bcp/Qwen3-Embedding-8B/test300/gpt-oss-120b
9
+
10
# Patch one scout-run dataset on the HF Hub with the eval columns.
#   $1  variant key; selects the repo
#       timchen0618/browsecomp-plus-scout-runs-test300-<key>-v1
#   $2  eval directory name, resolved under $EVAL_BASE
# Reads the globals PYTHON, SCRIPT and EVAL_BASE defined at the top of this
# script. Returns non-zero (which aborts the script under `set -e`) when the
# arguments are wrong, the eval directory is missing, or the Python step fails.
# NOTE: inside this script the function shadows the system `patch` utility.
patch() {
  if [ "$#" -ne 2 ]; then
    echo "usage: patch <key> <eval-dir-name>" >&2
    return 2
  fi

  # `local` keeps these out of the global namespace between invocations.
  local key=$1
  local eval_dir="$EVAL_BASE/$2"
  local repo="timchen0618/browsecomp-plus-scout-runs-test300-${key}-v1"

  # Fail before touching the Hub if there is nothing to join against.
  if [ ! -d "$eval_dir" ]; then
    echo "error: eval dir not found for ${key}: ${eval_dir}" >&2
    return 1
  fi

  echo "[$(date -u +%H:%M:%S)] Patching ${key} -> ${repo}"
  # Quoted so paths containing spaces or glob characters are passed intact.
  "$PYTHON" "$SCRIPT" --repo "$repo" --eval-dir "$eval_dir" \
    --commit-message "Add question/correct_answer/correct: ${key}" 2>&1
  echo "[$(date -u +%H:%M:%S)] Done: ${key}"
}
19
+
20
+ patch gpt-oss-120b traj_budget_orig_ext_gpt-oss-120b_seed0
21
+ patch qwen3p5-4b traj_budget_orig_ext_qwen3.5-4b_seed0
22
+ patch qwen-sft-random traj_budget_orig_ext_qwen3.5-4b-sft-random_selection_seed0
23
+ patch qwen-sft-random-unfiltered traj_budget_orig_ext_qwen3.5-4b-sft-random_selection_unfiltered_seed0
24
+ patch qwen-sft-gemini traj_budget_orig_ext_qwen3.5-4b-sft-gemini_2.5_pro_selection_seed0
25
+ patch qwen-sft-gemini-unfiltered traj_budget_orig_ext_qwen3.5-4b-sft-gemini_2.5_pro_selection_unfiltered_seed0
26
+ patch qwen-sft-best4-gemini-c traj_budget_orig_ext_qwen3.5-4b-sft-best_of_4_gemini_2.5_pro_selection_mode_c_seed0
27
+ patch qwen-sft-best4-gemini-d traj_budget_orig_ext_qwen3.5-4b-sft-best_of_4_gemini_2.5_pro_selection_mode_d_seed0
28
+ patch qwen-sft-best8-random-c traj_budget_orig_ext_qwen3.5-4b-sft-best_of_8_random_selection_mode_c_seed0
29
+ patch qwen-sft-best8-random-d traj_budget_orig_ext_qwen3.5-4b-sft-best_of_8_random_selection_mode_d_seed0
30
+ patch qwen-sft-gpt-scout traj_budget_orig_ext_qwen3.5-4b-sft-gpt-oss-120b_scout_seed0
31
+ patch qwen-sft-gpt-scout-unfiltered traj_budget_orig_ext_qwen3.5-4b-sft-gpt-oss-120b_scout_unfiltered_seed0
32
+
33
+ echo "ALL DONE at $(date -u)"