Spaces:

AUXteam
/

WitNote

Sleeping

App Files Files Community

WitNote / internal /semantic /benchmark_study_test.go

AUXteam

Upload folder using huggingface_hub

6a7089a verified 9 days ago

raw

history blame contribute delete

29.7 kB

	package semantic

	// benchmark_study_test.go — Controlled benchmark study
	//
	// 20 queries × 10 page types comparing:
	// • LexicalMatcher (Jaccard + stopwords + role boost)
	// • EmbeddingMatcher (128-dim HashingEmbedder + cosine similarity)
	// • CombinedMatcher (0.6 lexical + 0.4 embedding)
	//
	// Metrics reported:
	// • Acc@1 — correct element is the top-ranked result
	// • Acc@3 — correct element appears in top-3 results
	// • Mean Latency (µs) per matcher
	//
	// Run:
	// go test ./internal/semantic/ -run TestBenchmarkStudy -v
	//
	// Or with benchmark timing detail:
	// go test ./internal/semantic/ -run TestBenchmarkStudy -v -count 5

	import (
	"context"
	"fmt"
	"sort"
	"strings"
	"testing"
	"time"
	)

	// -----------------------------------------------------------------------
	// Data structures
	// -----------------------------------------------------------------------

	// studyCase is a single (page, query, expected-ref) triple.
	type studyCase struct {
	page string // human-readable page name
	query string // natural language query
	expectedRef string // ref of the ground-truth element
	elements []ElementDescriptor
	}

	// studyResult records one matcher's answer for one case.
	type studyResult struct {
	matcherName string
	caseName string
	page string
	hit1 bool // Acc@1: best ref == expected
	hit3 bool // Acc@3: expected ref in top-3
	latencyNs int64
	bestRef string
	bestScore float64
	}

	// -----------------------------------------------------------------------
	// Ground-truth page element sets (10 pages × 2 queries each = 20 cases)
	// -----------------------------------------------------------------------

	func studyCases() []studyCase {
	// ---- Page 1: Login Form ------------------------------------------------
	login := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Sign In"},
	{Ref: "e1", Role: "textbox", Name: "Email address"},
	{Ref: "e2", Role: "textbox", Name: "Password"},
	{Ref: "e3", Role: "checkbox", Name: "Remember me"},
	{Ref: "e4", Role: "button", Name: "Sign In"},
	{Ref: "e5", Role: "link", Name: "Forgot your password?"},
	{Ref: "e6", Role: "link", Name: "Create account"},
	{Ref: "e7", Role: "button", Name: "Continue with Google"},
	{Ref: "e8", Role: "button", Name: "Continue with Apple"},
	{Ref: "e9", Role: "img", Name: "Company logo"},
	}

	// ---- Page 2: Registration Form -----------------------------------------
	register := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Create your account"},
	{Ref: "e1", Role: "textbox", Name: "First name"},
	{Ref: "e2", Role: "textbox", Name: "Last name"},
	{Ref: "e3", Role: "textbox", Name: "Email"},
	{Ref: "e4", Role: "textbox", Name: "Password"},
	{Ref: "e5", Role: "textbox", Name: "Confirm password"},
	{Ref: "e6", Role: "combobox", Name: "Date of birth"},
	{Ref: "e7", Role: "combobox", Name: "Country or region"},
	{Ref: "e8", Role: "checkbox", Name: "I agree to the Terms and Conditions"},
	{Ref: "e9", Role: "checkbox", Name: "Subscribe to marketing emails"},
	{Ref: "e10", Role: "button", Name: "Create account"},
	{Ref: "e11", Role: "link", Name: "Already have an account? Log in"},
	}

	// ---- Page 3: E-commerce Product Page -----------------------------------
	product := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Wireless Noise-Cancelling Headphones"},
	{Ref: "e1", Role: "text", Name: "$299.99"},
	{Ref: "e2", Role: "combobox", Name: "Color", Value: "Midnight Black"},
	{Ref: "e3", Role: "spinbutton", Name: "Quantity", Value: "1"},
	{Ref: "e4", Role: "button", Name: "Add to cart"},
	{Ref: "e5", Role: "button", Name: "Buy now"},
	{Ref: "e6", Role: "button", Name: "Add to wishlist"},
	{Ref: "e7", Role: "tab", Name: "Description"},
	{Ref: "e8", Role: "tab", Name: "Reviews"},
	{Ref: "e9", Role: "tab", Name: "Specifications"},
	{Ref: "e10", Role: "img", Name: "Product image front view"},
	{Ref: "e11", Role: "text", Name: "Free shipping on orders over $50"},
	}

	// ---- Page 4: Navigation Header -----------------------------------------
	nav := []ElementDescriptor{
	{Ref: "e0", Role: "img", Name: "Site logo"},
	{Ref: "e1", Role: "link", Name: "Home"},
	{Ref: "e2", Role: "link", Name: "Products"},
	{Ref: "e3", Role: "link", Name: "Pricing"},
	{Ref: "e4", Role: "link", Name: "Blog"},
	{Ref: "e5", Role: "link", Name: "About Us"},
	{Ref: "e6", Role: "link", Name: "Contact"},
	{Ref: "e7", Role: "search", Name: "Search"},
	{Ref: "e8", Role: "button", Name: "Search"},
	{Ref: "e9", Role: "button", Name: "Open cart"},
	{Ref: "e10", Role: "link", Name: "Sign in"},
	{Ref: "e11", Role: "button", Name: "Open navigation menu"},
	}

	// ---- Page 5: Analytics Dashboard ---------------------------------------
	dashboard := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Dashboard Overview"},
	{Ref: "e1", Role: "button", Name: "Export Report"},
	{Ref: "e2", Role: "button", Name: "Add Widget"},
	{Ref: "e3", Role: "combobox", Name: "Date range", Value: "Last 30 days"},
	{Ref: "e4", Role: "text", Name: "Total Revenue", Value: "$128,450"},
	{Ref: "e5", Role: "text", Name: "Active Users", Value: "8,302"},
	{Ref: "e6", Role: "text", Name: "Conversion Rate", Value: "3.4%"},
	{Ref: "e7", Role: "text", Name: "Avg Session Duration", Value: "4m 12s"},
	{Ref: "e8", Role: "button", Name: "Refresh Data"},
	{Ref: "e9", Role: "link", Name: "View detailed report"},
	{Ref: "e10", Role: "tab", Name: "Overview"},
	{Ref: "e11", Role: "tab", Name: "Revenue"},
	{Ref: "e12", Role: "tab", Name: "Users"},
	{Ref: "e13", Role: "button", Name: "Notifications"},
	}

	// ---- Page 6: Search Results Page ---------------------------------------
	search := []ElementDescriptor{
	{Ref: "e0", Role: "search", Name: "Search"},
	{Ref: "e1", Role: "button", Name: "Search"},
	{Ref: "e2", Role: "heading", Name: "Search Results for \"golang\""},
	{Ref: "e3", Role: "combobox", Name: "Sort by", Value: "Relevance"},
	{Ref: "e4", Role: "checkbox", Name: "Filter: Last 24 hours"},
	{Ref: "e5", Role: "checkbox", Name: "Filter: Images"},
	{Ref: "e6", Role: "checkbox", Name: "Filter: Videos"},
	{Ref: "e7", Role: "link", Name: "Next page"},
	{Ref: "e8", Role: "link", Name: "Previous page"},
	{Ref: "e9", Role: "button", Name: "Clear filters"},
	{Ref: "e10", Role: "text", Name: "About 4,230,000 results"},
	}

	// ---- Page 7: Admin Data Table ------------------------------------------
	table := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Order Management"},
	{Ref: "e1", Role: "search", Name: "Search orders"},
	{Ref: "e2", Role: "button", Name: "Create order"},
	{Ref: "e3", Role: "button", Name: "Export to CSV"},
	{Ref: "e4", Role: "combobox", Name: "Status filter", Value: "All"},
	{Ref: "e5", Role: "combobox", Name: "Rows per page", Value: "25"},
	{Ref: "e6", Role: "columnheader", Name: "Order ID"},
	{Ref: "e7", Role: "columnheader", Name: "Customer"},
	{Ref: "e8", Role: "columnheader", Name: "Total"},
	{Ref: "e9", Role: "columnheader", Name: "Status"},
	{Ref: "e10", Role: "button", Name: "Previous page"},
	{Ref: "e11", Role: "button", Name: "Next page"},
	{Ref: "e12", Role: "button", Name: "Bulk delete"},
	{Ref: "e13", Role: "checkbox", Name: "Select all orders"},
	}

	// ---- Page 8: Confirmation Modal ----------------------------------------
	modal := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Dashboard"},
	{Ref: "e1", Role: "button", Name: "New Project"},
	{Ref: "e2", Role: "dialog", Name: "Delete Project"},
	{Ref: "e3", Role: "heading", Name: "Delete Project"},
	{Ref: "e4", Role: "text", Name: "This will permanently delete the project and all its data. This action cannot be undone."},
	{Ref: "e5", Role: "textbox", Name: "Type project name to confirm"},
	{Ref: "e6", Role: "button", Name: "Delete project"},
	{Ref: "e7", Role: "button", Name: "Cancel"},
	{Ref: "e8", Role: "button", Name: "Close"},
	{Ref: "e9", Role: "navigation", Name: "Sidebar"},
	}

	// ---- Page 9: Settings / Preferences Page --------------------------------
	settings := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Account Settings"},
	{Ref: "e1", Role: "textbox", Name: "Display name"},
	{Ref: "e2", Role: "textbox", Name: "Email address"},
	{Ref: "e3", Role: "textbox", Name: "Phone number"},
	{Ref: "e4", Role: "combobox", Name: "Language", Value: "English"},
	{Ref: "e5", Role: "combobox", Name: "Timezone", Value: "UTC-5"},
	{Ref: "e6", Role: "switch", Name: "Email notifications"},
	{Ref: "e7", Role: "switch", Name: "Push notifications"},
	{Ref: "e8", Role: "switch", Name: "Dark mode"},
	{Ref: "e9", Role: "button", Name: "Save changes"},
	{Ref: "e10", Role: "button", Name: "Cancel"},
	{Ref: "e11", Role: "button", Name: "Delete account"},
	{Ref: "e12", Role: "link", Name: "Change password"},
	}

	// ---- Page 10: Checkout / Payment Page ----------------------------------
	checkout := []ElementDescriptor{
	{Ref: "e0", Role: "heading", Name: "Checkout"},
	{Ref: "e1", Role: "textbox", Name: "Full name"},
	{Ref: "e2", Role: "textbox", Name: "Email"},
	{Ref: "e3", Role: "textbox", Name: "Shipping address"},
	{Ref: "e4", Role: "textbox", Name: "City"},
	{Ref: "e5", Role: "textbox", Name: "Postal code"},
	{Ref: "e6", Role: "combobox", Name: "Country"},
	{Ref: "e7", Role: "textbox", Name: "Card number"},
	{Ref: "e8", Role: "textbox", Name: "Expiry date"},
	{Ref: "e9", Role: "textbox", Name: "CVV"},
	{Ref: "e10", Role: "checkbox", Name: "Save card for future use"},
	{Ref: "e11", Role: "button", Name: "Place order"},
	{Ref: "e12", Role: "button", Name: "Back to cart"},
	{Ref: "e13", Role: "link", Name: "Apply coupon code"},
	}

	return []studyCase{
	// Page 1: Login — 2 queries
	{"Login Form", "sign in button", "e4", login},
	{"Login Form", "email input field", "e1", login},

	// Page 2: Registration — 2 queries
	{"Registration Form", "create account button", "e10", register},
	{"Registration Form", "confirm password field", "e5", register},

	// Page 3: E-commerce Product — 2 queries
	{"Product Page", "add to cart", "e4", product},
	{"Product Page", "product reviews tab", "e8", product},

	// Page 4: Navigation — 2 queries
	{"Navigation Header", "search box", "e7", nav},
	{"Navigation Header", "shopping cart", "e9", nav},

	// Page 5: Dashboard — 2 queries
	{"Analytics Dashboard", "export report", "e1", dashboard},
	{"Analytics Dashboard", "date range selector", "e3", dashboard},

	// Page 6: Search Results — 2 queries
	{"Search Results", "search input", "e0", search},
	{"Search Results", "next page link", "e7", search},

	// Page 7: Data Table — 2 queries
	{"Admin Data Table", "search orders", "e1", table},
	{"Admin Data Table", "export csv", "e3", table},

	// Page 8: Modal — 2 queries
	{"Confirmation Modal", "cancel button", "e7", modal},
	{"Confirmation Modal", "confirm deletion input", "e5", modal},

	// Page 9: Settings — 2 queries
	{"Settings Page", "save changes button", "e9", settings},
	{"Settings Page", "dark mode toggle", "e8", settings},

	// Page 10: Checkout — 2 queries
	{"Checkout Page", "place order button", "e11", checkout},
	{"Checkout Page", "card number field", "e7", checkout},
	}
	}

	// studyHardCases returns 10 intentionally-challenging query/element pairs
	// designed to reveal differentiation between matchers:
	//
	// Group A (query uses an abbreviation, 0 lexical word overlap):
	// expects embedding to win via character n-gram similarity.
	//
	// Group B (query uses a paraphrase / synonym):
	// expects both matchers to struggle, revealing the ceiling of
	// surface-form-only matching.
	//
	// Group C (ambiguous — multiple equally-plausible elements):
	// expects combined to win via score averaging.
	func studyHardCases() []studyCase {
	// Reuse page element sets from studyCases.
	cases := studyCases()

	// Helper: find element slice for a given page name.
	pageElems := map[string][]ElementDescriptor{}
	for _, c := range cases {
	if _, ok := pageElems[c.page]; !ok {
	pageElems[c.page] = c.elements
	}
	}

	product := pageElems["Product Page"]
	checkout := pageElems["Checkout Page"]
	login := pageElems["Login Form"]
	register := pageElems["Registration Form"]
	settings := pageElems["Settings Page"]
	table := pageElems["Admin Data Table"]
	modal := pageElems["Confirmation Modal"]

	return []studyCase{
	// ── Group A: Abbreviations (lexical = 0, embedding has n-gram overlap) ──

	// "specs" ↔ "Specifications" — shares "^sp","spe","pec","spec" (4-gram)
	{"Product Page [HARD]", "specs tab", "e9", product},

	// "qty" ↔ "Quantity" — shares "^q","ty","y$"
	{"Product Page [HARD]", "qty input", "e3", product},

	// "addr" ↔ "address" — shares "^a","ad","dd","dr"
	{"Checkout Page [HARD]", "addr field", "e3", checkout},

	// "pwd" ↔ "Password" — shares "^p","d$" (weak but present)
	{"Login Form [HARD]", "pwd textbox", "e2", login},

	// "notifs" ↔ "notifications" — shares "not","oti"
	{"Settings Page [HARD]", "toggle email notifs", "e6", settings},

	// ── Group B: Paraphrases / synonyms (no character overlap, both expected to struggle) ──

	// "download" ↔ "Export to CSV" — no shared characters
	{"Admin Data Table [HARD]", "download table data", "e3", table},

	// "proceed" ↔ "Place order" — no shared characters
	{"Checkout Page [HARD]", "proceed to payment", "e11", checkout},

	// "sign up" ↔ "Create account" — no shared characters
	{"Registration Form [HARD]", "sign up now", "e10", register},

	// "dismiss" ↔ "Cancel" — no shared characters
	{"Confirmation Modal [HARD]", "dismiss dialog", "e7", modal},

	// ── Group C: Ambiguous (multiple "button" elements, exact name helps) ──

	// "dark theme" ↔ "Dark mode" switch — "dark" word-matches exactly
	{"Settings Page [HARD]", "dark theme switch", "e8", settings},
	}
	}

	// -----------------------------------------------------------------------
	// Core evaluation logic
	// -----------------------------------------------------------------------

	func runMatcher(
	name string,
	matcher ElementMatcher,
	cases []studyCase,
	) []studyResult {
	ctx := context.Background()
	opts := FindOptions{Threshold: 0.0, TopK: 3}
	results := make([]studyResult, 0, len(cases))

	for _, c := range cases {
	start := time.Now()
	fr, err := matcher.Find(ctx, c.query, c.elements, opts)
	elapsed := time.Since(start).Nanoseconds()

	if err != nil {
	results = append(results, studyResult{
	matcherName: name,
	caseName: fmt.Sprintf("%s \| %q", c.page, c.query),
	page: c.page,
	latencyNs: elapsed,
	})
	continue
	}

	// Acc@1
	hit1 := fr.BestRef == c.expectedRef

	// Acc@3 — expected ref in any of the top-3 matches
	hit3 := hit1
	if !hit3 {
	for _, m := range fr.Matches {
	if m.Ref == c.expectedRef {
	hit3 = true
	break
	}
	}
	}

	results = append(results, studyResult{
	matcherName: name,
	caseName: fmt.Sprintf("%s \| %q", c.page, c.query),
	page: c.page,
	hit1: hit1,
	hit3: hit3,
	latencyNs: elapsed,
	bestRef: fr.BestRef,
	bestScore: fr.BestScore,
	})
	}

	return results
	}

	// -----------------------------------------------------------------------
	// Report generation helpers
	// -----------------------------------------------------------------------

	func percent(n, total int) string {
	if total == 0 {
	return " 0.0%"
	}
	return fmt.Sprintf("%5.1f%%", float64(n)/float64(total)*100)
	}

	func meanLatencyUs(results []studyResult) float64 {
	if len(results) == 0 {
	return 0
	}
	var sum int64
	for _, r := range results {
	sum += r.latencyNs
	}
	return float64(sum) / float64(len(results)) / 1000.0
	}

	func acc(results []studyResult, k int) (int, int) {
	hits := 0
	for _, r := range results {
	if k == 1 && r.hit1 {
	hits++
	}
	if k == 3 && r.hit3 {
	hits++
	}
	}
	return hits, len(results)
	}

	func printSeparator(t *testing.T, char string, width int) {
	t.Log(strings.Repeat(char, width))
	}

	// -----------------------------------------------------------------------
	// Main study test
	// -----------------------------------------------------------------------

	func TestBenchmarkStudy(t *testing.T) {
	cases := studyCases()

	matchers := []struct {
	name string
	matcher ElementMatcher
	}{
	{"Lexical", NewLexicalMatcher()},
	{"Embedding", NewEmbeddingMatcher(NewHashingEmbedder(128))},
	{"Combined", NewCombinedMatcher(NewHashingEmbedder(128))},
	}

	// Run all matchers
	allResults := make(map[string][]studyResult, len(matchers))
	for _, m := range matchers {
	allResults[m.name] = runMatcher(m.name, m.matcher, cases)
	}

	// ----------------------------------------------------------------
	// REPORT HEADER
	// ----------------------------------------------------------------
	const W = 72
	t.Log("")
	printSeparator(t, "═", W)
	t.Log(" SEMANTIC MATCHING — CONTROLLED BENCHMARK STUDY")
	t.Logf(" %d queries × %d page types \| 3 matchers \| TopK=3 \| Threshold=0",
	len(cases), 10)
	printSeparator(t, "═", W)

	// ----------------------------------------------------------------
	// OVERALL SUMMARY TABLE
	// ----------------------------------------------------------------
	t.Log("")
	t.Log(" OVERALL RESULTS")
	t.Log("")
	t.Logf(" %-12s %7s %7s %12s", "Matcher", "Acc@1", "Acc@3", "Latency (µs)")
	t.Log(" " + strings.Repeat("-", 46))

	type summaryRow struct {
	name string
	acc1 int
	acc3 int
	total int
	latency float64
	}
	rows := make([]summaryRow, 0, len(matchers))

	for _, m := range matchers {
	res := allResults[m.name]
	h1, tot := acc(res, 1)
	h3, _ := acc(res, 3)
	lat := meanLatencyUs(res)
	rows = append(rows, summaryRow{m.name, h1, h3, tot, lat})
	t.Logf(" %-12s %s %s %10.1f µs",
	m.name, percent(h1, tot), percent(h3, tot), lat)
	}

	t.Log(" " + strings.Repeat("-", 46))
	t.Log("")

	// ----------------------------------------------------------------
	// PER-PAGE BREAKDOWN
	// ----------------------------------------------------------------
	// Collect unique page names in order
	pages := make([]string, 0, 10)
	seen := map[string]bool{}
	for _, c := range cases {
	if !seen[c.page] {
	pages = append(pages, c.page)
	seen[c.page] = true
	}
	}

	printSeparator(t, "─", W)
	t.Log(" PER-PAGE BREAKDOWN (Acc@1 / Acc@3) ")
	printSeparator(t, "─", W)
	t.Log("")
	t.Logf(" %-26s %-14s %-14s %-14s", "Page", "Lexical", "Embedding", "Combined")
	t.Log(" " + strings.Repeat("-", 66))

	for _, page := range pages {
	// Filter results for this page
	cols := make([]string, 0, 3)
	for _, m := range matchers {
	var pageRes []studyResult
	for _, r := range allResults[m.name] {
	if r.page == page {
	pageRes = append(pageRes, r)
	}
	}
	h1, tot := acc(pageRes, 1)
	h3, _ := acc(pageRes, 3)
	cols = append(cols, fmt.Sprintf("%s / %s", percent(h1, tot), percent(h3, tot)))
	}
	t.Logf(" %-26s %-14s %-14s %-14s", page, cols[0], cols[1], cols[2])
	}
	t.Log("")

	// ----------------------------------------------------------------
	// DETAILED CASE-BY-CASE TABLE
	// ----------------------------------------------------------------
	printSeparator(t, "─", W)
	t.Log(" CASE-BY-CASE RESULTS (✓ = hit, ✗ = miss) ")
	printSeparator(t, "─", W)
	t.Log("")
	t.Logf(" %-6s %-28s %-8s %-8s %-8s %-8s %-8s %-8s",
	"#", "Query", "Lex@1", "Lex@3", "Emb@1", "Emb@3", "Com@1", "Com@3")
	t.Log(" " + strings.Repeat("-", 88))

	mark := func(hit bool) string {
	if hit {
	return " ✓"
	}
	return " ✗"
	}

	for i, c := range cases {
	shortQ := c.query
	if len(shortQ) > 27 {
	shortQ = shortQ[:24] + "..."
	}

	lex := allResults["Lexical"][i]
	emb := allResults["Embedding"][i]
	com := allResults["Combined"][i]

	t.Logf(" %3d. %-28s %-8s %-8s %-8s %-8s %-8s %-8s",
	i+1, shortQ,
	mark(lex.hit1), mark(lex.hit3),
	mark(emb.hit1), mark(emb.hit3),
	mark(com.hit1), mark(com.hit3),
	)
	}
	t.Log("")

	// ----------------------------------------------------------------
	// MISSED CASES ANALYSIS
	// ----------------------------------------------------------------
	printSeparator(t, "─", W)
	t.Log(" MISSED CASES ANALYSIS (Acc@1 misses)")
	printSeparator(t, "─", W)
	t.Log("")

	for _, m := range matchers {
	missCount := 0
	for _, r := range allResults[m.name] {
	if !r.hit1 {
	missCount++
	}
	}
	if missCount == 0 {
	t.Logf(" %s: perfect score — no Acc@1 misses", m.name)
	continue
	}
	t.Logf(" %s misses (%d):", m.name, missCount)
	for i, r := range allResults[m.name] {
	if r.hit1 {
	continue
	}
	t.Logf(" [%2d] %-28s expected=%-4s got=%-4s score=%.3f",
	i+1, fmt.Sprintf("%q", cases[i].query),
	cases[i].expectedRef, r.bestRef, r.bestScore)
	}
	}
	t.Log("")

	// ----------------------------------------------------------------
	// MATCHER COMPARISON: cases where they DISAGREE
	// ----------------------------------------------------------------
	printSeparator(t, "─", W)
	t.Log(" DISAGREMENT ANALYSIS (where Combined beats both)")
	printSeparator(t, "─", W)
	t.Log("")

	improvements := 0
	for i := range cases {
	lex := allResults["Lexical"][i]
	emb := allResults["Embedding"][i]
	com := allResults["Combined"][i]

	if !lex.hit1 && !emb.hit1 && com.hit1 {
	improvements++
	t.Logf(" [%2d] %q — Combined rescued (lex✗ emb✗ com✓)",
	i+1, cases[i].query)
	}
	if lex.hit1 && emb.hit1 && !com.hit1 {
	t.Logf(" [%2d] %q — Combined degraded (lex✓ emb✓ com✗) !",
	i+1, cases[i].query)
	}
	}
	if improvements == 0 {
	t.Log(" No unique rescues by Combined (or none needed)")
	}
	t.Log("")

	// ----------------------------------------------------------------
	// LATENCY COMPARISON
	// ----------------------------------------------------------------
	printSeparator(t, "─", W)
	t.Log(" LATENCY SUMMARY")
	printSeparator(t, "─", W)
	t.Log("")

	// Sort by latency ascending
	sort.Slice(rows, func(i, j int) bool {
	return rows[i].latency < rows[j].latency
	})

	baseline := rows[0].latency
	for _, row := range rows {
	overhead := ""
	if row.latency > baseline {
	overhead = fmt.Sprintf(" (+%.1fx)", row.latency/baseline)
	}
	t.Logf(" %-12s %8.2f µs%s", row.name, row.latency, overhead)
	}
	t.Log("")

	// ----------------------------------------------------------------
	// FINAL VERDICT
	// ----------------------------------------------------------------
	printSeparator(t, "═", W)
	t.Log(" VERDICT")
	printSeparator(t, "─", W)
	t.Log("")

	// Find best Acc@1
	bestAcc1Name := ""
	bestAcc1 := -1
	for _, m := range matchers {
	h1, tot := acc(allResults[m.name], 1)
	pct := h1 * 100 / tot
	if pct > bestAcc1 {
	bestAcc1 = pct
	bestAcc1Name = m.name
	}
	}

	lexH1, tot := acc(allResults["Lexical"], 1)
	embH1, _ := acc(allResults["Embedding"], 1)
	comH1, _ := acc(allResults["Combined"], 1)
	lexH3, _ := acc(allResults["Lexical"], 3)
	embH3, _ := acc(allResults["Embedding"], 3)
	comH3, _ := acc(allResults["Combined"], 3)

	t.Logf(" Best Acc@1: %s (%d/%d = %s)", bestAcc1Name, comH1, tot, percent(comH1, tot))
	t.Log("")
	t.Logf(" Acc@1 — Lexical: %s Embedding: %s Combined: %s",
	percent(lexH1, tot), percent(embH1, tot), percent(comH1, tot))
	t.Logf(" Acc@3 — Lexical: %s Embedding: %s Combined: %s",
	percent(lexH3, tot), percent(embH3, tot), percent(comH3, tot))
	t.Log("")
	t.Logf(" Mean latency — Lexical: %.1fµs Embedding: %.1fµs Combined: %.1fµs",
	meanLatencyUs(allResults["Lexical"]),
	meanLatencyUs(allResults["Embedding"]),
	meanLatencyUs(allResults["Combined"]))
	t.Log("")
	printSeparator(t, "═", W)
	t.Log("")

	// ================================================================
	// PART II — HARD CASES (abbreviations, synonyms, paraphrases)
	// ================================================================
	hardCases := studyHardCases()

	allHard := make(map[string][]studyResult, len(matchers))
	for _, m := range matchers {
	allHard[m.name] = runMatcher(m.name, m.matcher, hardCases)
	}

	t.Log("")
	printSeparator(t, "═", W)
	t.Log(" PART II — HARD CASES (abbreviations, synonyms, paraphrases)")
	t.Logf(" %d queries \| 3 matcher groups: A=abbrev B=synonym C=ambiguous",
	len(hardCases))
	printSeparator(t, "═", W)

	// Overall hard summary
	t.Log("")
	t.Log(" HARD OVERALL")
	t.Log("")
	t.Logf(" %-12s %7s %7s %12s", "Matcher", "Acc@1", "Acc@3", "Latency (µs)")
	t.Log(" " + strings.Repeat("-", 46))
	for _, m := range matchers {
	res := allHard[m.name]
	h1, tot := acc(res, 1)
	h3, _ := acc(res, 3)
	lat := meanLatencyUs(res)
	t.Logf(" %-12s %s %s %10.1f µs",
	m.name, percent(h1, tot), percent(h3, tot), lat)
	}
	t.Log(" " + strings.Repeat("-", 46))
	t.Log("")

	// Case-by-case hard results
	printSeparator(t, "─", W)
	t.Log(" HARD CASE-BY-CASE (✓ = hit, ✗ = miss)")
	printSeparator(t, "─", W)
	t.Log("")
	t.Logf(" %-3s %-3s %-30s %-8s %-8s %-8s %-8s %-8s %-8s",
	"#", "Grp", "Query", "Lex@1", "Lex@3", "Emb@1", "Emb@3", "Com@1", "Com@3")
	t.Log(" " + strings.Repeat("-", 90))

	groups := []string{"A", "A", "A", "A", "A", "B", "B", "B", "B", "C"}
	for i, c := range hardCases {
	grp := groups[i]
	shortQ := c.query
	if len(shortQ) > 29 {
	shortQ = shortQ[:26] + "..."
	}
	lex := allHard["Lexical"][i]
	emb := allHard["Embedding"][i]
	com := allHard["Combined"][i]

	t.Logf(" %3d %-3s %-30s %-8s %-8s %-8s %-8s %-8s %-8s",
	i+1, grp, shortQ,
	mark(lex.hit1), mark(lex.hit3),
	mark(emb.hit1), mark(emb.hit3),
	mark(com.hit1), mark(com.hit3),
	)
	}
	t.Log("")

	// Per-group analysis
	printSeparator(t, "─", W)
	t.Log(" GROUP ANALYSIS")
	printSeparator(t, "─", W)
	t.Log("")
	t.Logf(" %-20s %-14s %-14s %-14s", "Group", "Lexical Acc@1", "Embedding Acc@1", "Combined Acc@1")
	t.Log(" " + strings.Repeat("-", 66))

	groupDefs := []struct {
	label string
	ids []int // 0-indexed
	desc string
	}{
	{"A (abbreviations)", []int{0, 1, 2, 3, 4}, "expected: Emb ≥ Lex"},
	{"B (synonyms)", []int{5, 6, 7, 8}, "expected: both struggle"},
	{"C (ambiguous)", []int{9}, "expected: Combined wins"},
	}

	for _, gd := range groupDefs {
	for _, mname := range []string{"Lexical", "Embedding", "Combined"} {
	var subset []studyResult
	for _, idx := range gd.ids {
	subset = append(subset, allHard[mname][idx])
	}
	_ = subset
	}
	lexSub := filterByIdx(allHard["Lexical"], gd.ids)
	embSub := filterByIdx(allHard["Embedding"], gd.ids)
	comSub := filterByIdx(allHard["Combined"], gd.ids)
	lh1, lt := acc(lexSub, 1)
	eh1, et := acc(embSub, 1)
	ch1, ct := acc(comSub, 1)
	t.Logf(" %-20s %-14s %-15s %-14s %s",
	gd.label,
	percent(lh1, lt), percent(eh1, et), percent(ch1, ct),
	gd.desc)
	}
	t.Log("")

	// Combined vs Individual: misses that Combined rescues
	printSeparator(t, "─", W)
	t.Log(" COMBINED RESCUES (in hard cases)")
	printSeparator(t, "─", W)
	t.Log("")
	rescueCount := 0
	for i := range hardCases {
	lex := allHard["Lexical"][i]
	emb := allHard["Embedding"][i]
	com := allHard["Combined"][i]
	if !lex.hit1 && !emb.hit1 && com.hit1 {
	rescueCount++
	t.Logf(" RESCUE [%d] %q — Lex✗ Emb✗ Com✓", i+1, hardCases[i].query)
	}
	if !lex.hit1 && emb.hit1 && com.hit1 {
	t.Logf(" EMB WIN [%d] %q — Lex✗ Emb✓ Com✓", i+1, hardCases[i].query)
	}
	if lex.hit1 && !emb.hit1 && com.hit1 {
	t.Logf(" LEX WIN [%d] %q — Lex✓ Emb✗ Com✓", i+1, hardCases[i].query)
	}
	if lex.hit1 && !emb.hit1 && !com.hit1 {
	t.Logf(" COM FAIL[%d] %q — Lex✓ Emb✗ Com✗ ← embedding dragged score down", i+1, hardCases[i].query)
	}
	}
	if rescueCount == 0 {
	t.Log(" No unique Combined rescues in hard cases")
	}
	t.Log("")

	// Score comparison for hard cases
	printSeparator(t, "─", W)
	t.Log(" SCORE DETAIL (best score returned by each matcher per hard case)")
	printSeparator(t, "─", W)
	t.Log("")
	t.Logf(" %-3s %-30s %-6s %-8s %-8s %-8s %-8s",
	"#", "Query", "Want", "Lex score", "Emb score", "Com score", "Winner")
	t.Log(" " + strings.Repeat("-", 78))
	for i, c := range hardCases {
	lex := allHard["Lexical"][i]
	emb := allHard["Embedding"][i]
	com := allHard["Combined"][i]
	shortQ := c.query
	if len(shortQ) > 29 {
	shortQ = shortQ[:26] + "..."
	}
	var winner string
	if !lex.hit1 && emb.hit1 {
	winner = "Embedding"
	} else if lex.hit1 && !emb.hit1 {
	winner = "Lexical"
	} else if lex.hit1 && emb.hit1 {
	winner = "both"
	} else {
	winner = "none"
	}
	t.Logf(" %3d %-30s %-6s %9.3f %9.3f %9.3f %s",
	i+1, shortQ, c.expectedRef,
	lex.bestScore, emb.bestScore, com.bestScore, winner)
	}
	t.Log("")

	// Final combined verdict
	printSeparator(t, "═", W)
	t.Log(" FINAL VERDICT — EASY + HARD COMBINED")
	printSeparator(t, "─", W)
	t.Log("")

	allTotal := len(cases) + len(hardCases)
	for _, m := range matchers {
	combined := append(allResults[m.name], allHard[m.name]...)
	h1, _ := acc(combined, 1)
	h3, _ := acc(combined, 3)
	lat := meanLatencyUs(combined)
	t.Logf(" %-12s Acc@1=%s Acc@3=%s µs=%.1f (total %d queries)",
	m.name, percent(h1, allTotal), percent(h3, allTotal), lat, allTotal)
	}
	t.Log("")
	printSeparator(t, "═", W)
	t.Log("")
	}

	// filterByIdx returns results at specified 0-based indices.
	func filterByIdx(results []studyResult, indices []int) []studyResult {
	out := make([]studyResult, 0, len(indices))
	for _, i := range indices {
	if i < len(results) {
	out = append(out, results[i])
	}
	}
	return out
	}