package semantic // benchmark_study_test.go — Controlled benchmark study // // 20 queries × 10 page types comparing: // • LexicalMatcher (Jaccard + stopwords + role boost) // • EmbeddingMatcher (128-dim HashingEmbedder + cosine similarity) // • CombinedMatcher (0.6 lexical + 0.4 embedding) // // Metrics reported: // • Acc@1 — correct element is the top-ranked result // • Acc@3 — correct element appears in top-3 results // • Mean Latency (µs) per matcher // // Run: // go test ./internal/semantic/ -run TestBenchmarkStudy -v // // Or with benchmark timing detail: // go test ./internal/semantic/ -run TestBenchmarkStudy -v -count 5 import ( "context" "fmt" "sort" "strings" "testing" "time" ) // ----------------------------------------------------------------------- // Data structures // ----------------------------------------------------------------------- // studyCase is a single (page, query, expected-ref) triple. type studyCase struct { page string // human-readable page name query string // natural language query expectedRef string // ref of the ground-truth element elements []ElementDescriptor } // studyResult records one matcher's answer for one case. type studyResult struct { matcherName string caseName string page string hit1 bool // Acc@1: best ref == expected hit3 bool // Acc@3: expected ref in top-3 latencyNs int64 bestRef string bestScore float64 } // ----------------------------------------------------------------------- // Ground-truth page element sets (10 pages × 2 queries each = 20 cases) // ----------------------------------------------------------------------- func studyCases() []studyCase { // ---- Page 1: Login Form ------------------------------------------------ login := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Sign In"}, {Ref: "e1", Role: "textbox", Name: "Email address"}, {Ref: "e2", Role: "textbox", Name: "Password"}, {Ref: "e3", Role: "checkbox", Name: "Remember me"}, {Ref: "e4", Role: "button", Name: "Sign In"}, {Ref: "e5", Role: "link", Name: "Forgot your password?"}, {Ref: "e6", Role: "link", Name: "Create account"}, {Ref: "e7", Role: "button", Name: "Continue with Google"}, {Ref: "e8", Role: "button", Name: "Continue with Apple"}, {Ref: "e9", Role: "img", Name: "Company logo"}, } // ---- Page 2: Registration Form ----------------------------------------- register := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Create your account"}, {Ref: "e1", Role: "textbox", Name: "First name"}, {Ref: "e2", Role: "textbox", Name: "Last name"}, {Ref: "e3", Role: "textbox", Name: "Email"}, {Ref: "e4", Role: "textbox", Name: "Password"}, {Ref: "e5", Role: "textbox", Name: "Confirm password"}, {Ref: "e6", Role: "combobox", Name: "Date of birth"}, {Ref: "e7", Role: "combobox", Name: "Country or region"}, {Ref: "e8", Role: "checkbox", Name: "I agree to the Terms and Conditions"}, {Ref: "e9", Role: "checkbox", Name: "Subscribe to marketing emails"}, {Ref: "e10", Role: "button", Name: "Create account"}, {Ref: "e11", Role: "link", Name: "Already have an account? Log in"}, } // ---- Page 3: E-commerce Product Page ----------------------------------- product := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Wireless Noise-Cancelling Headphones"}, {Ref: "e1", Role: "text", Name: "$299.99"}, {Ref: "e2", Role: "combobox", Name: "Color", Value: "Midnight Black"}, {Ref: "e3", Role: "spinbutton", Name: "Quantity", Value: "1"}, {Ref: "e4", Role: "button", Name: "Add to cart"}, {Ref: "e5", Role: "button", Name: "Buy now"}, {Ref: "e6", Role: "button", Name: "Add to wishlist"}, {Ref: "e7", Role: "tab", Name: "Description"}, {Ref: "e8", Role: "tab", Name: "Reviews"}, {Ref: "e9", Role: "tab", Name: "Specifications"}, {Ref: "e10", Role: "img", Name: "Product image front view"}, {Ref: "e11", Role: "text", Name: "Free shipping on orders over $50"}, } // ---- Page 4: Navigation Header ----------------------------------------- nav := []ElementDescriptor{ {Ref: "e0", Role: "img", Name: "Site logo"}, {Ref: "e1", Role: "link", Name: "Home"}, {Ref: "e2", Role: "link", Name: "Products"}, {Ref: "e3", Role: "link", Name: "Pricing"}, {Ref: "e4", Role: "link", Name: "Blog"}, {Ref: "e5", Role: "link", Name: "About Us"}, {Ref: "e6", Role: "link", Name: "Contact"}, {Ref: "e7", Role: "search", Name: "Search"}, {Ref: "e8", Role: "button", Name: "Search"}, {Ref: "e9", Role: "button", Name: "Open cart"}, {Ref: "e10", Role: "link", Name: "Sign in"}, {Ref: "e11", Role: "button", Name: "Open navigation menu"}, } // ---- Page 5: Analytics Dashboard --------------------------------------- dashboard := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Dashboard Overview"}, {Ref: "e1", Role: "button", Name: "Export Report"}, {Ref: "e2", Role: "button", Name: "Add Widget"}, {Ref: "e3", Role: "combobox", Name: "Date range", Value: "Last 30 days"}, {Ref: "e4", Role: "text", Name: "Total Revenue", Value: "$128,450"}, {Ref: "e5", Role: "text", Name: "Active Users", Value: "8,302"}, {Ref: "e6", Role: "text", Name: "Conversion Rate", Value: "3.4%"}, {Ref: "e7", Role: "text", Name: "Avg Session Duration", Value: "4m 12s"}, {Ref: "e8", Role: "button", Name: "Refresh Data"}, {Ref: "e9", Role: "link", Name: "View detailed report"}, {Ref: "e10", Role: "tab", Name: "Overview"}, {Ref: "e11", Role: "tab", Name: "Revenue"}, {Ref: "e12", Role: "tab", Name: "Users"}, {Ref: "e13", Role: "button", Name: "Notifications"}, } // ---- Page 6: Search Results Page --------------------------------------- search := []ElementDescriptor{ {Ref: "e0", Role: "search", Name: "Search"}, {Ref: "e1", Role: "button", Name: "Search"}, {Ref: "e2", Role: "heading", Name: "Search Results for \"golang\""}, {Ref: "e3", Role: "combobox", Name: "Sort by", Value: "Relevance"}, {Ref: "e4", Role: "checkbox", Name: "Filter: Last 24 hours"}, {Ref: "e5", Role: "checkbox", Name: "Filter: Images"}, {Ref: "e6", Role: "checkbox", Name: "Filter: Videos"}, {Ref: "e7", Role: "link", Name: "Next page"}, {Ref: "e8", Role: "link", Name: "Previous page"}, {Ref: "e9", Role: "button", Name: "Clear filters"}, {Ref: "e10", Role: "text", Name: "About 4,230,000 results"}, } // ---- Page 7: Admin Data Table ------------------------------------------ table := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Order Management"}, {Ref: "e1", Role: "search", Name: "Search orders"}, {Ref: "e2", Role: "button", Name: "Create order"}, {Ref: "e3", Role: "button", Name: "Export to CSV"}, {Ref: "e4", Role: "combobox", Name: "Status filter", Value: "All"}, {Ref: "e5", Role: "combobox", Name: "Rows per page", Value: "25"}, {Ref: "e6", Role: "columnheader", Name: "Order ID"}, {Ref: "e7", Role: "columnheader", Name: "Customer"}, {Ref: "e8", Role: "columnheader", Name: "Total"}, {Ref: "e9", Role: "columnheader", Name: "Status"}, {Ref: "e10", Role: "button", Name: "Previous page"}, {Ref: "e11", Role: "button", Name: "Next page"}, {Ref: "e12", Role: "button", Name: "Bulk delete"}, {Ref: "e13", Role: "checkbox", Name: "Select all orders"}, } // ---- Page 8: Confirmation Modal ---------------------------------------- modal := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Dashboard"}, {Ref: "e1", Role: "button", Name: "New Project"}, {Ref: "e2", Role: "dialog", Name: "Delete Project"}, {Ref: "e3", Role: "heading", Name: "Delete Project"}, {Ref: "e4", Role: "text", Name: "This will permanently delete the project and all its data. This action cannot be undone."}, {Ref: "e5", Role: "textbox", Name: "Type project name to confirm"}, {Ref: "e6", Role: "button", Name: "Delete project"}, {Ref: "e7", Role: "button", Name: "Cancel"}, {Ref: "e8", Role: "button", Name: "Close"}, {Ref: "e9", Role: "navigation", Name: "Sidebar"}, } // ---- Page 9: Settings / Preferences Page -------------------------------- settings := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Account Settings"}, {Ref: "e1", Role: "textbox", Name: "Display name"}, {Ref: "e2", Role: "textbox", Name: "Email address"}, {Ref: "e3", Role: "textbox", Name: "Phone number"}, {Ref: "e4", Role: "combobox", Name: "Language", Value: "English"}, {Ref: "e5", Role: "combobox", Name: "Timezone", Value: "UTC-5"}, {Ref: "e6", Role: "switch", Name: "Email notifications"}, {Ref: "e7", Role: "switch", Name: "Push notifications"}, {Ref: "e8", Role: "switch", Name: "Dark mode"}, {Ref: "e9", Role: "button", Name: "Save changes"}, {Ref: "e10", Role: "button", Name: "Cancel"}, {Ref: "e11", Role: "button", Name: "Delete account"}, {Ref: "e12", Role: "link", Name: "Change password"}, } // ---- Page 10: Checkout / Payment Page ---------------------------------- checkout := []ElementDescriptor{ {Ref: "e0", Role: "heading", Name: "Checkout"}, {Ref: "e1", Role: "textbox", Name: "Full name"}, {Ref: "e2", Role: "textbox", Name: "Email"}, {Ref: "e3", Role: "textbox", Name: "Shipping address"}, {Ref: "e4", Role: "textbox", Name: "City"}, {Ref: "e5", Role: "textbox", Name: "Postal code"}, {Ref: "e6", Role: "combobox", Name: "Country"}, {Ref: "e7", Role: "textbox", Name: "Card number"}, {Ref: "e8", Role: "textbox", Name: "Expiry date"}, {Ref: "e9", Role: "textbox", Name: "CVV"}, {Ref: "e10", Role: "checkbox", Name: "Save card for future use"}, {Ref: "e11", Role: "button", Name: "Place order"}, {Ref: "e12", Role: "button", Name: "Back to cart"}, {Ref: "e13", Role: "link", Name: "Apply coupon code"}, } return []studyCase{ // Page 1: Login — 2 queries {"Login Form", "sign in button", "e4", login}, {"Login Form", "email input field", "e1", login}, // Page 2: Registration — 2 queries {"Registration Form", "create account button", "e10", register}, {"Registration Form", "confirm password field", "e5", register}, // Page 3: E-commerce Product — 2 queries {"Product Page", "add to cart", "e4", product}, {"Product Page", "product reviews tab", "e8", product}, // Page 4: Navigation — 2 queries {"Navigation Header", "search box", "e7", nav}, {"Navigation Header", "shopping cart", "e9", nav}, // Page 5: Dashboard — 2 queries {"Analytics Dashboard", "export report", "e1", dashboard}, {"Analytics Dashboard", "date range selector", "e3", dashboard}, // Page 6: Search Results — 2 queries {"Search Results", "search input", "e0", search}, {"Search Results", "next page link", "e7", search}, // Page 7: Data Table — 2 queries {"Admin Data Table", "search orders", "e1", table}, {"Admin Data Table", "export csv", "e3", table}, // Page 8: Modal — 2 queries {"Confirmation Modal", "cancel button", "e7", modal}, {"Confirmation Modal", "confirm deletion input", "e5", modal}, // Page 9: Settings — 2 queries {"Settings Page", "save changes button", "e9", settings}, {"Settings Page", "dark mode toggle", "e8", settings}, // Page 10: Checkout — 2 queries {"Checkout Page", "place order button", "e11", checkout}, {"Checkout Page", "card number field", "e7", checkout}, } } // studyHardCases returns 10 intentionally-challenging query/element pairs // designed to reveal differentiation between matchers: // // Group A (query uses an abbreviation, 0 lexical word overlap): // expects embedding to win via character n-gram similarity. // // Group B (query uses a paraphrase / synonym): // expects both matchers to struggle, revealing the ceiling of // surface-form-only matching. // // Group C (ambiguous — multiple equally-plausible elements): // expects combined to win via score averaging. func studyHardCases() []studyCase { // Reuse page element sets from studyCases. cases := studyCases() // Helper: find element slice for a given page name. pageElems := map[string][]ElementDescriptor{} for _, c := range cases { if _, ok := pageElems[c.page]; !ok { pageElems[c.page] = c.elements } } product := pageElems["Product Page"] checkout := pageElems["Checkout Page"] login := pageElems["Login Form"] register := pageElems["Registration Form"] settings := pageElems["Settings Page"] table := pageElems["Admin Data Table"] modal := pageElems["Confirmation Modal"] return []studyCase{ // ── Group A: Abbreviations (lexical = 0, embedding has n-gram overlap) ── // "specs" ↔ "Specifications" — shares "^sp","spe","pec","spec" (4-gram) {"Product Page [HARD]", "specs tab", "e9", product}, // "qty" ↔ "Quantity" — shares "^q","ty","y$" {"Product Page [HARD]", "qty input", "e3", product}, // "addr" ↔ "address" — shares "^a","ad","dd","dr" {"Checkout Page [HARD]", "addr field", "e3", checkout}, // "pwd" ↔ "Password" — shares "^p","d$" (weak but present) {"Login Form [HARD]", "pwd textbox", "e2", login}, // "notifs" ↔ "notifications" — shares "not","oti" {"Settings Page [HARD]", "toggle email notifs", "e6", settings}, // ── Group B: Paraphrases / synonyms (no character overlap, both expected to struggle) ── // "download" ↔ "Export to CSV" — no shared characters {"Admin Data Table [HARD]", "download table data", "e3", table}, // "proceed" ↔ "Place order" — no shared characters {"Checkout Page [HARD]", "proceed to payment", "e11", checkout}, // "sign up" ↔ "Create account" — no shared characters {"Registration Form [HARD]", "sign up now", "e10", register}, // "dismiss" ↔ "Cancel" — no shared characters {"Confirmation Modal [HARD]", "dismiss dialog", "e7", modal}, // ── Group C: Ambiguous (multiple "button" elements, exact name helps) ── // "dark theme" ↔ "Dark mode" switch — "dark" word-matches exactly {"Settings Page [HARD]", "dark theme switch", "e8", settings}, } } // ----------------------------------------------------------------------- // Core evaluation logic // ----------------------------------------------------------------------- func runMatcher( name string, matcher ElementMatcher, cases []studyCase, ) []studyResult { ctx := context.Background() opts := FindOptions{Threshold: 0.0, TopK: 3} results := make([]studyResult, 0, len(cases)) for _, c := range cases { start := time.Now() fr, err := matcher.Find(ctx, c.query, c.elements, opts) elapsed := time.Since(start).Nanoseconds() if err != nil { results = append(results, studyResult{ matcherName: name, caseName: fmt.Sprintf("%s | %q", c.page, c.query), page: c.page, latencyNs: elapsed, }) continue } // Acc@1 hit1 := fr.BestRef == c.expectedRef // Acc@3 — expected ref in any of the top-3 matches hit3 := hit1 if !hit3 { for _, m := range fr.Matches { if m.Ref == c.expectedRef { hit3 = true break } } } results = append(results, studyResult{ matcherName: name, caseName: fmt.Sprintf("%s | %q", c.page, c.query), page: c.page, hit1: hit1, hit3: hit3, latencyNs: elapsed, bestRef: fr.BestRef, bestScore: fr.BestScore, }) } return results } // ----------------------------------------------------------------------- // Report generation helpers // ----------------------------------------------------------------------- func percent(n, total int) string { if total == 0 { return " 0.0%" } return fmt.Sprintf("%5.1f%%", float64(n)/float64(total)*100) } func meanLatencyUs(results []studyResult) float64 { if len(results) == 0 { return 0 } var sum int64 for _, r := range results { sum += r.latencyNs } return float64(sum) / float64(len(results)) / 1000.0 } func acc(results []studyResult, k int) (int, int) { hits := 0 for _, r := range results { if k == 1 && r.hit1 { hits++ } if k == 3 && r.hit3 { hits++ } } return hits, len(results) } func printSeparator(t *testing.T, char string, width int) { t.Log(strings.Repeat(char, width)) } // ----------------------------------------------------------------------- // Main study test // ----------------------------------------------------------------------- func TestBenchmarkStudy(t *testing.T) { cases := studyCases() matchers := []struct { name string matcher ElementMatcher }{ {"Lexical", NewLexicalMatcher()}, {"Embedding", NewEmbeddingMatcher(NewHashingEmbedder(128))}, {"Combined", NewCombinedMatcher(NewHashingEmbedder(128))}, } // Run all matchers allResults := make(map[string][]studyResult, len(matchers)) for _, m := range matchers { allResults[m.name] = runMatcher(m.name, m.matcher, cases) } // ---------------------------------------------------------------- // REPORT HEADER // ---------------------------------------------------------------- const W = 72 t.Log("") printSeparator(t, "═", W) t.Log(" SEMANTIC MATCHING — CONTROLLED BENCHMARK STUDY") t.Logf(" %d queries × %d page types | 3 matchers | TopK=3 | Threshold=0", len(cases), 10) printSeparator(t, "═", W) // ---------------------------------------------------------------- // OVERALL SUMMARY TABLE // ---------------------------------------------------------------- t.Log("") t.Log(" OVERALL RESULTS") t.Log("") t.Logf(" %-12s %7s %7s %12s", "Matcher", "Acc@1", "Acc@3", "Latency (µs)") t.Log(" " + strings.Repeat("-", 46)) type summaryRow struct { name string acc1 int acc3 int total int latency float64 } rows := make([]summaryRow, 0, len(matchers)) for _, m := range matchers { res := allResults[m.name] h1, tot := acc(res, 1) h3, _ := acc(res, 3) lat := meanLatencyUs(res) rows = append(rows, summaryRow{m.name, h1, h3, tot, lat}) t.Logf(" %-12s %s %s %10.1f µs", m.name, percent(h1, tot), percent(h3, tot), lat) } t.Log(" " + strings.Repeat("-", 46)) t.Log("") // ---------------------------------------------------------------- // PER-PAGE BREAKDOWN // ---------------------------------------------------------------- // Collect unique page names in order pages := make([]string, 0, 10) seen := map[string]bool{} for _, c := range cases { if !seen[c.page] { pages = append(pages, c.page) seen[c.page] = true } } printSeparator(t, "─", W) t.Log(" PER-PAGE BREAKDOWN (Acc@1 / Acc@3) ") printSeparator(t, "─", W) t.Log("") t.Logf(" %-26s %-14s %-14s %-14s", "Page", "Lexical", "Embedding", "Combined") t.Log(" " + strings.Repeat("-", 66)) for _, page := range pages { // Filter results for this page cols := make([]string, 0, 3) for _, m := range matchers { var pageRes []studyResult for _, r := range allResults[m.name] { if r.page == page { pageRes = append(pageRes, r) } } h1, tot := acc(pageRes, 1) h3, _ := acc(pageRes, 3) cols = append(cols, fmt.Sprintf("%s / %s", percent(h1, tot), percent(h3, tot))) } t.Logf(" %-26s %-14s %-14s %-14s", page, cols[0], cols[1], cols[2]) } t.Log("") // ---------------------------------------------------------------- // DETAILED CASE-BY-CASE TABLE // ---------------------------------------------------------------- printSeparator(t, "─", W) t.Log(" CASE-BY-CASE RESULTS (✓ = hit, ✗ = miss) ") printSeparator(t, "─", W) t.Log("") t.Logf(" %-6s %-28s %-8s %-8s %-8s %-8s %-8s %-8s", "#", "Query", "Lex@1", "Lex@3", "Emb@1", "Emb@3", "Com@1", "Com@3") t.Log(" " + strings.Repeat("-", 88)) mark := func(hit bool) string { if hit { return " ✓" } return " ✗" } for i, c := range cases { shortQ := c.query if len(shortQ) > 27 { shortQ = shortQ[:24] + "..." } lex := allResults["Lexical"][i] emb := allResults["Embedding"][i] com := allResults["Combined"][i] t.Logf(" %3d. %-28s %-8s %-8s %-8s %-8s %-8s %-8s", i+1, shortQ, mark(lex.hit1), mark(lex.hit3), mark(emb.hit1), mark(emb.hit3), mark(com.hit1), mark(com.hit3), ) } t.Log("") // ---------------------------------------------------------------- // MISSED CASES ANALYSIS // ---------------------------------------------------------------- printSeparator(t, "─", W) t.Log(" MISSED CASES ANALYSIS (Acc@1 misses)") printSeparator(t, "─", W) t.Log("") for _, m := range matchers { missCount := 0 for _, r := range allResults[m.name] { if !r.hit1 { missCount++ } } if missCount == 0 { t.Logf(" %s: perfect score — no Acc@1 misses", m.name) continue } t.Logf(" %s misses (%d):", m.name, missCount) for i, r := range allResults[m.name] { if r.hit1 { continue } t.Logf(" [%2d] %-28s expected=%-4s got=%-4s score=%.3f", i+1, fmt.Sprintf("%q", cases[i].query), cases[i].expectedRef, r.bestRef, r.bestScore) } } t.Log("") // ---------------------------------------------------------------- // MATCHER COMPARISON: cases where they DISAGREE // ---------------------------------------------------------------- printSeparator(t, "─", W) t.Log(" DISAGREMENT ANALYSIS (where Combined beats both)") printSeparator(t, "─", W) t.Log("") improvements := 0 for i := range cases { lex := allResults["Lexical"][i] emb := allResults["Embedding"][i] com := allResults["Combined"][i] if !lex.hit1 && !emb.hit1 && com.hit1 { improvements++ t.Logf(" [%2d] %q — Combined rescued (lex✗ emb✗ com✓)", i+1, cases[i].query) } if lex.hit1 && emb.hit1 && !com.hit1 { t.Logf(" [%2d] %q — Combined degraded (lex✓ emb✓ com✗) !", i+1, cases[i].query) } } if improvements == 0 { t.Log(" No unique rescues by Combined (or none needed)") } t.Log("") // ---------------------------------------------------------------- // LATENCY COMPARISON // ---------------------------------------------------------------- printSeparator(t, "─", W) t.Log(" LATENCY SUMMARY") printSeparator(t, "─", W) t.Log("") // Sort by latency ascending sort.Slice(rows, func(i, j int) bool { return rows[i].latency < rows[j].latency }) baseline := rows[0].latency for _, row := range rows { overhead := "" if row.latency > baseline { overhead = fmt.Sprintf(" (+%.1fx)", row.latency/baseline) } t.Logf(" %-12s %8.2f µs%s", row.name, row.latency, overhead) } t.Log("") // ---------------------------------------------------------------- // FINAL VERDICT // ---------------------------------------------------------------- printSeparator(t, "═", W) t.Log(" VERDICT") printSeparator(t, "─", W) t.Log("") // Find best Acc@1 bestAcc1Name := "" bestAcc1 := -1 for _, m := range matchers { h1, tot := acc(allResults[m.name], 1) pct := h1 * 100 / tot if pct > bestAcc1 { bestAcc1 = pct bestAcc1Name = m.name } } lexH1, tot := acc(allResults["Lexical"], 1) embH1, _ := acc(allResults["Embedding"], 1) comH1, _ := acc(allResults["Combined"], 1) lexH3, _ := acc(allResults["Lexical"], 3) embH3, _ := acc(allResults["Embedding"], 3) comH3, _ := acc(allResults["Combined"], 3) t.Logf(" Best Acc@1: %s (%d/%d = %s)", bestAcc1Name, comH1, tot, percent(comH1, tot)) t.Log("") t.Logf(" Acc@1 — Lexical: %s Embedding: %s Combined: %s", percent(lexH1, tot), percent(embH1, tot), percent(comH1, tot)) t.Logf(" Acc@3 — Lexical: %s Embedding: %s Combined: %s", percent(lexH3, tot), percent(embH3, tot), percent(comH3, tot)) t.Log("") t.Logf(" Mean latency — Lexical: %.1fµs Embedding: %.1fµs Combined: %.1fµs", meanLatencyUs(allResults["Lexical"]), meanLatencyUs(allResults["Embedding"]), meanLatencyUs(allResults["Combined"])) t.Log("") printSeparator(t, "═", W) t.Log("") // ================================================================ // PART II — HARD CASES (abbreviations, synonyms, paraphrases) // ================================================================ hardCases := studyHardCases() allHard := make(map[string][]studyResult, len(matchers)) for _, m := range matchers { allHard[m.name] = runMatcher(m.name, m.matcher, hardCases) } t.Log("") printSeparator(t, "═", W) t.Log(" PART II — HARD CASES (abbreviations, synonyms, paraphrases)") t.Logf(" %d queries | 3 matcher groups: A=abbrev B=synonym C=ambiguous", len(hardCases)) printSeparator(t, "═", W) // Overall hard summary t.Log("") t.Log(" HARD OVERALL") t.Log("") t.Logf(" %-12s %7s %7s %12s", "Matcher", "Acc@1", "Acc@3", "Latency (µs)") t.Log(" " + strings.Repeat("-", 46)) for _, m := range matchers { res := allHard[m.name] h1, tot := acc(res, 1) h3, _ := acc(res, 3) lat := meanLatencyUs(res) t.Logf(" %-12s %s %s %10.1f µs", m.name, percent(h1, tot), percent(h3, tot), lat) } t.Log(" " + strings.Repeat("-", 46)) t.Log("") // Case-by-case hard results printSeparator(t, "─", W) t.Log(" HARD CASE-BY-CASE (✓ = hit, ✗ = miss)") printSeparator(t, "─", W) t.Log("") t.Logf(" %-3s %-3s %-30s %-8s %-8s %-8s %-8s %-8s %-8s", "#", "Grp", "Query", "Lex@1", "Lex@3", "Emb@1", "Emb@3", "Com@1", "Com@3") t.Log(" " + strings.Repeat("-", 90)) groups := []string{"A", "A", "A", "A", "A", "B", "B", "B", "B", "C"} for i, c := range hardCases { grp := groups[i] shortQ := c.query if len(shortQ) > 29 { shortQ = shortQ[:26] + "..." } lex := allHard["Lexical"][i] emb := allHard["Embedding"][i] com := allHard["Combined"][i] t.Logf(" %3d %-3s %-30s %-8s %-8s %-8s %-8s %-8s %-8s", i+1, grp, shortQ, mark(lex.hit1), mark(lex.hit3), mark(emb.hit1), mark(emb.hit3), mark(com.hit1), mark(com.hit3), ) } t.Log("") // Per-group analysis printSeparator(t, "─", W) t.Log(" GROUP ANALYSIS") printSeparator(t, "─", W) t.Log("") t.Logf(" %-20s %-14s %-14s %-14s", "Group", "Lexical Acc@1", "Embedding Acc@1", "Combined Acc@1") t.Log(" " + strings.Repeat("-", 66)) groupDefs := []struct { label string ids []int // 0-indexed desc string }{ {"A (abbreviations)", []int{0, 1, 2, 3, 4}, "expected: Emb ≥ Lex"}, {"B (synonyms)", []int{5, 6, 7, 8}, "expected: both struggle"}, {"C (ambiguous)", []int{9}, "expected: Combined wins"}, } for _, gd := range groupDefs { for _, mname := range []string{"Lexical", "Embedding", "Combined"} { var subset []studyResult for _, idx := range gd.ids { subset = append(subset, allHard[mname][idx]) } _ = subset } lexSub := filterByIdx(allHard["Lexical"], gd.ids) embSub := filterByIdx(allHard["Embedding"], gd.ids) comSub := filterByIdx(allHard["Combined"], gd.ids) lh1, lt := acc(lexSub, 1) eh1, et := acc(embSub, 1) ch1, ct := acc(comSub, 1) t.Logf(" %-20s %-14s %-15s %-14s %s", gd.label, percent(lh1, lt), percent(eh1, et), percent(ch1, ct), gd.desc) } t.Log("") // Combined vs Individual: misses that Combined rescues printSeparator(t, "─", W) t.Log(" COMBINED RESCUES (in hard cases)") printSeparator(t, "─", W) t.Log("") rescueCount := 0 for i := range hardCases { lex := allHard["Lexical"][i] emb := allHard["Embedding"][i] com := allHard["Combined"][i] if !lex.hit1 && !emb.hit1 && com.hit1 { rescueCount++ t.Logf(" RESCUE [%d] %q — Lex✗ Emb✗ Com✓", i+1, hardCases[i].query) } if !lex.hit1 && emb.hit1 && com.hit1 { t.Logf(" EMB WIN [%d] %q — Lex✗ Emb✓ Com✓", i+1, hardCases[i].query) } if lex.hit1 && !emb.hit1 && com.hit1 { t.Logf(" LEX WIN [%d] %q — Lex✓ Emb✗ Com✓", i+1, hardCases[i].query) } if lex.hit1 && !emb.hit1 && !com.hit1 { t.Logf(" COM FAIL[%d] %q — Lex✓ Emb✗ Com✗ ← embedding dragged score down", i+1, hardCases[i].query) } } if rescueCount == 0 { t.Log(" No unique Combined rescues in hard cases") } t.Log("") // Score comparison for hard cases printSeparator(t, "─", W) t.Log(" SCORE DETAIL (best score returned by each matcher per hard case)") printSeparator(t, "─", W) t.Log("") t.Logf(" %-3s %-30s %-6s %-8s %-8s %-8s %-8s", "#", "Query", "Want", "Lex score", "Emb score", "Com score", "Winner") t.Log(" " + strings.Repeat("-", 78)) for i, c := range hardCases { lex := allHard["Lexical"][i] emb := allHard["Embedding"][i] com := allHard["Combined"][i] shortQ := c.query if len(shortQ) > 29 { shortQ = shortQ[:26] + "..." } var winner string if !lex.hit1 && emb.hit1 { winner = "Embedding" } else if lex.hit1 && !emb.hit1 { winner = "Lexical" } else if lex.hit1 && emb.hit1 { winner = "both" } else { winner = "none" } t.Logf(" %3d %-30s %-6s %9.3f %9.3f %9.3f %s", i+1, shortQ, c.expectedRef, lex.bestScore, emb.bestScore, com.bestScore, winner) } t.Log("") // Final combined verdict printSeparator(t, "═", W) t.Log(" FINAL VERDICT — EASY + HARD COMBINED") printSeparator(t, "─", W) t.Log("") allTotal := len(cases) + len(hardCases) for _, m := range matchers { combined := append(allResults[m.name], allHard[m.name]...) h1, _ := acc(combined, 1) h3, _ := acc(combined, 3) lat := meanLatencyUs(combined) t.Logf(" %-12s Acc@1=%s Acc@3=%s µs=%.1f (total %d queries)", m.name, percent(h1, allTotal), percent(h3, allTotal), lat, allTotal) } t.Log("") printSeparator(t, "═", W) t.Log("") } // filterByIdx returns results at specified 0-based indices. func filterByIdx(results []studyResult, indices []int) []studyResult { out := make([]studyResult, 0, len(indices)) for _, i := range indices { if i < len(results) { out = append(out, results[i]) } } return out }