| package semantic |
|
|
| import ( |
| "context" |
| "fmt" |
| "strings" |
| "testing" |
| ) |
|
|
| |
| |
| |
|
|
| func TestSynonymIndex_Bidirectional(t *testing.T) { |
| |
| for canonical, synonyms := range uiSynonyms { |
| for _, syn := range synonyms { |
| if syns, ok := synonymIndex[syn]; !ok { |
| t.Errorf("synonym %q (of %q) not in synonymIndex", syn, canonical) |
| } else if !syns[canonical] { |
| t.Errorf("synonymIndex[%q] does not map back to canonical %q", syn, canonical) |
| } |
| } |
| } |
| } |
|
|
| func TestSynonymScore_SignInLogIn(t *testing.T) { |
| qTokens := tokenize("sign in") |
| dTokens := tokenize("Log in") |
| score := synonymScore(qTokens, dTokens) |
| if score < 0.3 { |
| t.Errorf("expected synonym score >= 0.3 for 'sign in' vs 'Log in', got %f", score) |
| } |
| } |
|
|
| func TestSynonymScore_RegisterCreateAccount(t *testing.T) { |
| qTokens := tokenize("register") |
| dTokens := tokenize("Create account") |
| score := synonymScore(qTokens, dTokens) |
| if score < 0.5 { |
| t.Errorf("expected synonym score >= 0.5 for 'register' vs 'Create account', got %f", score) |
| } |
| } |
|
|
| func TestSynonymScore_LookUpSearch(t *testing.T) { |
| qTokens := tokenize("look up") |
| dTokens := tokenize("Search") |
| score := synonymScore(qTokens, dTokens) |
| if score < 0.3 { |
| t.Errorf("expected synonym score >= 0.3 for 'look up' vs 'Search', got %f", score) |
| } |
| } |
|
|
| func TestSynonymScore_NavigationMainMenu(t *testing.T) { |
| qTokens := tokenize("navigation") |
| dTokens := tokenize("Main menu") |
| score := synonymScore(qTokens, dTokens) |
| if score < 0.3 { |
| t.Errorf("expected synonym score >= 0.3 for 'navigation' vs 'Main menu', got %f", score) |
| } |
| } |
|
|
| func TestSynonymScore_NoRelation(t *testing.T) { |
| qTokens := tokenize("elephant") |
| dTokens := tokenize("button") |
| score := synonymScore(qTokens, dTokens) |
| if score > 0.1 { |
| t.Errorf("expected near-zero synonym score for unrelated terms, got %f", score) |
| } |
| } |
|
|
| func TestExpandWithSynonyms_MultiWord(t *testing.T) { |
| query := tokenize("sign in") |
| desc := tokenize("log in") |
| expanded := expandWithSynonyms(query, desc) |
| |
| found := false |
| for _, tok := range expanded { |
| if tok == "log" { |
| found = true |
| break |
| } |
| } |
| if !found { |
| t.Errorf("expanding 'sign in' against desc 'log in' should add 'log', got: %v", expanded) |
| } |
| } |
|
|
| func TestBuildPhrases(t *testing.T) { |
| tokens := []string{"sign", "in", "button"} |
| phrases := buildPhrases(tokens, 3) |
| if len(phrases) == 0 { |
| t.Fatal("expected at least one phrase") |
| } |
| found := false |
| for _, p := range phrases { |
| if p.text == "sign in" { |
| found = true |
| break |
| } |
| } |
| if !found { |
| texts := make([]string, len(phrases)) |
| for i, p := range phrases { |
| texts[i] = p.text |
| } |
| t.Errorf("expected phrase 'sign in', got: %v", texts) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestRemoveStopwordsContextAware_PreservesSignIn(t *testing.T) { |
| tokens := tokenize("sign in") |
| otherTokens := tokenize("log in button") |
| filtered := removeStopwordsContextAware(tokens, otherTokens) |
| |
| |
| hasIn := false |
| for _, tok := range filtered { |
| if tok == "in" { |
| hasIn = true |
| } |
| } |
| if !hasIn { |
| t.Errorf("expected 'in' to be preserved in context-aware removal for 'sign in', got: %v", filtered) |
| } |
| } |
|
|
| func TestRemoveStopwordsContextAware_RemovesIrrelevantStopwords(t *testing.T) { |
| tokens := tokenize("click the submit button") |
| otherTokens := tokenize("button Submit") |
| filtered := removeStopwordsContextAware(tokens, otherTokens) |
| for _, tok := range filtered { |
| if tok == "the" { |
| t.Errorf("expected 'the' to be removed in context-aware removal, got: %v", filtered) |
| } |
| } |
| } |
|
|
| func TestRemoveStopwordsContextAware_PreservesSemanticStopwordInContext(t *testing.T) { |
| |
| tokens := tokenize("not now") |
| otherTokens := tokenize("Not now button") |
| filtered := removeStopwordsContextAware(tokens, otherTokens) |
| hasNot := false |
| for _, tok := range filtered { |
| if tok == "not" { |
| hasNot = true |
| } |
| } |
| if !hasNot { |
| t.Errorf("expected 'not' to be preserved when it appears in other tokens, got: %v", filtered) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestTokenPrefixScore_BtnButton(t *testing.T) { |
| |
| |
| qTokens := tokenize("btn") |
| dTokens := tokenize("button") |
| score := tokenPrefixScore(qTokens, dTokens) |
| t.Logf("prefix score for 'btn' -> 'button' = %f (abbreviation handled by synonyms)", score) |
| if score > 0.5 { |
| t.Errorf("unexpected high prefix score for abbreviation 'btn' -> 'button', got %f", score) |
| } |
| } |
|
|
| func TestTokenPrefixScore_NavNavigation(t *testing.T) { |
| qTokens := tokenize("nav") |
| dTokens := tokenize("navigation menu") |
| score := tokenPrefixScore(qTokens, dTokens) |
| if score < 0.2 { |
| t.Errorf("expected prefix score >= 0.2 for 'nav' -> 'navigation', got %f", score) |
| } |
| } |
|
|
| func TestTokenPrefixScore_NoPrefix(t *testing.T) { |
| qTokens := tokenize("elephant") |
| dTokens := tokenize("button") |
| score := tokenPrefixScore(qTokens, dTokens) |
| if score > 0.01 { |
| t.Errorf("expected near-zero prefix score for unrelated terms, got %f", score) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestLexicalScore_SignIn_vs_LogIn(t *testing.T) { |
| |
| score := LexicalScore("sign in", "link: Log in") |
| t.Logf("LexicalScore('sign in', 'link: Log in') = %f", score) |
| if score < 0.15 { |
| t.Errorf("expected improved score for 'sign in' vs 'Log in', got %f (was 0.207 before improvements)", score) |
| } |
| } |
|
|
| func TestLexicalScore_Register_vs_CreateAccount(t *testing.T) { |
| score := LexicalScore("register", "link: Create account") |
| t.Logf("LexicalScore('register', 'link: Create account') = %f", score) |
| if score < 0.10 { |
| t.Errorf("expected improved score for 'register' vs 'Create account', got %f (was 0.134 before)", score) |
| } |
| } |
|
|
| func TestLexicalScore_LookUp_vs_Search(t *testing.T) { |
| score := LexicalScore("look up", "search: Search") |
| t.Logf("LexicalScore('look up', 'search: Search') = %f", score) |
| |
| if score < 0.10 { |
| t.Errorf("expected improved score for 'look up' vs 'Search', got %f", score) |
| } |
| } |
|
|
| func TestLexicalScore_Navigation_vs_MainMenu(t *testing.T) { |
| score := LexicalScore("navigation", "menu: Main menu") |
| t.Logf("LexicalScore('navigation', 'menu: Main menu') = %f", score) |
| if score < 0.15 { |
| t.Errorf("expected improved score for 'navigation' vs 'Main menu', got %f (was 0.206 before)", score) |
| } |
| } |
|
|
| func TestLexicalScore_Download_vs_Export(t *testing.T) { |
| score := LexicalScore("download report", "button: Export") |
| t.Logf("LexicalScore('download report', 'button: Export') = %f", score) |
| if score < 0.10 { |
| t.Errorf("expected improved score for 'download' vs 'Export', got %f", score) |
| } |
| } |
|
|
| func TestLexicalScore_Proceed_vs_PlaceOrder(t *testing.T) { |
| score := LexicalScore("proceed to payment", "button: Place order") |
| t.Logf("LexicalScore('proceed to payment', 'button: Place order') = %f", score) |
| |
| } |
|
|
| func TestLexicalScore_Dismiss_vs_Close(t *testing.T) { |
| score := LexicalScore("dismiss dialog", "button: Close") |
| t.Logf("LexicalScore('dismiss dialog', 'button: Close') = %f", score) |
| if score < 0.10 { |
| t.Errorf("expected improved score for 'dismiss' vs 'Close', got %f", score) |
| } |
| } |
|
|
| func TestLexicalScore_PrefixAbbreviation(t *testing.T) { |
| score := LexicalScore("btn submit", "button: Submit") |
| t.Logf("LexicalScore('btn submit', 'button: Submit') = %f", score) |
| if score < 0.3 { |
| t.Errorf("expected good score for 'btn submit' vs 'button: Submit', got %f", score) |
| } |
| } |
|
|
| func TestLexicalScore_StillExactMatch(t *testing.T) { |
| score := LexicalScore("submit button", "button: Submit") |
| if score < 0.5 { |
| t.Errorf("expected high score for exact match after improvements, got %f", score) |
| } |
| } |
|
|
| func TestLexicalScore_StillRejectsUnrelated(t *testing.T) { |
| score := LexicalScore("download pdf", "button: Login") |
| if score > 0.35 { |
| t.Errorf("expected low score for unrelated query after improvements, got %f", score) |
| } |
| } |
|
|
| |
| |
| |
|
|
| |
| func buildRealWorldElements() map[string][]ElementDescriptor { |
| return map[string][]ElementDescriptor{ |
| "wikipedia": { |
| {Ref: "e1", Role: "search", Name: "Search Wikipedia"}, |
| {Ref: "e2", Role: "button", Name: "Search"}, |
| {Ref: "e3", Role: "link", Name: "Main page"}, |
| {Ref: "e4", Role: "link", Name: "Contents"}, |
| {Ref: "e5", Role: "link", Name: "Current events"}, |
| {Ref: "e6", Role: "link", Name: "Random article"}, |
| {Ref: "e7", Role: "link", Name: "About Wikipedia"}, |
| {Ref: "e8", Role: "link", Name: "Log in"}, |
| {Ref: "e9", Role: "link", Name: "Create account"}, |
| {Ref: "e10", Role: "navigation", Name: "Main menu"}, |
| {Ref: "e11", Role: "link", Name: "Talk"}, |
| {Ref: "e12", Role: "link", Name: "Contributions"}, |
| {Ref: "e13", Role: "heading", Name: "Wikipedia, the free encyclopedia"}, |
| {Ref: "e14", Role: "link", Name: "(Top)"}, |
| {Ref: "e15", Role: "link", Name: "Languages"}, |
| }, |
| "github_login": { |
| {Ref: "e1", Role: "link", Name: "Homepage"}, |
| {Ref: "e2", Role: "heading", Name: "Sign in to GitHub"}, |
| {Ref: "e3", Role: "textbox", Name: "Username or email address"}, |
| {Ref: "e4", Role: "textbox", Name: "Password"}, |
| {Ref: "e5", Role: "button", Name: "Sign in"}, |
| {Ref: "e6", Role: "link", Name: "Forgot password?"}, |
| {Ref: "e7", Role: "link", Name: "Create an account"}, |
| {Ref: "e8", Role: "link", Name: "Terms"}, |
| {Ref: "e9", Role: "link", Name: "Privacy"}, |
| {Ref: "e10", Role: "link", Name: "Docs"}, |
| {Ref: "e11", Role: "link", Name: "Contact GitHub Support"}, |
| }, |
| "google": { |
| {Ref: "e1", Role: "combobox", Name: "Search"}, |
| {Ref: "e2", Role: "button", Name: "Google Search"}, |
| {Ref: "e3", Role: "button", Name: "I'm Feeling Lucky"}, |
| {Ref: "e4", Role: "link", Name: "Gmail"}, |
| {Ref: "e5", Role: "link", Name: "Images"}, |
| {Ref: "e6", Role: "link", Name: "Sign in"}, |
| {Ref: "e7", Role: "link", Name: "About"}, |
| {Ref: "e8", Role: "link", Name: "Store"}, |
| {Ref: "e9", Role: "link", Name: "Advertising"}, |
| {Ref: "e10", Role: "link", Name: "Privacy"}, |
| {Ref: "e11", Role: "link", Name: "Settings"}, |
| }, |
| "ecommerce": { |
| {Ref: "e1", Role: "search", Name: "Search products"}, |
| {Ref: "e2", Role: "link", Name: "Home"}, |
| {Ref: "e3", Role: "link", Name: "Cart"}, |
| {Ref: "e4", Role: "button", Name: "Add to Cart"}, |
| {Ref: "e5", Role: "link", Name: "Sign in"}, |
| {Ref: "e6", Role: "link", Name: "Register"}, |
| {Ref: "e7", Role: "button", Name: "Buy Now"}, |
| {Ref: "e8", Role: "button", Name: "Place Order"}, |
| {Ref: "e9", Role: "link", Name: "Checkout"}, |
| {Ref: "e10", Role: "button", Name: "Apply Coupon"}, |
| {Ref: "e11", Role: "textbox", Name: "Quantity"}, |
| {Ref: "e12", Role: "button", Name: "Export Orders"}, |
| {Ref: "e13", Role: "navigation", Name: "Main navigation"}, |
| {Ref: "e14", Role: "link", Name: "My Account"}, |
| {Ref: "e15", Role: "button", Name: "Close"}, |
| }, |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestCombined_ExactMatch_Wikipedia(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| tests := []struct { |
| query string |
| wantRef string |
| wantDesc string |
| }{ |
| {"Search Wikipedia", "e1", "Search Wikipedia"}, |
| {"Log in", "e8", "Log in"}, |
| {"Create account", "e9", "Create account"}, |
| {"Main menu", "e10", "Main menu"}, |
| {"Search button", "e2", "Search"}, |
| } |
|
|
| for _, tt := range tests { |
| t.Run(tt.query, func(t *testing.T) { |
| result, err := matcher.Find(context.Background(), tt.query, sites["wikipedia"], FindOptions{ |
| Threshold: 0.2, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatalf("Find error: %v", err) |
| } |
| if result.BestRef != tt.wantRef { |
| t.Errorf("query=%q: got BestRef=%s (score=%.3f), want %s (%s)", |
| tt.query, result.BestRef, result.BestScore, tt.wantRef, tt.wantDesc) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| } |
| }) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestCombined_Synonym_SignIn_LogIn(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| |
| result, err := matcher.Find(context.Background(), "sign in", sites["wikipedia"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='sign in': BestRef=%s Score=%.3f Confidence=%s", result.BestRef, result.BestScore, result.ConfidenceLabel()) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e8" { |
| t.Errorf("expected 'sign in' to match 'Log in' (e8), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Synonym_Register_CreateAccount(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "register", sites["wikipedia"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='register': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| foundInTop3 := false |
| for _, m := range result.Matches { |
| if m.Ref == "e9" { |
| foundInTop3 = true |
| break |
| } |
| } |
| if !foundInTop3 { |
| t.Errorf("expected 'register' to find 'Create account' (e9) in top matches") |
| } |
| } |
|
|
| func TestCombined_Synonym_LookUp_Search(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "look up", sites["wikipedia"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='look up': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| foundSearch := false |
| for _, m := range result.Matches { |
| if m.Ref == "e1" || m.Ref == "e2" { |
| foundSearch = true |
| break |
| } |
| } |
| if !foundSearch { |
| t.Errorf("expected 'look up' to find Search element in top matches") |
| } |
| } |
|
|
| func TestCombined_Synonym_Navigation_MainMenu(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "navigation", sites["wikipedia"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='navigation': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e10" { |
| t.Errorf("expected 'navigation' to match 'Main menu' (e10), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Synonym_Login_SignIn(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| |
| result, err := matcher.Find(context.Background(), "login", sites["github_login"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='login' on GitHub: BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| foundSignIn := false |
| for _, m := range result.Matches { |
| if m.Ref == "e5" || m.Ref == "e2" { |
| foundSignIn = true |
| break |
| } |
| } |
| if !foundSignIn { |
| t.Errorf("expected 'login' to find 'Sign in' element on GitHub login page") |
| } |
| } |
|
|
| func TestCombined_Synonym_Purchase_Checkout(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "purchase", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='purchase': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| foundPurchase := false |
| for _, m := range result.Matches { |
| if m.Ref == "e7" || m.Ref == "e8" || m.Ref == "e9" { |
| foundPurchase = true |
| break |
| } |
| } |
| if !foundPurchase { |
| t.Errorf("expected 'purchase' to find checkout/buy/order related element") |
| } |
| } |
|
|
| func TestCombined_Synonym_ProceedToPayment_PlaceOrder(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "proceed to payment", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='proceed to payment': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| if result.BestRef != "e8" { |
| t.Fatalf("expected 'proceed to payment' to match 'Place Order' (e8), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Synonym_Dismiss_Close(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "dismiss", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='dismiss': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e15" { |
| t.Errorf("expected 'dismiss' to match 'Close' (e15), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Synonym_Download_Export(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "download orders", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='download orders': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e12" { |
| t.Errorf("expected 'download orders' to match 'Export Orders' (e12), got %s", result.BestRef) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestCombined_Paraphrase_ForgotPassword(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "reset password", sites["github_login"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='reset password': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e6" { |
| t.Errorf("expected 'reset password' to match 'Forgot password?' (e6), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Paraphrase_ShoppingBag(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "shopping bag", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='shopping bag': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| foundCart := false |
| for _, m := range result.Matches { |
| if m.Ref == "e3" || m.Ref == "e4" { |
| foundCart = true |
| break |
| } |
| } |
| if !foundCart { |
| t.Errorf("expected 'shopping bag' to find Cart element") |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestCombined_Partial_Btn(t *testing.T) { |
| elements := []ElementDescriptor{ |
| {Ref: "e1", Role: "button", Name: "Submit"}, |
| {Ref: "e2", Role: "link", Name: "Home"}, |
| {Ref: "e3", Role: "textbox", Name: "Email"}, |
| } |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "submit btn", elements, FindOptions{ |
| Threshold: 0.15, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='submit btn': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| if result.BestRef != "e1" { |
| t.Errorf("expected 'submit btn' to match 'Submit' button (e1), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Partial_Nav(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "nav menu", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='nav menu': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e13" { |
| t.Errorf("expected 'nav menu' to match 'Main navigation' (e13), got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_Partial_Qty(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "qty", sites["ecommerce"], FindOptions{ |
| Threshold: 0.15, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Query='qty': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f role=%s name=%s", m.Ref, m.Score, m.Role, m.Name) |
| } |
| |
| if result.BestRef != "e11" { |
| t.Errorf("expected 'qty' to match 'Quantity' (e11), got %s", result.BestRef) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestCombined_EdgeCase_EmptyQuery(t *testing.T) { |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
| elements := []ElementDescriptor{{Ref: "e1", Role: "button", Name: "Submit"}} |
|
|
| result, err := matcher.Find(context.Background(), "", elements, FindOptions{ |
| Threshold: 0.1, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if len(result.Matches) > 0 { |
| t.Errorf("expected no matches for empty query, got %d", len(result.Matches)) |
| } |
| } |
|
|
| func TestCombined_EdgeCase_GibberishQuery(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| result, err := matcher.Find(context.Background(), "xyzzy plugh qwerty", sites["wikipedia"], FindOptions{ |
| Threshold: 0.3, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Gibberish query: matches=%d best_score=%.3f", len(result.Matches), result.BestScore) |
| |
| if len(result.Matches) > 0 { |
| t.Errorf("expected no matches for gibberish query at threshold 0.3, got %d", len(result.Matches)) |
| } |
| } |
|
|
| func TestCombined_EdgeCase_AllStopwords(t *testing.T) { |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
| elements := []ElementDescriptor{ |
| {Ref: "e1", Role: "button", Name: "Submit"}, |
| {Ref: "e2", Role: "link", Name: "The"}, |
| } |
|
|
| result, err := matcher.Find(context.Background(), "the a is", elements, FindOptions{ |
| Threshold: 0.1, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("All-stopwords query: matches=%d", len(result.Matches)) |
| } |
|
|
| func TestCombined_EdgeCase_VeryLongQuery(t *testing.T) { |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
| elements := []ElementDescriptor{ |
| {Ref: "e1", Role: "button", Name: "Submit"}, |
| } |
|
|
| longQuery := "I want to find the submit button that is located on the bottom right of the page and click on it to submit the form" |
| result, err := matcher.Find(context.Background(), longQuery, elements, FindOptions{ |
| Threshold: 0.1, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Long query: matches=%d best_score=%.3f", len(result.Matches), result.BestScore) |
| if result.BestRef != "e1" { |
| t.Errorf("expected long query to still find 'Submit' button, got %s", result.BestRef) |
| } |
| } |
|
|
| func TestCombined_EdgeCase_SingleCharQuery(t *testing.T) { |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
| elements := []ElementDescriptor{ |
| {Ref: "e1", Role: "link", Name: "X"}, |
| {Ref: "e2", Role: "button", Name: "Close"}, |
| } |
|
|
| result, err := matcher.Find(context.Background(), "x", elements, FindOptions{ |
| Threshold: 0.1, |
| TopK: 3, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Logf("Single char 'x': BestRef=%s Score=%.3f", result.BestRef, result.BestScore) |
| } |
|
|
| |
| |
| |
|
|
| func TestLexicalScore_MultipleRoleKeywordsAccumulate(t *testing.T) { |
| |
| |
| scoreMulti := LexicalScore("search input", "search: Email Input") |
| scoreSingle := LexicalScore("search something", "search: Email Input") |
|
|
| t.Logf("Multi-role score: %f, Single-role score: %f", scoreMulti, scoreSingle) |
| if scoreMulti <= scoreSingle { |
| t.Errorf("expected multi-role query to score higher than single-role, got multi=%f single=%f", scoreMulti, scoreSingle) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestComprehensiveEvaluation(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| type evalCase struct { |
| category string |
| query string |
| site string |
| wantRef string |
| wantName string |
| } |
|
|
| cases := []evalCase{ |
| |
| {"exact", "Search Wikipedia", "wikipedia", "e1", "Search Wikipedia"}, |
| {"exact", "Log in", "wikipedia", "e8", "Log in"}, |
| {"exact", "Create account", "wikipedia", "e9", "Create account"}, |
| {"exact", "Sign in", "github_login", "e5", "Sign in"}, |
| {"exact", "Google Search", "google", "e2", "Google Search"}, |
|
|
| |
| {"synonym", "sign in", "wikipedia", "e8", "Log in"}, |
| {"synonym", "register", "wikipedia", "e9", "Create account"}, |
| {"synonym", "look up", "wikipedia", "e1", "Search Wikipedia"}, |
| {"synonym", "navigation", "wikipedia", "e10", "Main menu"}, |
| {"synonym", "login button", "github_login", "e5", "Sign in"}, |
| {"synonym", "authenticate", "github_login", "e5", "Sign in"}, |
| {"synonym", "dismiss", "ecommerce", "e15", "Close"}, |
| {"synonym", "download orders", "ecommerce", "e12", "Export Orders"}, |
|
|
| |
| {"paraphrase", "reset password", "github_login", "e6", "Forgot password?"}, |
| {"paraphrase", "email field", "github_login", "e3", "Username or email address"}, |
|
|
| |
| {"partial", "qty", "ecommerce", "e11", "Quantity"}, |
| } |
|
|
| results := make(map[string][]bool) |
| var totalPass, totalFail int |
|
|
| for _, tc := range cases { |
| t.Run(fmt.Sprintf("%s/%s", tc.category, tc.query), func(t *testing.T) { |
| result, err := matcher.Find(context.Background(), tc.query, sites[tc.site], FindOptions{ |
| Threshold: 0.1, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
|
|
| pass := false |
| for i, m := range result.Matches { |
| if i >= 3 { |
| break |
| } |
| if m.Ref == tc.wantRef { |
| pass = true |
| break |
| } |
| } |
|
|
| results[tc.category] = append(results[tc.category], pass) |
| if pass { |
| totalPass++ |
| t.Logf("PASS: query=%q -> %s (score=%.3f)", tc.query, tc.wantName, result.BestScore) |
| } else { |
| totalFail++ |
| t.Logf("MISS: query=%q wanted %s (%s), got BestRef=%s (score=%.3f)", |
| tc.query, tc.wantRef, tc.wantName, result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" match: ref=%s score=%.3f name=%s", m.Ref, m.Score, m.Name) |
| } |
| } |
| }) |
| } |
|
|
| |
| t.Logf("\n=== EVALUATION SUMMARY ===") |
| t.Logf("Total: %d/%d (%.1f%%)", totalPass, totalPass+totalFail, 100*float64(totalPass)/float64(totalPass+totalFail)) |
| for cat, res := range results { |
| passed := 0 |
| for _, r := range res { |
| if r { |
| passed++ |
| } |
| } |
| t.Logf(" %s: %d/%d (%.0f%%)", cat, passed, len(res), 100*float64(passed)/float64(len(res))) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestHashingEmbedder_SynonymVectorsCloser(t *testing.T) { |
| embedder := NewHashingEmbedder(128) |
|
|
| vecs, err := embedder.Embed([]string{"sign in", "log in", "elephant"}) |
| if err != nil { |
| t.Fatal(err) |
| } |
|
|
| |
| |
| simSynonym := cosineSim(vecs[0], vecs[1]) |
| simUnrelated := cosineSim(vecs[0], vecs[2]) |
|
|
| t.Logf("sim('sign in', 'log in') = %.4f", simSynonym) |
| t.Logf("sim('sign in', 'elephant') = %.4f", simUnrelated) |
|
|
| if simSynonym <= simUnrelated { |
| t.Errorf("expected synonym embedding similarity (%.4f) > unrelated (%.4f)", simSynonym, simUnrelated) |
| } |
| } |
|
|
| func TestHashingEmbedder_AbbrVectorsCloser(t *testing.T) { |
| embedder := NewHashingEmbedder(128) |
|
|
| vecs, err := embedder.Embed([]string{"btn", "button", "elephant"}) |
| if err != nil { |
| t.Fatal(err) |
| } |
|
|
| simAbbr := cosineSim(vecs[0], vecs[1]) |
| simUnrelated := cosineSim(vecs[0], vecs[2]) |
|
|
| t.Logf("sim('btn', 'button') = %.4f", simAbbr) |
| t.Logf("sim('btn', 'elephant') = %.4f", simUnrelated) |
|
|
| if simAbbr <= simUnrelated { |
| t.Errorf("expected abbreviation embedding similarity (%.4f) > unrelated (%.4f)", simAbbr, simUnrelated) |
| } |
| } |
|
|
| |
| func cosineSim(a, b []float32) float64 { |
| var dot, normA, normB float64 |
| for i := range a { |
| dot += float64(a[i]) * float64(b[i]) |
| normA += float64(a[i]) * float64(a[i]) |
| normB += float64(b[i]) * float64(b[i]) |
| } |
| if normA == 0 || normB == 0 { |
| return 0 |
| } |
| return dot / (sqrt64(normA) * sqrt64(normB)) |
| } |
|
|
| |
| |
| |
|
|
| func TestScoreDistribution_BeforeVsExpected(t *testing.T) { |
| |
| |
| type scoreCase struct { |
| query string |
| desc string |
| minScore float64 |
| label string |
| } |
|
|
| cases := []scoreCase{ |
| |
| {"sign in", "link: Log in", 0.15, "synonym: sign in -> Log in"}, |
| {"register", "link: Create account", 0.10, "synonym: register -> Create account"}, |
| {"look up", "search: Search", 0.10, "synonym: look up -> Search"}, |
| {"navigation", "navigation: Main menu", 0.15, "synonym: navigation -> Main menu"}, |
| {"login button", "button: Sign in", 0.15, "synonym: login -> Sign in"}, |
| {"dismiss", "button: Close", 0.10, "synonym: dismiss -> Close"}, |
| {"download", "button: Export", 0.10, "synonym: download -> Export"}, |
|
|
| |
| {"btn submit", "button: Submit", 0.30, "prefix: btn -> button"}, |
| {"nav", "navigation: Main navigation", 0.15, "prefix: nav -> navigation"}, |
|
|
| |
| {"submit button", "button: Submit", 0.50, "exact: submit button"}, |
| {"search box", "search: Search", 0.30, "exact: search"}, |
| {"email input", "textbox: Email", 0.20, "exact-ish: email input"}, |
| } |
|
|
| for _, tc := range cases { |
| t.Run(tc.label, func(t *testing.T) { |
| score := LexicalScore(tc.query, tc.desc) |
| status := "PASS" |
| if score < tc.minScore { |
| status = "FAIL" |
| } |
| t.Logf("[%s] LexicalScore(%q, %q) = %.4f (min: %.2f)", status, tc.query, tc.desc, score, tc.minScore) |
| if score < tc.minScore { |
| t.Errorf("score %.4f below minimum %.2f", score, tc.minScore) |
| } |
| }) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestStopword_InPreservedInSignIn(t *testing.T) { |
| |
| q := tokenize("sign in button") |
| d := tokenize("button Log in") |
| filtered := removeStopwordsContextAware(q, d) |
|
|
| hasIn := false |
| for _, tok := range filtered { |
| if tok == "in" { |
| hasIn = true |
| } |
| } |
| if !hasIn { |
| t.Errorf("'in' should be preserved in 'sign in' context, filtered=%v", filtered) |
| } |
| } |
|
|
| func TestStopword_UpPreservedInSignUp(t *testing.T) { |
| q := tokenize("sign up now") |
| d := tokenize("Register button") |
| filtered := removeStopwordsContextAware(q, d) |
|
|
| hasUp := false |
| for _, tok := range filtered { |
| if tok == "up" { |
| hasUp = true |
| } |
| } |
| if !hasUp { |
| t.Errorf("'up' should be preserved in 'sign up' context, filtered=%v", filtered) |
| } |
| } |
|
|
| func TestStopword_NotPreservedInNotNow(t *testing.T) { |
| q := tokenize("not now") |
| d := tokenize("button Not now") |
| filtered := removeStopwordsContextAware(q, d) |
|
|
| hasNot := false |
| for _, tok := range filtered { |
| if tok == "not" { |
| hasNot = true |
| } |
| } |
| if !hasNot { |
| t.Errorf("'not' should be preserved when it appears in other tokens, filtered=%v", filtered) |
| } |
| } |
|
|
| |
| |
| |
|
|
| func BenchmarkSynonymScore(b *testing.B) { |
| qTokens := tokenize("sign in button") |
| dTokens := tokenize("button Log in") |
| b.ResetTimer() |
| for i := 0; i < b.N; i++ { |
| synonymScore(qTokens, dTokens) |
| } |
| } |
|
|
| func BenchmarkExpandWithSynonyms(b *testing.B) { |
| qTokens := tokenize("register now") |
| dTokens := tokenize("link Create account") |
| b.ResetTimer() |
| for i := 0; i < b.N; i++ { |
| expandWithSynonyms(qTokens, dTokens) |
| } |
| } |
|
|
| func BenchmarkLexicalScore_WithSynonyms(b *testing.B) { |
| b.ResetTimer() |
| for i := 0; i < b.N; i++ { |
| LexicalScore("sign in button", "button: Log in") |
| } |
| } |
|
|
| func BenchmarkLexicalScore_ExactMatch(b *testing.B) { |
| b.ResetTimer() |
| for i := 0; i < b.N; i++ { |
| LexicalScore("submit button", "button: Submit") |
| } |
| } |
|
|
| func BenchmarkCombinedMatcher_SynonymQuery(b *testing.B) { |
| elements := buildRealWorldElements()["wikipedia"] |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
| ctx := context.Background() |
| opts := FindOptions{Threshold: 0.15, TopK: 3} |
| b.ResetTimer() |
| for i := 0; i < b.N; i++ { |
| result, err := matcher.Find(ctx, "sign in", elements, opts) |
| if err != nil { |
| b.Fatalf("Find error: %v", err) |
| } |
| _ = result |
| } |
| } |
|
|
| |
| |
| |
|
|
| func TestMultiSiteEvaluation(t *testing.T) { |
| sites := buildRealWorldElements() |
| matcher := NewCombinedMatcher(NewHashingEmbedder(128)) |
|
|
| type testCase struct { |
| category string |
| query string |
| site string |
| wantRef string |
| wantName string |
| } |
|
|
| allCases := []testCase{ |
| |
| {"exact", "Search Wikipedia", "wikipedia", "e1", "Search Wikipedia"}, |
| {"exact", "Log in", "wikipedia", "e8", "Log in"}, |
| {"exact", "Create account", "wikipedia", "e9", "Create account"}, |
| {"exact", "Sign in", "github_login", "e5", "Sign in"}, |
| {"exact", "Password", "github_login", "e4", "Password"}, |
| {"exact", "Google Search", "google", "e2", "Google Search"}, |
| {"exact", "Cart", "ecommerce", "e3", "Cart"}, |
| {"exact", "Add to Cart", "ecommerce", "e4", "Add to Cart"}, |
|
|
| |
| {"synonym", "sign in", "wikipedia", "e8", "Log in"}, |
| {"synonym", "register", "wikipedia", "e9", "Create account"}, |
| {"synonym", "look up", "wikipedia", "e1", "Search Wikipedia"}, |
| {"synonym", "navigation", "wikipedia", "e10", "Main menu"}, |
| {"synonym", "login", "github_login", "e5", "Sign in"}, |
| {"synonym", "authenticate", "github_login", "e5", "Sign in"}, |
| {"synonym", "dismiss", "ecommerce", "e15", "Close"}, |
| {"synonym", "download orders", "ecommerce", "e12", "Export Orders"}, |
| {"synonym", "purchase", "ecommerce", "e7", "Buy Now"}, |
|
|
| |
| {"paraphrase", "reset password", "github_login", "e6", "Forgot password?"}, |
| {"paraphrase", "search input", "google", "e1", "Search"}, |
| {"paraphrase", "email field", "github_login", "e3", "Username or email address"}, |
| {"paraphrase", "shopping bag", "ecommerce", "e3", "Cart"}, |
|
|
| |
| {"partial", "qty", "ecommerce", "e11", "Quantity"}, |
| {"partial", "nav menu", "ecommerce", "e13", "Main navigation"}, |
|
|
| |
| {"edge", "top right login link", "wikipedia", "e8", "Log in"}, |
| } |
|
|
| catResults := make(map[string]struct{ pass, total int }) |
|
|
| for _, tc := range allCases { |
| t.Run(fmt.Sprintf("%s_%s_%s", tc.site, tc.category, strings.ReplaceAll(tc.query, " ", "_")), func(t *testing.T) { |
| result, err := matcher.Find(context.Background(), tc.query, sites[tc.site], FindOptions{ |
| Threshold: 0.1, |
| TopK: 5, |
| }) |
| if err != nil { |
| t.Fatal(err) |
| } |
|
|
| pass := false |
| for i, m := range result.Matches { |
| if i >= 3 { |
| break |
| } |
| if m.Ref == tc.wantRef { |
| pass = true |
| break |
| } |
| } |
|
|
| cr := catResults[tc.category] |
| cr.total++ |
| if pass { |
| cr.pass++ |
| t.Logf("query=%q -> %s score=%.3f", tc.query, tc.wantName, result.BestScore) |
| } else { |
| t.Logf("MISS query=%q wanted=%s (%s) got=%s score=%.3f", |
| tc.query, tc.wantRef, tc.wantName, result.BestRef, result.BestScore) |
| for _, m := range result.Matches { |
| t.Logf(" ref=%s score=%.3f name=%s", m.Ref, m.Score, m.Name) |
| } |
| } |
| catResults[tc.category] = cr |
| }) |
| } |
|
|
| |
| t.Logf("\nββββββββββββββββββββββββββββββββββββββββββββββββββββ") |
| t.Logf("β MULTI-SITE EVALUATION SUMMARY β") |
| t.Logf("β βββββββββββββββββββββββββββββββββββββββββββββββββββ£") |
| totalP, totalT := 0, 0 |
| for _, cat := range []string{"exact", "synonym", "paraphrase", "partial", "edge"} { |
| cr := catResults[cat] |
| pct := 0.0 |
| if cr.total > 0 { |
| pct = 100 * float64(cr.pass) / float64(cr.total) |
| } |
| t.Logf("β %-14s %d/%d (%.0f%%) β", cat, cr.pass, cr.total, pct) |
| totalP += cr.pass |
| totalT += cr.total |
| } |
| pct := 0.0 |
| if totalT > 0 { |
| pct = 100 * float64(totalP) / float64(totalT) |
| } |
| t.Logf("β βββββββββββββββββββββββββββββββββββββββββββββββββββ£") |
| t.Logf("β TOTAL %d/%d (%.0f%%) β", totalP, totalT, pct) |
| t.Logf("ββββββββββββββββββββββββββββββββββββββββββββββββββββ") |
| } |
|
|
| |
| |
| |
|
|
| func TestStopword_OnPreservedInLogOn(t *testing.T) { |
| query := removeStopwordsContextAware( |
| tokenize("log on"), |
| tokenize("button: Sign in [login]"), |
| ) |
| found := false |
| for _, tok := range query { |
| if tok == "on" { |
| found = true |
| } |
| } |
| if !found { |
| t.Errorf("'on' should be preserved in 'log on' context, got %v", query) |
| } |
| } |
|
|
| func TestSynonymScore_NoDuplicateCounting(t *testing.T) { |
| |
| |
| |
| score := synonymScore( |
| []string{"sign", "in"}, |
| []string{"login", "button"}, |
| ) |
| |
| |
| |
| if score > 0.55 { |
| t.Errorf("synonymScore should not double-count phrase components, got %.3f", score) |
| } |
| if score < 0.45 { |
| t.Errorf("synonymScore should recognise 'sign in' vs 'login', got %.3f", score) |
| } |
| } |
|
|
| func TestExpandWithSynonyms_NoDuplicateTokens(t *testing.T) { |
| expanded := expandWithSynonyms( |
| []string{"sign", "in"}, |
| []string{"login", "button"}, |
| ) |
| seen := make(map[string]int) |
| for _, tok := range expanded { |
| seen[tok]++ |
| } |
| for tok, cnt := range seen { |
| if cnt > 1 { |
| t.Errorf("token %q appears %d times in expanded set, expected at most 1", tok, cnt) |
| } |
| } |
| } |
|
|
| func TestSynonymIndex_LogOnBidirectional(t *testing.T) { |
| |
| syns, ok := synonymIndex["log on"] |
| if !ok { |
| t.Fatal("synonymIndex should contain 'log on'") |
| } |
| if _, has := syns["login"]; !has { |
| t.Error("'log on' should map to 'login'") |
| } |
|
|
| loginSyns, ok := synonymIndex["login"] |
| if !ok { |
| t.Fatal("synonymIndex should contain 'login'") |
| } |
| if _, has := loginSyns["log on"]; !has { |
| t.Error("'login' should map back to 'log on'") |
| } |
| } |
|
|
| func TestEmbedder_PhraseAwareSynonymInjection(t *testing.T) { |
| emb := NewHashingEmbedder(256) |
|
|
| |
| |
| vecs, err := emb.Embed([]string{ |
| "textbox: Look up bar", |
| "textbox: Search bar", |
| "textbox: Weather bar", |
| }) |
| if err != nil { |
| t.Fatalf("Embed failed: %v", err) |
| } |
| lookUp, search, weather := vecs[0], vecs[1], vecs[2] |
|
|
| simSyn := cosineSim(lookUp, search) |
| simUnrelated := cosineSim(lookUp, weather) |
|
|
| if simSyn <= simUnrelated { |
| t.Errorf("phrase-aware synonym injection should make 'look up' closer to 'search' "+ |
| "(got syn=%.4f, unrelated=%.4f)", simSyn, simUnrelated) |
| } |
| } |
|
|
| func TestLexicalScore_LogOn_vs_SignIn(t *testing.T) { |
| desc := "link: Sign in" |
| score := LexicalScore("log on", desc) |
| if score < 0.10 { |
| t.Errorf("'log on' vs '%s' should have meaningful score, got %.4f", desc, score) |
| } |
| } |
|
|