bat-6 committed on
Commit
12b83fb
·
1 Parent(s): 4c2a767

feat: implement hybrid ranking engine for semantic and feature-based project similarity

Browse files
models/faiss_index.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:469af18f0e06e31e389d476e47e643626f8ddfd69b593f299c78932080b5c858
3
- size 393810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228ea7b7be11e828d482432107d6c3baab192ea7018906f35da2282841f5a628
3
+ size 783405
models/metadata.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6765e155f8c73dc97d9f9d681291a1bba2a9d6c59c6712dd51a96e0cccce8058
3
- size 786476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:534c0c0a94e7082f80d28af3e3cc8228a3749078b95eb161cdc86b26b6afc293
3
+ size 786625
models/project_embeddings.npy CHANGED
Binary files a/models/project_embeddings.npy and b/models/project_embeddings.npy differ
 
src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc CHANGED
Binary files a/src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc and b/src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc differ
 
src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc CHANGED
Binary files a/src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc and b/src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc differ
 
src/similarity_model/__pycache__/preprocessing.cpython-313.pyc CHANGED
Binary files a/src/similarity_model/__pycache__/preprocessing.cpython-313.pyc and b/src/similarity_model/__pycache__/preprocessing.cpython-313.pyc differ
 
src/similarity_model/__pycache__/semantic_search.cpython-313.pyc CHANGED
Binary files a/src/similarity_model/__pycache__/semantic_search.cpython-313.pyc and b/src/similarity_model/__pycache__/semantic_search.cpython-313.pyc differ
 
src/similarity_model/hybrid_ranker.py CHANGED
@@ -47,8 +47,11 @@ def get_dynamic_weights(
47
  """
48
 
49
  # No feature evidence at all — rely on semantic regardless of query richness
 
 
 
50
  if coverage == 0:
51
- return 0.70, 0.20, 0.10
52
 
53
  # Rich features + high overlap → trust features heavily
54
  if feature_count >= 5 and coverage >= 0.60:
 
47
  """
48
 
49
  # No feature evidence at all — rely on semantic regardless of query richness
50
+ # Use 0.50 weight (not 0.70) to compensate for all-mpnet-base-v2's higher
51
+ # within-domain baseline scores; prevents same-domain/different-purpose
52
+ # projects from scoring too high when no features overlap.
53
  if coverage == 0:
54
+ return 0.50, 0.30, 0.20
55
 
56
  # Rich features + high overlap → trust features heavily
57
  if feature_count >= 5 and coverage >= 0.60: