feat: implement hybrid ranking engine for semantic and feature-based project similarity
Browse files- models/faiss_index.bin +2 -2
- models/metadata.parquet +2 -2
- models/project_embeddings.npy +0 -0
- src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc +0 -0
- src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc +0 -0
- src/similarity_model/__pycache__/preprocessing.cpython-313.pyc +0 -0
- src/similarity_model/__pycache__/semantic_search.cpython-313.pyc +0 -0
- src/similarity_model/hybrid_ranker.py +4 -1
models/faiss_index.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:228ea7b7be11e828d482432107d6c3baab192ea7018906f35da2282841f5a628
|
| 3 |
+
size 783405
|
models/metadata.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:534c0c0a94e7082f80d28af3e3cc8228a3749078b95eb161cdc86b26b6afc293
|
| 3 |
+
size 786625
|
models/project_embeddings.npy
CHANGED
|
Binary files a/models/project_embeddings.npy and b/models/project_embeddings.npy differ
|
|
|
src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc
CHANGED
|
Binary files a/src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc and b/src/similarity_model/__pycache__/feature_similarity.cpython-313.pyc differ
|
|
|
src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc
CHANGED
|
Binary files a/src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc and b/src/similarity_model/__pycache__/hybrid_ranker.cpython-313.pyc differ
|
|
|
src/similarity_model/__pycache__/preprocessing.cpython-313.pyc
CHANGED
|
Binary files a/src/similarity_model/__pycache__/preprocessing.cpython-313.pyc and b/src/similarity_model/__pycache__/preprocessing.cpython-313.pyc differ
|
|
|
src/similarity_model/__pycache__/semantic_search.cpython-313.pyc
CHANGED
|
Binary files a/src/similarity_model/__pycache__/semantic_search.cpython-313.pyc and b/src/similarity_model/__pycache__/semantic_search.cpython-313.pyc differ
|
|
|
src/similarity_model/hybrid_ranker.py
CHANGED
|
@@ -47,8 +47,11 @@ def get_dynamic_weights(
|
|
| 47 |
"""
|
| 48 |
|
| 49 |
# No feature evidence at all — rely on semantic regardless of query richness
|
|
|
|
|
|
|
|
|
|
| 50 |
if coverage == 0:
|
| 51 |
-
return 0.
|
| 52 |
|
| 53 |
# Rich features + high overlap → trust features heavily
|
| 54 |
if feature_count >= 5 and coverage >= 0.60:
|
|
|
|
| 47 |
"""
|
| 48 |
|
| 49 |
# No feature evidence at all — rely on semantic regardless of query richness
|
| 50 |
+
# Use 0.50 weight (not 0.70) to compensate for all-mpnet-base-v2's higher
|
| 51 |
+
# within-domain baseline scores; prevents same-domain/different-purpose
|
| 52 |
+
# projects from scoring too high when no features overlap.
|
| 53 |
if coverage == 0:
|
| 54 |
+
return 0.50, 0.30, 0.20
|
| 55 |
|
| 56 |
# Rich features + high overlap → trust features heavily
|
| 57 |
if feature_count >= 5 and coverage >= 0.60:
|