Jan Mühlnikel
committed on
Commit
·
1081227
1
Parent(s):
adc6364
different matching method
Browse files
- functions/calc_matches.py +3 -4
- functions/single_similar.py +1 -0
functions/calc_matches.py
CHANGED
|
@@ -2,9 +2,8 @@ import pandas as pd
|
|
| 2 |
import numpy as np
|
| 3 |
from scipy.sparse import csr_matrix, lil_matrix
|
| 4 |
|
| 5 |
-
"""
|
| 6 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
| 7 |
-
# matching project2 can be
|
| 8 |
# indecies (rows) = project1
|
| 9 |
# columns = project2
|
| 10 |
# -> find matches
|
|
@@ -33,8 +32,8 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
| 33 |
p2_df["similarity"] = top_values
|
| 34 |
|
| 35 |
return p1_df, p2_df
|
| 36 |
-
"""
|
| 37 |
|
|
|
|
| 38 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
| 39 |
# Ensure the matrix is in a suitable format for manipulation
|
| 40 |
if not isinstance(similarity_matrix, csr_matrix):
|
|
@@ -74,5 +73,5 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
| 74 |
print("finished calc matches")
|
| 75 |
|
| 76 |
return p1_df, p2_df
|
| 77 |
-
|
| 78 |
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
from scipy.sparse import csr_matrix, lil_matrix
|
| 4 |
|
|
|
|
| 5 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
| 6 |
+
# matching project2 can be any project
|
| 7 |
# indecies (rows) = project1
|
| 8 |
# columns = project2
|
| 9 |
# -> find matches
|
|
|
|
| 32 |
p2_df["similarity"] = top_values
|
| 33 |
|
| 34 |
return p1_df, p2_df
|
|
|
|
| 35 |
|
| 36 |
+
"""
|
| 37 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
| 38 |
# Ensure the matrix is in a suitable format for manipulation
|
| 39 |
if not isinstance(similarity_matrix, csr_matrix):
|
|
|
|
| 73 |
print("finished calc matches")
|
| 74 |
|
| 75 |
return p1_df, p2_df
|
| 76 |
+
"""
|
| 77 |
|
functions/single_similar.py
CHANGED
|
@@ -24,6 +24,7 @@ def find_similar(p_index, similarity_matrix, filtered_df, top_x):
|
|
| 24 |
|
| 25 |
return result_df
|
| 26 |
"""
|
|
|
|
| 27 |
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
|
| 28 |
# Ensure the similarity_matrix is in a suitable sparse format like CSR
|
| 29 |
if not isinstance(similarity_matrix, csr_matrix):
|
|
|
|
| 24 |
|
| 25 |
return result_df
|
| 26 |
"""
|
| 27 |
+
|
| 28 |
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
|
| 29 |
# Ensure the similarity_matrix is in a suitable sparse format like CSR
|
| 30 |
if not isinstance(similarity_matrix, csr_matrix):
|