Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Jan Mühlnikel
committed on
Commit
·
2a6aea4
1
Parent(s):
b0c3715
experiment
Browse files
- functions/calc_matches.py +2 -35
- similarity_page.py +2 -2
functions/calc_matches.py
CHANGED
|
@@ -3,42 +3,8 @@ import numpy as np
|
|
| 3 |
from scipy.sparse import csr_matrix, lil_matrix
|
| 4 |
import streamlit as st
|
| 5 |
|
| 6 |
-
"""
|
| 7 |
-
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
| 8 |
-
# matching project2 can be any project
|
| 9 |
-
# indecies (rows) = project1
|
| 10 |
-
# columns = project2
|
| 11 |
-
# -> find matches
|
| 12 |
-
|
| 13 |
-
# filter out all row considering the filter
|
| 14 |
-
filtered_df_indecies_list = filtered_df.index
|
| 15 |
-
project_df_indecies_list = project_df.index
|
| 16 |
-
|
| 17 |
-
np.fill_diagonal(similarity_matrix, 0)
|
| 18 |
-
match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
|
| 19 |
-
|
| 20 |
-
best_matches_list = np.argsort(match_matrix, axis=None)
|
| 21 |
-
|
| 22 |
-
if len(best_matches_list) < top_x:
|
| 23 |
-
top_x = len(best_matches_list)
|
| 24 |
-
|
| 25 |
-
# get row (project1) and column (project2) with highest similarity in filtered df
|
| 26 |
-
top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
|
| 27 |
-
|
| 28 |
-
# get the corresponding similarity values
|
| 29 |
-
top_values = match_matrix[top_indices]
|
| 30 |
-
|
| 31 |
-
p1_df = filtered_df.iloc[top_indices[0]]
|
| 32 |
-
p1_df["similarity"] = top_values
|
| 33 |
-
p2_df = project_df.iloc[top_indices[1]]
|
| 34 |
-
p2_df["similarity"] = top_values
|
| 35 |
-
|
| 36 |
-
return p1_df, p2_df
|
| 37 |
-
"""
|
| 38 |
-
|
| 39 |
# multi_project_matching
|
| 40 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
| 41 |
-
st.dataframe(project_df.head(5))
|
| 42 |
st.write(filtered_df.shape)
|
| 43 |
st.write(project_df.shape)
|
| 44 |
st.write(similarity_matrix.shape)
|
|
@@ -56,7 +22,8 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
| 56 |
project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
|
| 57 |
|
| 58 |
# Select submatrix based on indices from both dataframes
|
| 59 |
-
match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
|
|
|
|
| 60 |
|
| 61 |
st.write(match_matrix.shape)
|
| 62 |
|
|
|
|
| 3 |
from scipy.sparse import csr_matrix, lil_matrix
|
| 4 |
import streamlit as st
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
# multi_project_matching
|
| 7 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
|
|
| 8 |
st.write(filtered_df.shape)
|
| 9 |
st.write(project_df.shape)
|
| 10 |
st.write(similarity_matrix.shape)
|
|
|
|
| 22 |
project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
|
| 23 |
|
| 24 |
# Select submatrix based on indices from both dataframes
|
| 25 |
+
#match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
|
| 26 |
+
match_matrix = similarity_matrix[np.ix_(filtered_df_indices, project_df_indices)]
|
| 27 |
|
| 28 |
st.write(match_matrix.shape)
|
| 29 |
|
similarity_page.py
CHANGED
|
@@ -272,10 +272,10 @@ def show_multi_matching_page():
|
|
| 272 |
## if show only different orgas checkbox is activated
|
| 273 |
if different_orga_checkbox:
|
| 274 |
with st.spinner('Please wait...'):
|
| 275 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
| 276 |
else:
|
| 277 |
with st.spinner('Please wait...'):
|
| 278 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
| 279 |
|
| 280 |
# SHOW THE RESULT
|
| 281 |
show_multi_table(p1_df, p2_df)
|
|
|
|
| 272 |
## if show only different orgas checkbox is activated
|
| 273 |
if different_orga_checkbox:
|
| 274 |
with st.spinner('Please wait...'):
|
| 275 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
|
| 276 |
else:
|
| 277 |
with st.spinner('Please wait...'):
|
| 278 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
|
| 279 |
|
| 280 |
# SHOW THE RESULT
|
| 281 |
show_multi_table(p1_df, p2_df)
|