Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Community

Jan Mühlnikel committed on May 26, 2024

Commit

5f41368

1 Parent(s): 2baee55

experiment

Browse files

Files changed (1) hide show

functions/calc_matches.py +31 -12

functions/calc_matches.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import pandas as pd
 import numpy as np
-from scipy.sparse import csr_matrix, lil_matrix
 import streamlit as st
 # multi_project_matching
@@ -19,11 +19,30 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
     # Create mapping dictionaries
     filtered_df_index_map = {i: index for i, index in enumerate(filtered_df_indices)}
     project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
     # Select submatrix based on indices from both dataframes
     match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
     st.write(match_matrix.shape)
     # Get the linear indices of the top 'top_x' values
@@ -38,29 +57,29 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
     # Get the corresponding similarity values
     #top_values = match_matrix.data[linear_indices]
-    flat_data = match_matrix.data
     # Get the indices that would sort the data array in descending order
-    sorted_indices = np.argsort(flat_data)[::-1]
     # Take the first k indices to get the top k maximum values
-    top_indices = sorted_indices[:top_x]
-    top_row_indices = []
-    top_col_indices = []
-    for idx in top_indices:
-        row, col = np.unravel_index(idx, match_matrix.shape)
-        top_row_indices.append(row)
-        top_col_indices.append(col)
-    st.write(top_col_indices)
     # Convert flat indices to 2D row and column indices
     #row_indices, col_indices = match_matrix.nonzero()
     #row_indices = row_indices[top_indices]
     #col_indices = col_indices[top_indices]
     # Get the values corresponding to the top k indices
-    top_values = flat_data[top_indices]
     # Get the values corresponding to the top k indices

 import pandas as pd
 import numpy as np
+from scipy.sparse import csr_matrix, coo_matrix
 import streamlit as st
 # multi_project_matching
     # Create mapping dictionaries
     filtered_df_index_map = {i: index for i, index in enumerate(filtered_df_indices)}
+    st.write(filtered_df_index_map)
     project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
     # Select submatrix based on indices from both dataframes
     match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
+    coo = match_matrix.tocoo()
+    data = coo.data
+    row_indices = coo.row
+    col_indices = coo.col
+    top_n = 15
+    if len(data) < top_n:
+        top_n = len(data)
+    top_n_indices = np.argsort(data)[-top_n:][::-1]
+    top_n_percentages = data[top_n_indices]
+    top_n_row_indices = row_indices[top_n_indices]
+    top_n_col_indices = col_indices[top_n_indices]
+    original_row_indices = filtered_df_indices[top_n_row_indices]
+    original_col_indices = project_df_indices[top_n_col_indices]
     st.write(match_matrix.shape)
     # Get the linear indices of the top 'top_x' values
     # Get the corresponding similarity values
     #top_values = match_matrix.data[linear_indices]
+    #flat_data = match_matrix.data
     # Get the indices that would sort the data array in descending order
+    #sorted_indices = np.argsort(flat_data)[::-1]
     # Take the first k indices to get the top k maximum values
+    #top_indices = sorted_indices[:top_x]
+    #top_row_indices = []
+    #top_col_indices = []
+    #for idx in top_indices:
+    #    row, col = np.unravel_index(idx, match_matrix.shape)
+    #    top_row_indices.append(row)
+    #    top_col_indices.append(col)
+    #st.write(top_col_indices)
     # Convert flat indices to 2D row and column indices
     #row_indices, col_indices = match_matrix.nonzero()
     #row_indices = row_indices[top_indices]
     #col_indices = col_indices[top_indices]
     # Get the values corresponding to the top k indices
+    #top_values = flat_data[top_indices]
     # Get the values corresponding to the top k indices