Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +26 -2
src/streamlit_app.py
CHANGED
|
@@ -237,8 +237,32 @@ def get_clusters_from_r_matrix(r_matrix):
|
|
| 237 |
return np.array([]) # Return empty array on error
|
| 238 |
|
| 239 |
|
| 240 |
-
def get_cluster_labels():
|
| 241 |
r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
| 243 |
if len(cluster_labels) == 0:
|
| 244 |
cluster_labels = [0] * len(user_id_to_index)
|
|
@@ -264,7 +288,7 @@ def get_user_cluster_label(user_id, ttl_hash=None):
|
|
| 264 |
if the user is not found or has no cluster label.
|
| 265 |
"""
|
| 266 |
# get_cluster_labels is already cached, so calling it repeatedly is fine
|
| 267 |
-
cluster_labels, user_id_to_index = get_cluster_labels()
|
| 268 |
|
| 269 |
# Create a reverse mapping from index to user_id for easier lookup
|
| 270 |
index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}
|
|
|
|
| 237 |
return np.array([]) # Return empty array on error
|
| 238 |
|
| 239 |
|
| 240 |
+
def get_cluster_labels(user_id):
|
| 241 |
r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
|
| 242 |
+
# Check if the user_id exists in the matrix index
|
| 243 |
+
if user_id not in user_id_to_index:
|
| 244 |
+
print(f"Warning: User ID '{user_id}' not found in the R matrix. Cannot perform user-specific filtering for clustering.")
|
| 245 |
+
# Return empty results as filtering based on this user is not possible.
|
| 246 |
+
# The downstream function get_user_cluster_label handles the user not being in the index.
|
| 247 |
+
# Returning empty arrays/dict matches the structure of the expected return value.
|
| 248 |
+
return np.array([]), {} # Return empty labels and empty index map
|
| 249 |
+
|
| 250 |
+
# Get the row for the specific user
|
| 251 |
+
user_row = r_matrix.loc[user_id]
|
| 252 |
+
|
| 253 |
+
# Find columns where the user has voted (values are not NaN)
|
| 254 |
+
voted_comment_ids = user_row.dropna().index
|
| 255 |
+
|
| 256 |
+
# Ensure we handle the case where the user hasn't voted on anything
|
| 257 |
+
if voted_comment_ids.empty:
|
| 258 |
+
print(f"Warning: User ID '{user_id}' has not voted on any comments. Cannot perform clustering based on votes.")
|
| 259 |
+
# If no votes, no columns to cluster on. Return empty results.
|
| 260 |
+
return np.array([]), {}
|
| 261 |
+
|
| 262 |
+
# Filter the r_matrix to include only these columns
|
| 263 |
+
# This is the matrix that will be used for clustering in the next step.
|
| 264 |
+
# The subsequent line calling get_clusters_from_r_matrix should use this variable.
|
| 265 |
+
r_matrix_to_cluster = r_matrix[voted_comment_ids]
|
| 266 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
| 267 |
if len(cluster_labels) == 0:
|
| 268 |
cluster_labels = [0] * len(user_id_to_index)
|
|
|
|
| 288 |
if the user is not found or has no cluster label.
|
| 289 |
"""
|
| 290 |
# get_cluster_labels is already cached, so calling it repeatedly is fine
|
| 291 |
+
cluster_labels, user_id_to_index = get_cluster_labels(user_id)
|
| 292 |
|
| 293 |
# Create a reverse mapping from index to user_id for easier lookup
|
| 294 |
index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}
|