Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +7 -57
src/streamlit_app.py
CHANGED
|
@@ -153,48 +153,6 @@ def get_r_matrix_from_votes():
|
|
| 153 |
local_con.close()
|
| 154 |
|
| 155 |
|
| 156 |
-
# Custom Hamming-like distance function handling NaNs for clustering
|
| 157 |
-
# Assumes numpy is imported as np
|
| 158 |
-
def hamming_distance_with_nan(u1, u2):
    """
    Calculates a Hamming-like distance between two vectors (user vote profiles)
    ignoring positions where either value is NaN.

    Both inputs are coerced to float arrays first, so missing votes given as
    ``None`` (object-dtype input) are treated exactly like NaN instead of
    making ``np.isnan`` raise ``TypeError``.

    Args:
        u1 (np.ndarray or pd.Series): First vector.
        u2 (np.ndarray or pd.Series): Second vector.

    Returns:
        float: The proportion of differing elements among non-NaN positions.
               Returns 0.0 if vectors are identical (including all NaN),
               1.0 if different but no common non-NaN positions.
    """
    # Coerce to float so NaN detection works for int, float and object input.
    u1 = np.asarray(u1, dtype=float)
    u2 = np.asarray(u2, dtype=float)

    # Positions where both vectors carry a real (non-NaN) value.
    both_not_nan_mask = ~np.isnan(u1) & ~np.isnan(u2)

    # No overlapping observations: the vectors can only be compared as wholes.
    if not np.any(both_not_nan_mask):
        # Identical vectors (e.g. both all NaN) are at distance 0; anything
        # else with no common non-NaN position gets the maximum distance 1.
        if np.array_equal(u1, u2, equal_nan=True):
            return 0.0
        return 1.0

    # Compare only the positions observed in both vectors.
    u1_filtered = u1[both_not_nan_mask]
    u2_filtered = u2[both_not_nan_mask]

    diff_count = np.count_nonzero(u1_filtered != u2_filtered)
    total_count = u1_filtered.size

    # Return a plain Python float, as documented, rather than np.float64.
    return float(diff_count / total_count)
|
| 196 |
-
|
| 197 |
-
|
| 198 |
# Function to get clusters using HDBSCAN with the custom Hamming distance
|
| 199 |
# Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
|
| 200 |
def get_clusters_from_r_matrix(r_matrix):
|
|
@@ -222,11 +180,10 @@ def get_clusters_from_r_matrix(r_matrix):
|
|
| 222 |
# These might need tuning based on data characteristics and desired cluster granularity
|
| 223 |
# allow_single_cluster=True prevents an error if all points form one cluster
|
| 224 |
clusterer = hdbscan.HDBSCAN(
|
| 225 |
-
metric=
|
| 226 |
allow_single_cluster=True,
|
| 227 |
min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
|
| 228 |
-
min_samples=None
|
| 229 |
-
)
|
| 230 |
|
| 231 |
# Fit the model directly to the DataFrame values
|
| 232 |
# HDBSCAN fit expects a numpy array or similar structure
|
|
@@ -267,7 +224,7 @@ def get_cluster_labels(user_id):
|
|
| 267 |
# Filter the r_matrix to include only these columns
|
| 268 |
# This is the matrix that will be used for clustering in the next step.
|
| 269 |
# The subsequent line calling get_clusters_from_r_matrix should use this variable.
|
| 270 |
-
|
| 271 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
| 272 |
if len(cluster_labels) == 0:
|
| 273 |
cluster_labels = [0] * len(user_id_to_index)
|
|
@@ -983,6 +940,7 @@ def view_topic_page():
|
|
| 983 |
st.markdown(random.choice(prompts))
|
| 984 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
| 985 |
if st.button("Share Your Wisdom"):
|
|
|
|
| 986 |
if new_comment_text and len(new_comment_text.strip()):
|
| 987 |
user_email = st.session_state.get('user_email', '')
|
| 988 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
|
@@ -999,17 +957,7 @@ def view_topic_page():
|
|
| 999 |
# Append new comment to history
|
| 1000 |
st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
|
| 1001 |
|
| 1002 |
-
# Get next comment (could be the one just submitted)
|
| 1003 |
-
next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
|
| 1004 |
-
st.session_state.current_comment_id = next_comment_id
|
| 1005 |
-
st.session_state.current_comment_content = next_comment_content
|
| 1006 |
-
|
| 1007 |
-
# Update progress
|
| 1008 |
-
update_user_progress(user_id, topic_id, next_comment_id)
|
| 1009 |
-
|
| 1010 |
st.session_state.tmp_new_comment_input = "" # Clear input box
|
| 1011 |
-
st.rerun() # Rerun to update UI
|
| 1012 |
-
|
| 1013 |
except Exception as e:
|
| 1014 |
st.error(f"Error sharing information: {e}")
|
| 1015 |
finally:
|
|
@@ -1017,6 +965,7 @@ def view_topic_page():
|
|
| 1017 |
local_con.close()
|
| 1018 |
else:
|
| 1019 |
st.error("Could not find or create user.")
|
|
|
|
| 1020 |
|
| 1021 |
# Get next comment
|
| 1022 |
# This should always get the next unvoted comment for the user in this topic.
|
|
@@ -1029,7 +978,8 @@ def view_topic_page():
|
|
| 1029 |
update_user_progress(user_id, topic_id, next_comment_id)
|
| 1030 |
|
| 1031 |
st.session_state._voting_in_progress = False
|
| 1032 |
-
st.
|
|
|
|
| 1033 |
|
| 1034 |
except Exception as e:
|
| 1035 |
st.error(f"Error processing vote: {e}")
|
|
|
|
| 153 |
local_con.close()
|
| 154 |
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
# Function to get clusters using HDBSCAN with the built-in 'hamming' metric
|
| 157 |
# Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
|
| 158 |
def get_clusters_from_r_matrix(r_matrix):
|
|
|
|
| 180 |
# These might need tuning based on data characteristics and desired cluster granularity
|
| 181 |
# allow_single_cluster=True prevents an error if all points form one cluster
|
| 182 |
clusterer = hdbscan.HDBSCAN(
|
| 183 |
+
metric='hamming',
|
| 184 |
allow_single_cluster=True,
|
| 185 |
min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
|
| 186 |
+
min_samples=None)
|
|
|
|
| 187 |
|
| 188 |
# Fit the model directly to the DataFrame values
|
| 189 |
# HDBSCAN fit expects a numpy array or similar structure
|
|
|
|
| 224 |
# Filter the r_matrix to include only these columns
|
| 225 |
# This is the matrix that will be used for clustering in the next step.
|
| 226 |
# The subsequent line calling get_clusters_from_r_matrix should use this variable.
|
| 227 |
+
r_matrix = r_matrix[voted_comment_ids]
|
| 228 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
| 229 |
if len(cluster_labels) == 0:
|
| 230 |
cluster_labels = [0] * len(user_id_to_index)
|
|
|
|
| 940 |
st.markdown(random.choice(prompts))
|
| 941 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
| 942 |
if st.button("Share Your Wisdom"):
|
| 943 |
+
st.session_state.handling_vote = True # lock
|
| 944 |
if new_comment_text and len(new_comment_text.strip()):
|
| 945 |
user_email = st.session_state.get('user_email', '')
|
| 946 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
|
|
|
| 957 |
# Append new comment to history
|
| 958 |
st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
|
| 959 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 960 |
st.session_state.tmp_new_comment_input = "" # Clear input box
|
|
|
|
|
|
|
| 961 |
except Exception as e:
|
| 962 |
st.error(f"Error sharing information: {e}")
|
| 963 |
finally:
|
|
|
|
| 965 |
local_con.close()
|
| 966 |
else:
|
| 967 |
st.error("Could not find or create user.")
|
| 968 |
+
st.session_state.handling_vote = False # lock
|
| 969 |
|
| 970 |
# Get next comment
|
| 971 |
# This should always get the next unvoted comment for the user in this topic.
|
|
|
|
| 978 |
update_user_progress(user_id, topic_id, next_comment_id)
|
| 979 |
|
| 980 |
st.session_state._voting_in_progress = False
|
| 981 |
+
if st.session_state.get("handling_vote", False) is False:
|
| 982 |
+
st.rerun() # Rerun to update UI
|
| 983 |
|
| 984 |
except Exception as e:
|
| 985 |
st.error(f"Error processing vote: {e}")
|