Spaces:
Sleeping
Sleeping
fix bugs
Browse files- src/streamlit_app.py +52 -31
src/streamlit_app.py
CHANGED
|
@@ -320,7 +320,8 @@ def get_ttl_hash(seconds=360):
|
|
| 320 |
def get_r_matrix_from_votes():
|
| 321 |
local_con = None
|
| 322 |
try:
|
| 323 |
-
|
|
|
|
| 324 |
|
| 325 |
# Fetch all vote data
|
| 326 |
# fetchdf requires pandas
|
|
@@ -504,58 +505,71 @@ def get_user_cluster_label(user_id, ttl_hash=None):
|
|
| 504 |
|
| 505 |
|
| 506 |
# Helper function to get top k most polarized comments for a list of users
|
| 507 |
-
def
|
| 508 |
"""
|
| 509 |
-
Retrieves the top k comments
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
Args:
|
| 513 |
user_ids (list[str]): A list of user IDs.
|
|
|
|
| 514 |
k (int): The number of top comments to retrieve.
|
| 515 |
|
| 516 |
Returns:
|
| 517 |
list[tuple]: A list of tuples, where each tuple contains
|
| 518 |
-
(comment_id, comment_content,
|
| 519 |
-
ordered by
|
| 520 |
Returns an empty list if no votes are found for these users
|
| 521 |
-
or on error.
|
| 522 |
"""
|
| 523 |
-
if not user_ids:
|
| 524 |
-
#
|
| 525 |
-
|
|
|
|
| 526 |
|
| 527 |
local_con = None
|
| 528 |
try:
|
| 529 |
local_con = duckdb.connect(database=DB_PATH, read_only=True)
|
| 530 |
|
| 531 |
-
# Use parameterized query for the list of user IDs
|
| 532 |
# DuckDB's Python API handles lists for IN clauses
|
| 533 |
query = """
|
| 534 |
SELECT
|
| 535 |
v.comment_id,
|
| 536 |
c.content,
|
| 537 |
-
|
| 538 |
WHEN v.vote_type = 'agree' THEN 1.0
|
| 539 |
WHEN v.vote_type = 'neutral' THEN 0.0
|
| 540 |
WHEN v.vote_type = 'disagree' THEN -1.0
|
| 541 |
ELSE NULL -- Should not happen with current data
|
| 542 |
-
END) as
|
|
|
|
| 543 |
FROM votes v
|
| 544 |
JOIN comments c ON v.comment_id = c.id
|
| 545 |
-
WHERE v.user_id IN (?)
|
| 546 |
GROUP BY v.comment_id, c.content
|
| 547 |
-
HAVING COUNT(v.user_id)
|
| 548 |
-
ORDER BY
|
| 549 |
LIMIT ?
|
| 550 |
"""
|
| 551 |
-
# Pass the list of user_ids and k as parameters
|
| 552 |
-
|
|
|
|
| 553 |
|
| 554 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
|
| 556 |
except Exception as e:
|
| 557 |
# st.error is not available here, just print or log
|
| 558 |
-
print(f"Error getting top k
|
| 559 |
return [] # Return empty list on error
|
| 560 |
finally:
|
| 561 |
if local_con:
|
|
@@ -591,17 +605,20 @@ def estimate_group_voting_diversity(user_ids, topic_id):
|
|
| 591 |
|
| 592 |
# Get all votes for the given topic by the specified users
|
| 593 |
# Join with comments to filter by topic_id
|
| 594 |
-
|
|
|
|
|
|
|
| 595 |
SELECT
|
| 596 |
v.comment_id,
|
| 597 |
v.user_id,
|
| 598 |
v.vote_type
|
| 599 |
FROM votes v
|
| 600 |
JOIN comments c ON v.comment_id = c.id
|
| 601 |
-
WHERE c.topic_id = ? AND v.user_id IN (
|
| 602 |
"""
|
| 603 |
-
#
|
| 604 |
-
|
|
|
|
| 605 |
|
| 606 |
if not results:
|
| 607 |
return 0.0 # No votes found for this group on this topic
|
|
@@ -643,7 +660,6 @@ def estimate_group_voting_diversity(user_ids, topic_id):
|
|
| 643 |
if local_con:
|
| 644 |
local_con.close()
|
| 645 |
|
| 646 |
-
|
| 647 |
# Helper function to name a group of users based on their participation and voting diversity
|
| 648 |
def name_user_group(user_ids, topic_id):
|
| 649 |
"""
|
|
@@ -805,8 +821,10 @@ def get_random_unvoted_comment(user_id, topic_id):
|
|
| 805 |
if current_label is not None and previous_label is not None and current_label != previous_label:
|
| 806 |
if current_users_set != previous_users_set:
|
| 807 |
# Set a flag in session state to display the message later in the main rendering logic
|
|
|
|
|
|
|
| 808 |
st.session_state._show_new_area_message = True
|
| 809 |
-
new_area_comments =
|
| 810 |
st.session_state._new_area_comments = new_area_comments
|
| 811 |
# print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
|
| 812 |
# print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
|
|
@@ -1157,7 +1175,7 @@ def view_topic_page():
|
|
| 1157 |
st.markdown(random.choice(prompts))
|
| 1158 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
| 1159 |
st.session_state.handling_vote = True # lock
|
| 1160 |
-
if st.button("Share
|
| 1161 |
if new_comment_text and len(new_comment_text.strip()):
|
| 1162 |
user_email = st.session_state.get('user_email', '')
|
| 1163 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
|
@@ -1296,11 +1314,14 @@ if 'comment_history' not in st.session_state:
|
|
| 1296 |
if 'processed_url_params' not in st.session_state:
|
| 1297 |
st.session_state.processed_url_params = False # Add flag initialization
|
| 1298 |
|
| 1299 |
-
# Initialize the database
|
| 1300 |
-
|
| 1301 |
-
|
|
|
|
| 1302 |
add_dummy_topic()
|
| 1303 |
-
st.session_state.
|
|
|
|
|
|
|
| 1304 |
|
| 1305 |
# Handle initial load from URL query parameters
|
| 1306 |
# Process only once per session load using the flag
|
|
|
|
| 320 |
def get_r_matrix_from_votes():
|
| 321 |
local_con = None
|
| 322 |
try:
|
| 323 |
+
# Use read_only=False to maintain consistent configuration across all connections
|
| 324 |
+
local_con = duckdb.connect(database=DB_PATH, read_only=False)
|
| 325 |
|
| 326 |
# Fetch all vote data
|
| 327 |
# fetchdf requires pandas
|
|
|
|
| 505 |
|
| 506 |
|
| 507 |
# Helper function to get the top k highest-consensus (lowest vote variance) comments for a list of users within a topic
|
| 508 |
+
def get_top_k_consensus_comments_for_users(user_ids, topic_id, k=5):
|
| 509 |
"""
|
| 510 |
+
Retrieves the top k comments with the highest voting consensus (lowest variance)
|
| 511 |
+
among a given list of users *for a specific topic*.
|
| 512 |
+
|
| 513 |
+
Consensus is measured by the population variance (VAR_POP) of numerical
|
| 514 |
+
vote scores (-1 for 'disagree', 0 for 'neutral', 1 for 'agree').
|
| 515 |
+
Lower variance indicates higher consensus.
|
| 516 |
|
| 517 |
Args:
|
| 518 |
user_ids (list[str]): A list of user IDs.
|
| 519 |
+
topic_id (str): The ID of the topic to filter comments by.
|
| 520 |
k (int): The number of top comments to retrieve.
|
| 521 |
|
| 522 |
Returns:
|
| 523 |
list[tuple]: A list of tuples, where each tuple contains
|
| 524 |
+
(comment_id, comment_content, vote_variance),
|
| 525 |
+
ordered by vote_variance ascending (lowest variance first).
|
| 526 |
Returns an empty list if no votes are found for these users
|
| 527 |
+
on this topic, or on error, or if the group has fewer than 2 users.
|
| 528 |
"""
|
| 529 |
+
if not user_ids or len(user_ids) < 2:
|
| 530 |
+
# Need at least 2 users from the group to calculate meaningful variance
|
| 531 |
+
# print("Warning: get_top_k_consensus_comments_for_users called with fewer than 2 user_ids.") # Optional debug
|
| 532 |
+
return [] # Cannot query without user IDs or with only one user
|
| 533 |
|
| 534 |
local_con = None
|
| 535 |
try:
|
| 536 |
local_con = duckdb.connect(database=DB_PATH, read_only=True)
|
| 537 |
|
| 538 |
+
# Use parameterized query for the list of user IDs and topic ID
|
| 539 |
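# NOTE(review): a list bound to the single `?` in `IN (?)` is likely passed as one LIST value rather than expanded; the group-diversity query in this file builds one `?` per user ID to avoid a conversion error — verify this query works as written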
|
| 540 |
query = """
|
| 541 |
SELECT
|
| 542 |
v.comment_id,
|
| 543 |
c.content,
|
| 544 |
+
VAR_POP(CASE
|
| 545 |
WHEN v.vote_type = 'agree' THEN 1.0
|
| 546 |
WHEN v.vote_type = 'neutral' THEN 0.0
|
| 547 |
WHEN v.vote_type = 'disagree' THEN -1.0
|
| 548 |
ELSE NULL -- Should not happen with current data
|
| 549 |
+
END) as vote_variance,
|
| 550 |
+
COUNT(v.user_id) as num_votes_in_group -- Include count for potential tie-breaking
|
| 551 |
FROM votes v
|
| 552 |
JOIN comments c ON v.comment_id = c.id
|
| 553 |
+
WHERE v.user_id IN (?) AND c.topic_id = ? -- Filter by user IDs and topic ID
|
| 554 |
GROUP BY v.comment_id, c.content
|
| 555 |
+
HAVING COUNT(v.user_id) >= 2 -- Ensure at least 2 users from the list voted on this comment
|
| 556 |
+
ORDER BY vote_variance ASC, num_votes_in_group DESC -- Order by lowest variance, then by number of votes (more votes = stronger consensus)
|
| 557 |
LIMIT ?
|
| 558 |
"""
|
| 559 |
+
# Pass the list of user_ids, topic_id, and k as parameters
|
| 560 |
+
# DuckDB requires list parameters to be wrapped in a list/tuple for the execute method
|
| 561 |
+
result = local_con.execute(query, [user_ids, topic_id, k]).fetchall()
|
| 562 |
|
| 563 |
+
# The result includes comment_id, content, variance, and count.
|
| 564 |
+
# We only need comment_id, content, and variance for the return value as per docstring.
|
| 565 |
+
# The count was used for ordering.
|
| 566 |
+
formatted_result = [(row[0], row[1], row[2]) for row in result]
|
| 567 |
+
|
| 568 |
+
return formatted_result
|
| 569 |
|
| 570 |
except Exception as e:
|
| 571 |
# st.error is not available here, just print or log
|
| 572 |
+
print(f"Error getting top k consensus comments for users {user_ids} in topic {topic_id}: {e}")
|
| 573 |
return [] # Return empty list on error
|
| 574 |
finally:
|
| 575 |
if local_con:
|
|
|
|
| 605 |
|
| 606 |
# Get all votes for the given topic by the specified users
|
| 607 |
# Join with comments to filter by topic_id
|
| 608 |
+
# Construct the IN clause dynamically to avoid the conversion error
|
| 609 |
+
placeholders = ', '.join(['?'] * len(user_ids_tuple))
|
| 610 |
+
query = f"""
|
| 611 |
SELECT
|
| 612 |
v.comment_id,
|
| 613 |
v.user_id,
|
| 614 |
v.vote_type
|
| 615 |
FROM votes v
|
| 616 |
JOIN comments c ON v.comment_id = c.id
|
| 617 |
+
WHERE c.topic_id = ? AND v.user_id IN ({placeholders})
|
| 618 |
"""
|
| 619 |
+
# Pass topic_id and then all user_ids as separate parameters
|
| 620 |
+
params = [topic_id] + list(user_ids_tuple) # Combine topic_id and user_ids
|
| 621 |
+
results = local_con.execute(query, params).fetchall()
|
| 622 |
|
| 623 |
if not results:
|
| 624 |
return 0.0 # No votes found for this group on this topic
|
|
|
|
| 660 |
if local_con:
|
| 661 |
local_con.close()
|
| 662 |
|
|
|
|
| 663 |
# Helper function to name a group of users based on their participation and voting diversity
|
| 664 |
def name_user_group(user_ids, topic_id):
|
| 665 |
"""
|
|
|
|
| 821 |
if current_label is not None and previous_label is not None and current_label != previous_label:
|
| 822 |
if current_users_set != previous_users_set:
|
| 823 |
# Set a flag in session state to display the message later in the main rendering logic
|
| 824 |
+
print("st.session_state._show_new_area_message = True")
|
| 825 |
+
print("st.session_state._show_new_area_message = True")
|
| 826 |
st.session_state._show_new_area_message = True
|
| 827 |
+
new_area_comments = get_top_k_consensus_comments_for_users(current_users_set, topic_id, k=5)
|
| 828 |
st.session_state._new_area_comments = new_area_comments
|
| 829 |
# print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
|
| 830 |
# print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
|
|
|
|
| 1175 |
st.markdown(random.choice(prompts))
|
| 1176 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
| 1177 |
st.session_state.handling_vote = True # lock
|
| 1178 |
+
if st.button("Share Wisdom"):
|
| 1179 |
if new_comment_text and len(new_comment_text.strip()):
|
| 1180 |
user_email = st.session_state.get('user_email', '')
|
| 1181 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
|
|
|
| 1314 |
if 'processed_url_params' not in st.session_state:
|
| 1315 |
st.session_state.processed_url_params = False # Add flag initialization
|
| 1316 |
|
| 1317 |
+
# Initialize the database and add dummy data only once per session
|
| 1318 |
+
if st.session_state.get("db_initialized", False) is False:
|
| 1319 |
+
print("INFO: Initializing database and adding dummy data...") # Optional: Info message
|
| 1320 |
+
initialize_database()
|
| 1321 |
add_dummy_topic()
|
| 1322 |
+
st.session_state.db_initialized = True
|
| 1323 |
+
print("INFO: Database initialization complete.") # Optional: Info message
|
| 1324 |
+
|
| 1325 |
|
| 1326 |
# Handle initial load from URL query parameters
|
| 1327 |
# Process only once per session load using the flag
|