Spaces:

vedant2905
/

Salient_3_problems

Build error

App Files Community

vedant2905 committed on May 6, 2025

Commit

e795f9f

verified ·

1 Parent(s): 612f59a

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +54 -61

src/streamlit_app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import streamlit as st
 import pandas as pd
 import numpy as np
 import os
-import re
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 from collections import Counter, defaultdict
@@ -316,68 +315,62 @@ def main():
             token_instances = predictions_df[predictions_df['Token'] == selected_token]
             if not token_instances.empty:
-                # Create container for token info
-                with st.container():
-                    # Display token info with styling
-                    st.markdown(f'<h2 class="section-header">Salient Token: <span class="highlight-token">{selected_token}</span></h2>',
-                               unsafe_allow_html=True)
-                    # Get most frequent cluster (Top 1) for this token
-                    top_cluster = token_instances['Top 1'].value_counts().index[0]
-                    # Display primary cluster
-                    col1, = st.columns(1)
-                    with col1:
-                        st.metric("Primary Cluster", top_cluster)
-                    st.markdown("<hr>", unsafe_allow_html=True)
-                    # Check if token is salient (has position_idx = -1)
-                    is_salient = any(token_instances['position_idx'] == -1)
-                    if is_salient:  # If position_idx is -1, this is a salient token
-                        st.subheader("Original Sentence Context")
-                        dev_sentences = load_dev_sentences()
-                        if dev_sentences:
-                            line_numbers = token_instances[token_instances['position_idx'] == -1]['line_idx'].unique()
-                            for line_num in line_numbers:
-                                if 0 <= line_num < len(dev_sentences):
-                                    st.code(dev_sentences[line_num], language="java")
-                        else:
-                            st.warning("Could not load sentences from dev.in")
-                    elif not selected_token.startswith("[CLS]"):  # Show wordcloud only for non-CLS tokens
-                        # Word cloud visualization for non-salient, non-CLS tokens
-                        unique_tokens = set(token for token, _ in clusters[top_cluster])
-                        st.subheader("Tokens in Predicted Cluster")
-                        if unique_tokens:
-                            fig = create_wordcloud(unique_tokens)
-                            if fig:
-                                st.pyplot(fig)
-                                plt.close(fig)
                         else:
                             st.info("No tokens found in this cluster")
-                    # Only show cluster statistics for non-[CLS] tokens (including numbered ones)
-                    if not selected_token.startswith("[CLS]"):
-                        col1, col2 = st.columns(2)
-                        with col1:
-                            # Show cluster statistics
-                            unique_tokens = len(set(token for token, _ in clusters[top_cluster]))
-                            total_occurrences = len(clusters[top_cluster])
-                            st.metric("Unique Tokens in Cluster", unique_tokens)
-                        with col2:
-                            st.metric("Total Token Occurrences", total_occurrences)
-                    # Show all contexts from predicted cluster in an expander
-                    with st.expander("👀 View Contexts (from Predicted Cluster)", expanded=False):
-                        cluster_contexts = [(token, line_num) for token, line_num in clusters[top_cluster]
-                                         if 0 <= line_num - 1 < len(sentences)]
-                        if cluster_contexts:
-                            for token, line_num in cluster_contexts:
-                                st.code(f"{sentences[line_num - 1]}", language="python")
-                        else:
-                            st.info("No contexts found in this cluster")
             else:
                 st.warning(f"No instances found for token: {selected_token}")
         else:

 import pandas as pd
 import numpy as np
 import os
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 from collections import Counter, defaultdict
             token_instances = predictions_df[predictions_df['Token'] == selected_token]
             if not token_instances.empty:
+                # Display token info with styling
+                st.markdown(f'<h2 class="section-header">Salient Token: <span class="highlight-token">{selected_token}</span></h2>',
+                          unsafe_allow_html=True)
+                # Get most frequent cluster (Top 1) for this token
+                top_cluster = token_instances['Top 1'].value_counts().index[0]
+                # Display primary cluster
+                st.metric("Primary Cluster", top_cluster)
+                st.markdown("<hr>", unsafe_allow_html=True)
+                # Check if token is salient (has position_idx = -1)
+                is_salient = any(token_instances['position_idx'] == -1)
+                if is_salient:  # If position_idx is -1, this is a salient token
+                    st.subheader("Original Sentence Context")
+                    dev_sentences = load_dev_sentences()
+                    if dev_sentences:
+                        line_numbers = token_instances[token_instances['position_idx'] == -1]['line_idx'].unique()
+                        for line_num in line_numbers:
+                            if 0 <= line_num < len(dev_sentences):
+                                st.code(dev_sentences[line_num], language="java")
+                    else:
+                        st.warning("Could not load sentences from dev.in")
+                elif not selected_token.startswith("[CLS]"):
+                    # Word cloud visualization for non-salient, non-CLS tokens
+                    unique_tokens = set(token for token, _ in clusters[top_cluster])
+                    st.subheader("Tokens in Predicted Cluster")
+                    if unique_tokens:
+                        fig = create_wordcloud(unique_tokens)
+                        if fig:
+                            st.pyplot(fig)
+                            plt.close(fig)
                         else:
                             st.info("No tokens found in this cluster")
+                # Only show cluster statistics for non-[CLS] tokens
+                if not selected_token.startswith("[CLS]"):
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        unique_tokens = len(set(token for token, _ in clusters[top_cluster]))
+                        st.metric("Unique Tokens in Cluster", unique_tokens)
+                    with col2:
+                        total_occurrences = len(clusters[top_cluster])
+                        st.metric("Total Token Occurrences", total_occurrences)
+                # Show contexts from predicted cluster in an expander
+                with st.expander("👀 View Similar Contexts (from Predicted Cluster)", expanded=False):
+                    cluster_contexts = [(token, line_num) for token, line_num in clusters[top_cluster]
+                                     if 0 <= line_num - 1 < len(sentences)]
+                    if cluster_contexts:
+                        for token, line_num in cluster_contexts:
+                            st.code(f"{sentences[line_num - 1]}", language="python")
+                    else:
+                        st.info("No contexts found in this cluster")
             else:
                 st.warning(f"No instances found for token: {selected_token}")
         else: