Spaces:

UFOSINT
/

UAP-Data-Analysis-Tool

Sleeping

App Files Files Community

Ashoka74 committed on Jul 15, 2024

Commit

ea97c58

verified ·

1 Parent(s): 3f33ebc

Update analyzing.py

Browse files

Files changed (1) hide show

analyzing.py +82 -80

analyzing.py CHANGED Viewed

@@ -594,88 +594,90 @@ st.session_state['stage'] = 1
 if st.session_state['stage'] > 0 :
-    columns_to_analyze = st.multiselect(
-        label='Select columns to analyze',
-        options=st.session_state['parsed_responses'].columns
-    )
-    if columns_to_analyze:
-        analyzers = []
-        col_names = []
-        clusters = {}
-        for column in columns_to_analyze:
-            with torch.no_grad():
-                with st.status(f"Processing {column}", expanded=True) as status:
-                    analyzer = UAPAnalyzer(st.session_state['parsed_responses'], column)
-                    st.write(f"Processing {column}...")
-                    analyzer.preprocess_data(top_n=32)
-                    st.write("Reducing dimensionality...")
-                    analyzer.reduce_dimensionality(method='UMAP', n_components=2, n_neighbors=15, min_dist=0.1)
-                    st.write("Clustering data...")
-                    analyzer.cluster_data(method='HDBSCAN', min_cluster_size=15)
-                    analyzer.get_tf_idf_clusters(top_n=3)
-                    st.write("Naming clusters...")
-                    analyzers.append(analyzer)
-                    col_names.append(column)
-                    clusters[column] = analyzer.merge_similar_clusters(cluster_terms=analyzer.__dict__['cluster_terms'], cluster_labels=analyzer.__dict__['cluster_labels'])
-                    # Run the visualization
-                    # fig = datamapplot.create_plot(
-                    #     analyzer.__dict__['reduced_embeddings'],
-                    #     analyzer.__dict__['cluster_labels'].astype(str),
-                    #     #label_font_size=11,
-                    #     label_wrap_width=20,
-                    #     use_medoids=True,
-                    # )#.to_html(full_html=False, include_plotlyjs='cdn')
-                    # st.pyplot(fig.savefig())
-                    status.update(label=f"Processing {column} complete", expanded=False)
-        st.session_state['analyzers'] = analyzers
-        st.session_state['col_names'] = col_names
-        st.session_state['clusters'] = clusters
-        # save space
-        parsed = None
-        analyzers = None
-        col_names = None
-        clusters = None
-        if st.session_state['clusters'] is not None:
-            try:
-                new_data, parsed_responses = analyze_and_predict(st.session_state['parsed_responses'], st.session_state['analyzers'], st.session_state['col_names'], st.session_state['clusters'])
-                st.session_state['dataset'] = parsed_responses
-                st.session_state['new_data'] = new_data
-                st.session_state['data_processed'] = True
-            except Exception as e:
-                st.write(f"Error processing data: {e}")
-        if st.session_state['data_processed']:
-            try:
-                visualizer = UAPVisualizer(data=st.session_state['new_data'])
-                #new_data = pd.DataFrame()  # Assuming new_data is prepared earlier in the code
-                fig2 = visualizer.plot_cramers_v_heatmap(data=st.session_state['new_data'], significance_level=0.05)
-                with st.status(f"Cramer's V Chart", expanded=True) as statuss:
-                    st.pyplot(fig2)
-                    statuss.update(label="Cramer's V chart plotted", expanded=False)
-            except Exception as e:
-                st.write(f"Error plotting Cramers V: {e}")
-            for i, column in enumerate(st.session_state['col_names']):
-                #if stateful_button(f"Show {column} clusters {i}", key=f"show_{column}_clusters"):
-                # if st.session_state['data_processed']:
-                #     with st.status(f"Show clusters {column}", expanded=True) as stats:
-                #         fig3 = st.session_state['analyzers'][i].plot_embeddings4(title=f"{column} clusters", cluster_terms=st.session_state['analyzers'][i].__dict__['cluster_terms'], cluster_labels=st.session_state['analyzers'][i].__dict__['cluster_labels'], reduced_embeddings=st.session_state['analyzers'][i].__dict__['reduced_embeddings'], column=f'Analyzer_{column}', data=st.session_state['new_data'])
-                #         stats.update(label=f"Show clusters {column} complete", expanded=False)
                 if st.session_state['data_processed']:
-                    with st.status(f"Show clusters {column}", expanded=True) as stats:
-                        fig3 = st.session_state['analyzers'][i].plot_embeddings4(
-                            title=f"{column} clusters",
-                            cluster_terms=st.session_state['analyzers'][i].__dict__['cluster_terms'],
-                            cluster_labels=st.session_state['analyzers'][i].__dict__['cluster_labels'],
-                            reduced_embeddings=st.session_state['analyzers'][i].__dict__['reduced_embeddings'],
-                            column=column,  # Use the original column name here
-                            data=st.session_state['parsed_responses']  # Use the original dataset here
-                        )
-                        stats.update(label=f"Show clusters {column} complete", expanded=False)
-                st.session_state['analysis_complete'] = True
 # this will check if the dataframe is not empty

 if st.session_state['stage'] > 0 :
+    # Column selection is wrapped in a form: everything below runs only when
+    # the "Process Data" button is submitted and at least one column is chosen.
+    with st.form(border=True, key='Select Columns for Analysis'):
+        columns_to_analyze = st.multiselect(
+            label='Select columns to analyze',
+            options=st.session_state['parsed_responses'].columns
+        )
+        if st.form_submit_button("Process Data"):
+            if columns_to_analyze:
+                # Per-column results: `analyzers` and `col_names` are filled in
+                # the same order; `clusters` is keyed by column name.
+                analyzers = []
+                col_names = []
+                clusters = {}
+                for column in columns_to_analyze:
+                    # Gradient tracking is disabled for the whole per-column pipeline.
+                    with torch.no_grad():
+                        with st.status(f"Processing {column}", expanded=True) as status:
+                            # Pipeline: preprocess -> UMAP (2 components) -> HDBSCAN
+                            # -> TF-IDF terms per cluster -> merge similar clusters.
+                            analyzer = UAPAnalyzer(st.session_state['parsed_responses'], column)
+                            st.write(f"Processing {column}...")
+                            analyzer.preprocess_data(top_n=32)
+                            st.write("Reducing dimensionality...")
+                            analyzer.reduce_dimensionality(method='UMAP', n_components=2, n_neighbors=15, min_dist=0.1)
+                            st.write("Clustering data...")
+                            analyzer.cluster_data(method='HDBSCAN', min_cluster_size=15)
+                            analyzer.get_tf_idf_clusters(top_n=3)
+                            st.write("Naming clusters...")
+                            analyzers.append(analyzer)
+                            col_names.append(column)
+                            # Reads `cluster_terms` / `cluster_labels` straight from the
+                            # analyzer's instance dict and stores the merge result per column.
+                            clusters[column] = analyzer.merge_similar_clusters(cluster_terms=analyzer.__dict__['cluster_terms'], cluster_labels=analyzer.__dict__['cluster_labels'])
+                            # Run the visualization
+                            # fig = datamapplot.create_plot(
+                            #     analyzer.__dict__['reduced_embeddings'],
+                            #     analyzer.__dict__['cluster_labels'].astype(str),
+                            #     #label_font_size=11,
+                            #     label_wrap_width=20,
+                            #     use_medoids=True,
+                            # )#.to_html(full_html=False, include_plotlyjs='cdn')
+                            # st.pyplot(fig.savefig())
+                            status.update(label=f"Processing {column} complete", expanded=False)
+                # Store the results in session state; the steps below read them
+                # back from there rather than from the local variables.
+                st.session_state['analyzers'] = analyzers
+                st.session_state['col_names'] = col_names
+                st.session_state['clusters'] = clusters
+                # NOTE(review): the assignments below only rebind the local names;
+                # the objects stay referenced from st.session_state (set just above).
+                # `parsed` is not assigned anywhere else in this block.
+                # save space
+                parsed = None
+                analyzers = None
+                col_names = None
+                clusters = None
+                # NOTE(review): 'clusters' was set to a dict a few lines above, so
+                # this check is always true at this point.
+                if st.session_state['clusters'] is not None:
+                    try:
+                        new_data, parsed_responses = analyze_and_predict(st.session_state['parsed_responses'], st.session_state['analyzers'], st.session_state['col_names'], st.session_state['clusters'])
+                        st.session_state['dataset'] = parsed_responses
+                        st.session_state['new_data'] = new_data
+                        st.session_state['data_processed'] = True
+                    except Exception as e:
+                        # The error is only shown in the UI; 'data_processed' keeps
+                        # whatever value it already had.
+                        st.write(f"Error processing data: {e}")
+                # Requires 'data_processed' to already exist in session state
+                # (presumably initialised earlier in the file; not visible here).
                 if st.session_state['data_processed']:
+                    # Plot the Cramér's V heatmap of `new_data`; a failure here is
+                    # reported via st.write and does not stop the cluster plots below.
+                    try:
+                        visualizer = UAPVisualizer(data=st.session_state['new_data'])
+                        #new_data = pd.DataFrame()  # Assuming new_data is prepared earlier in the code
+                        fig2 = visualizer.plot_cramers_v_heatmap(data=st.session_state['new_data'], significance_level=0.05)
+                        with st.status(f"Cramer's V Chart", expanded=True) as statuss:
+                            st.pyplot(fig2)
+                            statuss.update(label="Cramer's V chart plotted", expanded=False)
+                    except Exception as e:
+                        st.write(f"Error plotting Cramers V: {e}")
+                    # One cluster plot per analyzed column; index `i` pairs each
+                    # column name with its analyzer in st.session_state['analyzers'].
+                    for i, column in enumerate(st.session_state['col_names']):
+                        #if stateful_button(f"Show {column} clusters {i}", key=f"show_{column}_clusters"):
+                        # if st.session_state['data_processed']:
+                        #     with st.status(f"Show clusters {column}", expanded=True) as stats:
+                        #         fig3 = st.session_state['analyzers'][i].plot_embeddings4(title=f"{column} clusters", cluster_terms=st.session_state['analyzers'][i].__dict__['cluster_terms'], cluster_labels=st.session_state['analyzers'][i].__dict__['cluster_labels'], reduced_embeddings=st.session_state['analyzers'][i].__dict__['reduced_embeddings'], column=f'Analyzer_{column}', data=st.session_state['new_data'])
+                        #         stats.update(label=f"Show clusters {column} complete", expanded=False)
+                        # NOTE(review): redundant with the enclosing 'data_processed'
+                        # check; nothing in between changes that flag.
+                        if st.session_state['data_processed']:
+                            with st.status(f"Show clusters {column}", expanded=True) as stats:
+                                # `fig3` is not used again in this block; presumably
+                                # plot_embeddings4 renders the figure itself (TODO confirm).
+                                fig3 = st.session_state['analyzers'][i].plot_embeddings4(
+                                    title=f"{column} clusters",
+                                    cluster_terms=st.session_state['analyzers'][i].__dict__['cluster_terms'],
+                                    cluster_labels=st.session_state['analyzers'][i].__dict__['cluster_labels'],
+                                    reduced_embeddings=st.session_state['analyzers'][i].__dict__['reduced_embeddings'],
+                                    column=column,  # Use the original column name here
+                                    data=st.session_state['parsed_responses']  # Use the original dataset here
+                                )
+                                stats.update(label=f"Show clusters {column} complete", expanded=False)
+                        # Set on every loop iteration (once per plotted column).
+                        st.session_state['analysis_complete'] = True
 # this will check if the dataframe is not empty