Spaces:

ImagingDataCommons
/

CloudSegmentatorResults

Sleeping

App Files Community

Vamsi Thiriveedhi committed on May 24, 2024

Commit

c6d0240

1 Parent(s): 6c72b9f

Add large files tracked with Git LFS

Browse files

Files changed (4) hide show

.gitattributes +1 -0
filter_data_app.py +121 -63
qual-checks-and-quant-values.parquet +3 -0
requirements.txt +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+qual-checks-and-quant-values.parquet filter=lfs diff=lfs merge=lfs -text

filter_data_app.py CHANGED Viewed

@@ -1,42 +1,49 @@
 import streamlit as st
 import duckdb
-import os
 import requests
 import pandas as pd
 from upsetplot import UpSet
 import matplotlib.pyplot as plt
 # Set page configuration
 st.set_page_config(layout="wide")
 # URL and local path to the Parquet file
 PARQUET_URL = 'https://github.com/vkt1414/idc-index-data/releases/download/0.1/qualitative_checks.parquet'
-LOCAL_PARQUET_FILE = 'qualitative_checks.parquet'
-# Function to download the Parquet file if it doesn't exist
-def download_parquet(url, local_path):
-    if not os.path.exists(local_path):
-        response = requests.get(url)
-        with open(local_path, 'wb') as file:
-            file.write(response.content)
-        st.write(f"Downloaded {local_path}")
 @st.cache_data
 def load_data():
-    return duckdb.query(f"SELECT *, concat('https://viewer.imaging.datacommons.cancer.gov/viewer/', StudyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
 # Function to filter data based on user input
 def filter_data(df, filters):
     for col, value in filters.items():
         if value:
-            df = df[df[col] == value]
     return df
 # Function to create an UpSet plot for failed checks
 def create_upset_plot_failures(df):
-    df = df.set_index(df['segmentation_completeness'] != 'pass').set_index(df['laterality_check'] != 'pass', append=True)
-    df = df.set_index(df['series_with_vertabra_on_every_slice'] != 'pass', append=True)
-    df = df.set_index(df['connected_volumes'] != 'pass', append=True)
     df = df[df.index.to_frame().any(axis=1)]  # Ignore the case when all conditions are false
     fig = plt.figure()
@@ -46,6 +53,7 @@ def create_upset_plot_failures(df):
 # Function to create an UpSet plot for passed checks
 def create_upset_plot_passes(df):
     df = df.set_index(df['segmentation_completeness'] == 'pass').set_index(df['laterality_check'] == 'pass', append=True)
     df = df.set_index(df['series_with_vertabra_on_every_slice'] == 'pass', append=True)
     df = df.set_index(df['connected_volumes'] == 'pass', append=True)
@@ -63,50 +71,99 @@ def main():
     # Sidebar widgets for navigation and filtering
     page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
-    # Download the Parquet file if it doesn't exist
-    download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)
     # Load the data
     df = load_data()
-    # Preselect one combination for bodyPart based on the first row
-    initial_body_part = df.iloc[0]['bodyPart']
-    with st.sidebar:
-        st.title("Filters")
-        # Body part filter
-        body_part_options = df['bodyPart'].unique().tolist()
-        body_part = st.selectbox("Body Part", options=body_part_options, index=body_part_options.index(initial_body_part))
-        # Filter the dataframe based on the selected body part
-        filtered_df = df[df['bodyPart'] == body_part]
-        # Fetch unique values for other filters based on the filtered dataframe
-        segmentation_completeness_options = [""] + filtered_df['segmentation_completeness'].unique().tolist()
-        laterality_check_options = [""] + filtered_df['laterality_check'].unique().tolist()
-        series_with_vertabra_on_every_slice_options = [""] + filtered_df['series_with_vertabra_on_every_slice'].unique().tolist()
-        connected_volumes_options = [""] + filtered_df['connected_volumes'].unique().tolist()
-        laterality_options = [""] + filtered_df['laterality'].unique().tolist()
-        segmentation_completeness = st.selectbox("Segmentation Completeness", options=segmentation_completeness_options)
-        laterality_check = st.selectbox("Laterality Check", options=laterality_check_options)
-        series_with_vertabra_on_every_slice = st.selectbox("Series with Vertebra on Every Slice", options=series_with_vertabra_on_every_slice_options)
-        connected_volumes = st.selectbox("Connected Volumes", options=connected_volumes_options)
-        laterality = st.selectbox("Laterality", options=laterality_options)
-    # Filtering the data based on user input
-    filters = {
-        'segmentation_completeness': segmentation_completeness if segmentation_completeness else None,
-        'laterality_check': laterality_check if laterality_check else None,
-        'series_with_vertabra_on_every_slice': series_with_vertabra_on_every_slice if series_with_vertabra_on_every_slice else None,
-        'connected_volumes': connected_volumes if connected_volumes else None,
-        'bodyPart': body_part,
-        'laterality': laterality if laterality else None
-    }
-    filtered_df = filter_data(df, filters)
     # Define the pages
     if page == "Summary":
@@ -116,30 +173,34 @@ def main():
             WITH Checks AS (
                 SELECT
                     bodyPart,
                     COUNT(*) AS total_count,
                     SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
                     SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
                     SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
                     SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
                 FROM
-                    'qualitative_checks.parquet'
                 GROUP BY
-                    bodyPart
             )
             SELECT
                 bodyPart,
                 ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
-                ROUND((laterality_pass_count * 100.0) / total_count, 2) || '% (' || laterality_pass_count || '/' || total_count || ')' AS laterality_check,
                 ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
                 ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
             FROM
                 Checks
             ORDER BY
-                bodyPart;
-        """).to_df()
-        st.dataframe(summary_df)
     elif page == "UpSet Plots":
         st.header("UpSet Plots of Qualitative Checks")
@@ -161,7 +222,7 @@ def main():
         start_idx = (page_number - 1) * page_size
         end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
-        paginated_df = filtered_df.iloc[start_idx:end_idx]
         # Display the paginated dataframe
         st.header("Filtered Data")
@@ -169,9 +230,6 @@ def main():
         st.data_editor(
             paginated_df,
-            column_config={
-                "viewerUrl": st.column_config.LinkColumn("Viewer Url")
-            },
             hide_index=True,
         )
@@ -184,13 +242,13 @@ def main():
         # Create and display the UpSet plot for failed checks
         st.header("UpSet Plot for Failed Checks")
         st.write("This plot shows the combinations of checks that failed.")
-        if not filtered_df.empty:
             create_upset_plot_failures(filtered_df)
         # Create and display the UpSet plot for passed checks
         st.header("UpSet Plot for Passed Checks")
         st.write("This plot shows the combinations of checks that passed.")
-        if not filtered_df.empty:
             create_upset_plot_passes(filtered_df)
 if __name__ == "__main__":

 import streamlit as st
 import duckdb
 import requests
 import pandas as pd
 from upsetplot import UpSet
 import matplotlib.pyplot as plt
+import polars as pl
 # Set page configuration
 st.set_page_config(layout="wide")
 # URL and local path to the Parquet file
 PARQUET_URL = 'https://github.com/vkt1414/idc-index-data/releases/download/0.1/qualitative_checks.parquet'
+LOCAL_PARQUET_FILE = 'qual-checks-and-quant-values.parquet'
 @st.cache_data
 def load_data():
+    cols = [
+        'PatientID',
+        'StudyInstanceUID',
+        'seriesNumber',
+        'bodyPart',
+        'laterality',
+        'segmentation_completeness',
+        'laterality_check',
+        'series_with_vertabra_on_every_slice',
+        'connected_volumes',
+        'Volume from Voxel Summation'
+    ]
+    return pl.read_parquet(LOCAL_PARQUET_FILE, columns=cols)
 # Function to filter data based on user input
 def filter_data(df, filters):
     for col, value in filters.items():
         if value:
+            df = df.filter(pl.col(col) == value)
     return df
 # Function to create an UpSet plot for failed checks
 def create_upset_plot_failures(df):
+    df = df.to_pandas()  # Convert to Pandas DataFrame
+    # Treat 'pass' and null values as passing
+    df = df.set_index(~((df['segmentation_completeness'] == 'pass') | df['segmentation_completeness'].isnull())).set_index(~((df['laterality_check'] == 'pass') | df['laterality_check'].isnull()), append=True)
+    df = df.set_index(~((df['series_with_vertabra_on_every_slice'] == 'pass') | df['series_with_vertabra_on_every_slice'].isnull()), append=True)
+    df = df.set_index(~((df['connected_volumes'] == 'pass') | df['connected_volumes'].isnull()), append=True)
     df = df[df.index.to_frame().any(axis=1)]  # Ignore the case when all conditions are false
     fig = plt.figure()
 # Function to create an UpSet plot for passed checks
 def create_upset_plot_passes(df):
+    df = df.to_pandas()  # Convert to Pandas DataFrame
     df = df.set_index(df['segmentation_completeness'] == 'pass').set_index(df['laterality_check'] == 'pass', append=True)
     df = df.set_index(df['series_with_vertabra_on_every_slice'] == 'pass', append=True)
     df = df.set_index(df['connected_volumes'] == 'pass', append=True)
     # Sidebar widgets for navigation and filtering
     page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
     # Load the data
     df = load_data()
+    if page == "UpSet Plots":
+        with st.sidebar:
+            st.title("Filters")
+            # Initialize filters with None values in session state
+            if 'filters' not in st.session_state:
+                st.session_state.filters = {
+                    'bodyPart': None,
+                    'segmentation_completeness': None,
+                    'laterality_check': None,
+                    'series_with_vertabra_on_every_slice': None,
+                    'connected_volumes': None,
+                    'laterality': None
+                }
+            filters = st.session_state.filters
+            # Define functions to handle filter updates
+            def reset_filters():
+                filters.update({
+                    'segmentation_completeness': None,
+                    'laterality_check': None,
+                    'series_with_vertabra_on_every_slice': None,
+                    'connected_volumes': None,
+                    'laterality': None
+                })
+                st.session_state.filters = filters
+            def apply_filter(filter_name, value):
+                filters[filter_name] = value
+                st.session_state.filters = filters
+            # Body part filter
+            body_part_options = sorted(df['bodyPart'].unique().to_list())
+            body_part = st.selectbox("Body Part", options=body_part_options, key='bodyPart', on_change=reset_filters)
+            filters['bodyPart'] = body_part
+            # Apply the current filters to update options for other filters
+            filtered_df = filter_data(df, filters)
+            # Update options for other filters based on the current selection
+            segmentation_completeness_options = [""] + filtered_df['segmentation_completeness'].unique().to_list()
+            laterality_check_options = [""] + filtered_df['laterality_check'].unique().to_list()
+            series_with_vertabra_on_every_slice_options = [""] + filtered_df['series_with_vertabra_on_every_slice'].unique().to_list()
+            connected_volumes_options = [""] + filtered_df['connected_volumes'].unique().to_list()
+            laterality_options = [""] + filtered_df['laterality'].unique().to_list()
+            # Add remaining filters with default values from session state
+            segmentation_completeness = st.selectbox(
+                "Segmentation Completeness",
+                options=segmentation_completeness_options,
+                index=segmentation_completeness_options.index(filters['segmentation_completeness']) if filters['segmentation_completeness'] else 0,
+                key='segmentation_completeness',
+                on_change=lambda: apply_filter('segmentation_completeness', st.session_state.segmentation_completeness)
+            )
+            laterality_check = st.selectbox(
+                "Laterality Check",
+                options=laterality_check_options,
+                index=laterality_check_options.index(filters['laterality_check']) if filters['laterality_check'] else 0,
+                key='laterality_check',
+                on_change=lambda: apply_filter('laterality_check', st.session_state.laterality_check)
+            )
+            series_with_vertabra_on_every_slice = st.selectbox(
+                "Series with Vertebra on Every Slice",
+                options=series_with_vertabra_on_every_slice_options,
+                index=series_with_vertabra_on_every_slice_options.index(filters['series_with_vertabra_on_every_slice']) if filters['series_with_vertabra_on_every_slice'] else 0,
+                key='series_with_vertabra_on_every_slice',
+                on_change=lambda: apply_filter('series_with_vertabra_on_every_slice', st.session_state.series_with_vertabra_on_every_slice)
+            )
+            connected_volumes = st.selectbox(
+                "Connected Volumes",
+                options=connected_volumes_options,
+                index=connected_volumes_options.index(filters['connected_volumes']) if filters['connected_volumes'] else 0,
+                key='connected_volumes',
+                on_change=lambda: apply_filter('connected_volumes', st.session_state.connected_volumes)
+            )
+            laterality = st.selectbox(
+                "Laterality",
+                options=laterality_options,
+                index=laterality_options.index(filters['laterality']) if filters['laterality'] else 0,
+                key='laterality',
+                on_change=lambda: apply_filter('laterality', st.session_state.laterality)
+            )
+            st.session_state.filters = filters
     # Define the pages
     if page == "Summary":
             WITH Checks AS (
                 SELECT
                     bodyPart,
+                    laterality,
                     COUNT(*) AS total_count,
                     SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
                     SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
                     SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
                     SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
                 FROM
+                    'qual-checks-and-quant-values.parquet'
                 GROUP BY
+                    bodyPart, laterality
             )
             SELECT
                 bodyPart,
+                laterality,
                 ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
+                CASE WHEN laterality IS NOT NULL
+                    THEN ROUND((laterality_pass_count * 100.0) / NULLIF(total_count, 0), 2) || '% (' || laterality_pass_count || '/' || total_count || ')'
+                    ELSE 'N/A' END AS laterality_check,
                 ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
                 ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
             FROM
                 Checks
             ORDER BY
+                bodyPart, laterality;
+        """).pl()
+        summary_df = summary_df.to_pandas()
+        st.data_editor(summary_df,  hide_index=True,use_container_width=True,height=1500)
     elif page == "UpSet Plots":
         st.header("UpSet Plots of Qualitative Checks")
         start_idx = (page_number - 1) * page_size
         end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
+        paginated_df = filtered_df[start_idx:end_idx].to_pandas()  # Convert to Pandas DataFrame
         # Display the paginated dataframe
         st.header("Filtered Data")
         st.data_editor(
             paginated_df,
             hide_index=True,
         )
         # Create and display the UpSet plot for failed checks
         st.header("UpSet Plot for Failed Checks")
         st.write("This plot shows the combinations of checks that failed.")
+        if not filtered_df.is_empty():
             create_upset_plot_failures(filtered_df)
         # Create and display the UpSet plot for passed checks
         st.header("UpSet Plot for Passed Checks")
         st.write("This plot shows the combinations of checks that passed.")
+        if not filtered_df.is_empty():
             create_upset_plot_passes(filtered_df)
 if __name__ == "__main__":

qual-checks-and-quant-values.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:527bf1b978eec82de57e9b4f22d1470da418c47a45ee79c47a3af6857ee850e1
+size 1127681711

requirements.txt CHANGED Viewed

@@ -1,6 +1,7 @@
 duckdb
 matplotlib
 pandas
 pyarrow
 streamlit
 streamlit_extras

 duckdb
 matplotlib
 pandas
+polars
 pyarrow
 streamlit
 streamlit_extras