Vamsi Thiriveedhi
committed on
Commit
·
cfdbb51
1
Parent(s):
caced37
enh: provide ohif links with series uids, download with seg uuids
Browse files
- Dockerfile +3 -0
- filter_data_app.py +44 -5
Dockerfile
CHANGED
|
@@ -23,6 +23,9 @@ COPY --chown=user . $HOME/app
|
|
| 23 |
# Download the parquet file from github
|
| 24 |
RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/qual_checks_and_quantitative_measurements.parquet
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
# Install any needed packages specified in requirements.txt
|
| 27 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 28 |
|
|
|
|
| 23 |
# Download the parquet file from github
|
| 24 |
RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/qual_checks_and_quantitative_measurements.parquet
|
| 25 |
|
| 26 |
+
# Download the mapping parquet file from github
|
| 27 |
+
RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/ct-seg-sr-map-with-series-uids-gcs-aws-ulrs.parquet
|
| 28 |
+
|
| 29 |
# Install any needed packages specified in requirements.txt
|
| 30 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 31 |
|
filter_data_app.py
CHANGED
|
@@ -15,11 +15,15 @@ st.set_page_config(layout="wide")
|
|
| 15 |
# Local path to the Parquet file
|
| 16 |
LOCAL_PARQUET_FILE = 'qual_checks_and_quantitative_measurements.parquet'
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
@st.cache_data
|
| 19 |
def load_data(radiomics_feature='Volume from Voxel Summation'):
|
| 20 |
cols = [
|
| 21 |
'PatientID',
|
| 22 |
'StudyInstanceUID',
|
|
|
|
| 23 |
'SeriesNumber',
|
| 24 |
'bodyPart',
|
| 25 |
'laterality',
|
|
@@ -36,8 +40,13 @@ def load_data(radiomics_feature='Volume from Voxel Summation'):
|
|
| 36 |
# pl.col('connected_volumes').cast(pl.Int32, strict=False)
|
| 37 |
# ).alias('connected_volumes')
|
| 38 |
# ])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
return df
|
| 41 |
|
| 42 |
# Function to filter data based on user input
|
| 43 |
def filter_data(df, filters,radiomics_feature):
|
|
@@ -190,7 +199,7 @@ def main():
|
|
| 190 |
on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
|
| 191 |
)
|
| 192 |
filters['radiomics_feature'] = radiomics_feature
|
| 193 |
-
df = load_data(radiomics_feature=radiomics_feature)
|
| 194 |
|
| 195 |
|
| 196 |
# Body part filter
|
|
@@ -303,8 +312,15 @@ def main():
|
|
| 303 |
end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
|
| 304 |
paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
|
| 305 |
paginated_df = paginated_df.rename(columns={"connected_volumes": "connected_components"})
|
| 306 |
-
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
# Display the paginated dataframe
|
| 309 |
st.header("Filtered Data")
|
| 310 |
st.write("Number of Rows:", len(filtered_df))
|
|
@@ -313,7 +329,7 @@ def main():
|
|
| 313 |
paginated_df,
|
| 314 |
column_config={
|
| 315 |
"Viewer Url":st.column_config.LinkColumn("StudyInstanceUID",
|
| 316 |
-
display_text=r"https:\/\/viewer\.imaging\.datacommons\.cancer\.gov\/v3\/viewer
|
| 317 |
),
|
| 318 |
|
| 319 |
},
|
|
@@ -367,6 +383,29 @@ def main():
|
|
| 367 |
create_upset_plot_passes(filtered_df)
|
| 368 |
|
| 369 |
def convert_df(df):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
return df.write_csv()
|
| 371 |
|
| 372 |
csv= convert_df(filtered_df)
|
|
|
|
| 15 |
# Local path to the Parquet file
|
| 16 |
LOCAL_PARQUET_FILE = 'qual_checks_and_quantitative_measurements.parquet'
|
| 17 |
|
| 18 |
+
|
| 19 |
+
CT_SEG_SR_MAPPING_FILE = 'ct-seg-sr-map-with-series-uids-gcs-aws-ulrs.parquet'
|
| 20 |
+
|
| 21 |
@st.cache_data
|
| 22 |
def load_data(radiomics_feature='Volume from Voxel Summation'):
|
| 23 |
cols = [
|
| 24 |
'PatientID',
|
| 25 |
'StudyInstanceUID',
|
| 26 |
+
'CT_SeriesInstanceUID',
|
| 27 |
'SeriesNumber',
|
| 28 |
'bodyPart',
|
| 29 |
'laterality',
|
|
|
|
| 40 |
# pl.col('connected_volumes').cast(pl.Int32, strict=False)
|
| 41 |
# ).alias('connected_volumes')
|
| 42 |
# ])
|
| 43 |
+
map_cols = [
|
| 44 |
+
'seg_seriesInstanceUID',
|
| 45 |
+
'Referenced_CT_SeriesInstanceUID',
|
| 46 |
+
]
|
| 47 |
+
map_df = pl.read_parquet(CT_SEG_SR_MAPPING_FILE, columns=map_cols)
|
| 48 |
|
| 49 |
+
return df, map_df
|
| 50 |
|
| 51 |
# Function to filter data based on user input
|
| 52 |
def filter_data(df, filters,radiomics_feature):
|
|
|
|
| 199 |
on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
|
| 200 |
)
|
| 201 |
filters['radiomics_feature'] = radiomics_feature
|
| 202 |
+
df,map_df = load_data(radiomics_feature=radiomics_feature)
|
| 203 |
|
| 204 |
|
| 205 |
# Body part filter
|
|
|
|
| 312 |
end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
|
| 313 |
paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
|
| 314 |
paginated_df = paginated_df.rename(columns={"connected_volumes": "connected_components"})
|
| 315 |
+
sql='''
|
| 316 |
+
SELECT
|
| 317 |
+
DISTINCT *
|
| 318 |
+
FROM
|
| 319 |
+
paginated_df pdf
|
| 320 |
+
JOIN map_df mdf on pdf.CT_SeriesInstanceUID = mdf.Referenced_CT_SeriesInstanceUID
|
| 321 |
+
'''
|
| 322 |
+
paginated_df = duckdb.sql(sql).df()
|
| 323 |
+
paginated_df['Viewer Url'] = 'https://viewer.imaging.datacommons.cancer.gov/v3/viewer/?StudyInstanceUIDs='+paginated_df['StudyInstanceUID']+'&SeriesInstanceUIDs='+paginated_df['CT_SeriesInstanceUID']+','+paginated_df['seg_seriesInstanceUID']
|
| 324 |
# Display the paginated dataframe
|
| 325 |
st.header("Filtered Data")
|
| 326 |
st.write("Number of Rows:", len(filtered_df))
|
|
|
|
| 329 |
paginated_df,
|
| 330 |
column_config={
|
| 331 |
"Viewer Url":st.column_config.LinkColumn("StudyInstanceUID",
|
| 332 |
+
display_text=r"https:\/\/viewer\.imaging\.datacommons\.cancer\.gov\/v3\/viewer\/\?\StudyInstanceUIDs=(.*)&"
|
| 333 |
),
|
| 334 |
|
| 335 |
},
|
|
|
|
| 383 |
create_upset_plot_passes(filtered_df)
|
| 384 |
|
| 385 |
def convert_df(df):
|
| 386 |
+
sql=f'''
|
| 387 |
+
SELECT
|
| 388 |
+
DISTINCT
|
| 389 |
+
PatientID,
|
| 390 |
+
StudyInstanceUID,
|
| 391 |
+
CT_SeriesInstanceUID,
|
| 392 |
+
SEG_seriesInstanceUID,
|
| 393 |
+
SeriesNumber,
|
| 394 |
+
bodyPart,
|
| 395 |
+
laterality,
|
| 396 |
+
segmentation_completeness,
|
| 397 |
+
laterality_check,
|
| 398 |
+
volume_from_voxel_summation_check,
|
| 399 |
+
connected_volumes,
|
| 400 |
+
voxel_num,
|
| 401 |
+
"{radiomics_feature}"
|
| 402 |
+
FROM
|
| 403 |
+
filtered_df
|
| 404 |
+
JOIN
|
| 405 |
+
map_df on filtered_df.CT_SeriesInstanceUID= map_df.Referenced_CT_SeriesInstanceUId
|
| 406 |
+
'''
|
| 407 |
+
df= duckdb.sql(sql).pl()
|
| 408 |
+
|
| 409 |
return df.write_csv()
|
| 410 |
|
| 411 |
csv= convert_df(filtered_df)
|