Spaces:
No application file
No application file
Update test.py
Browse files
test.py
CHANGED
|
@@ -9,6 +9,38 @@ import re
|
|
| 9 |
import boto3
|
| 10 |
import botocore
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def list_files_in_bucket(bucket_name, prefix=''):
|
| 13 |
"""
|
| 14 |
List all files in a given S3 bucket.
|
|
|
|
| 9 |
import boto3
|
| 10 |
import botocore
|
| 11 |
|
| 12 |
+
def filter_rows(group):
    """
    Keep the biomarker rows of ``group`` that satisfy the selection criteria.

    A row is kept when either of the following holds (``test_result`` is
    compared case-insensitively, ``biomarker_name`` is compared exactly):

    * ``biomarker_name`` is ``"er"`` or ``"pr"`` and ``test_result`` is one
      of ``positive`` / ``pos`` / ``+``;
    * ``biomarker_name`` is ``"her2"`` and ``test_result`` is one of
      ``negative`` / ``neg`` / ``-``.

    Rows whose ``test_result`` is missing never match.

    :param group: DataFrame with ``biomarker_name`` and ``test_result`` columns.
    :return: The rows of ``group`` that match, with their original index labels.
    """
    biomarker = group["biomarker_name"]
    # Lower-case once instead of once per clause.
    result = group["test_result"].str.lower()

    is_positive = result.isin(['positive', 'pos', '+'])
    is_negative = result.isin(['negative', 'neg', '-'])

    # The HER2-negative clause used to be spelled out twice and OR-ed with
    # itself; a single mask selects exactly the same rows.
    hormone_receptor_positive = biomarker.isin(["er", "pr"]) & is_positive
    her2_negative = (biomarker == "her2") & is_negative

    return group[hormone_receptor_positive | her2_negative]
|
| 23 |
+
|
| 24 |
+
# Apply the biomarker row filter one patient at a time and stack the results.
# With pandas' default groupby settings the groups come out ordered by
# chai_patient_id, and rows whose chai_patient_id is missing form no group.
_patient_frames = [filter_rows(group) for _, group in finaldf.groupby("chai_patient_id")]

# pd.concat raises ValueError when given an empty list (no rows, or no
# non-missing patient ids), so fall back to an empty frame with the same columns.
if _patient_frames:
    filtered_df_final = pd.concat(_patient_frames, ignore_index=True)
else:
    filtered_df_final = finaldf.iloc[0:0].reset_index(drop=True)

# Stage labels that are kept; stage_status is compared against them verbatim.
stage_filter = ['1', '2', '3', 'i', 'ii', 'iii', 'iia', 'iiia', 'iib', 'iiib']
x = filtered_df_final[filtered_df_final["stage_status"].isin(stage_filter)]

# Unique (chai_patient_id, clq_id) pairs among the stage-filtered rows.
y = x[["chai_patient_id", "clq_id"]].drop_duplicates()
|
| 28 |
+
|
| 29 |
+
def filter_rows(group):
    """
    Keep the biomarker rows of ``group`` that satisfy the selection criteria.

    A row is kept when either of the following holds (``test_result`` is
    compared case-insensitively, ``biomarker_name`` is compared exactly):

    * ``biomarker_name`` is ``"er"`` or ``"pr"`` and ``test_result`` is one
      of ``positive`` / ``pos`` / ``+``;
    * ``biomarker_name`` is ``"her2"`` and ``test_result`` is one of
      ``negative`` / ``neg`` / ``-``.

    Rows whose ``test_result`` is missing never match.

    :param group: DataFrame with ``biomarker_name`` and ``test_result`` columns.
    :return: The rows of ``group`` that match, with their original index labels.
    """
    # NOTE(review): a function with this same name is already defined earlier
    # in the file; this definition rebinds it. One of the two can be dropped.
    biomarker = group["biomarker_name"]
    # Lower-case once instead of once per clause.
    result = group["test_result"].str.lower()

    is_positive = result.isin(['positive', 'pos', '+'])
    is_negative = result.isin(['negative', 'neg', '-'])

    # The HER2-negative clause used to be spelled out twice and OR-ed with
    # itself; a single mask selects exactly the same rows.
    hormone_receptor_positive = biomarker.isin(["er", "pr"]) & is_positive
    her2_negative = (biomarker == "her2") & is_negative

    return group[hormone_receptor_positive | her2_negative]
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Apply the biomarker row filter one patient at a time and stack the results.
_patient_frames = [filter_rows(group) for _, group in df.groupby("chai_patient_id")]

# pd.concat raises ValueError when given an empty list (no rows, or no
# non-missing patient ids), so fall back to an empty frame with the same columns.
if _patient_frames:
    filtered_df = pd.concat(_patient_frames, ignore_index=True)
else:
    filtered_df = df.iloc[0:0].reset_index(drop=True)
|
| 43 |
+
|
| 44 |
def list_files_in_bucket(bucket_name, prefix=''):
|
| 45 |
"""
|
| 46 |
List all files in a given S3 bucket.
|