rcai committed on
Commit
e62e621
·
verified ·
1 Parent(s): 0058258

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +32 -0
test.py CHANGED
@@ -9,6 +9,38 @@ import re
9
  import boto3
10
  import botocore
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def list_files_in_bucket(bucket_name, prefix=''):
13
  """
14
  List all files in a given S3 bucket.
 
9
  import boto3
10
  import botocore
11
 
12
def filter_rows(group):
    """
    Keep the rows of ``group`` whose biomarker result matches the filter.

    A row is kept when either of the following holds:

    * ``biomarker_name`` is exactly ``"er"`` or ``"pr"`` and ``test_result``
      is one of 'positive', 'pos', '+';
    * ``biomarker_name`` is exactly ``"her2"`` and ``test_result`` is one of
      'negative', 'neg', '-'.

    ``test_result`` is compared case-insensitively; ``biomarker_name`` is
    compared as-is.

    NOTE(review): a second, identical ``filter_rows`` is defined later in
    this file and shadows this one -- consider removing one of them.

    Args:
        group: DataFrame with ``biomarker_name`` and ``test_result`` columns.

    Returns:
        The matching rows of ``group``, with their original index labels.
    """
    biomarker = group["biomarker_name"]
    # Cast before touching the .str accessor: a test_result column holding
    # no strings at all (e.g. all-NaN floats) would otherwise raise
    # AttributeError. Non-string values never match the allowed spellings.
    result = group["test_result"].astype(str).str.lower()

    is_positive = result.isin(['positive', 'pos', '+'])
    is_negative = result.isin(['negative', 'neg', '-'])

    # The original OR-ed two conditions that each repeated the HER2-negative
    # clause; stating it once yields the same row mask.
    keep = (
        ((biomarker == "er") & is_positive)
        | ((biomarker == "pr") & is_positive)
        | ((biomarker == "her2") & is_negative)
    )
    return group[keep]
23
+
24
# Run filter_rows on each chai_patient_id group of finaldf and stack the
# surviving rows into one frame with a fresh 0..n-1 index.
filtered_df_final = pd.concat(
    [
        filter_rows(patient_rows)
        for _patient_id, patient_rows in finaldf.groupby("chai_patient_id")
    ],
    ignore_index=True,
)

# Accepted stage_status spellings. isin() matches exactly.
# NOTE(review): upper-case values such as 'II' would not match -- confirm
# stage_status is lower-cased upstream.
stage_filter = [
    '1', '2', '3',
    'i', 'ii', 'iii',
    'iia', 'iiia', 'iib', 'iiib',
]
stage_mask = filtered_df_final["stage_status"].isin(stage_filter)
x = filtered_df_final[stage_mask]

# Distinct (chai_patient_id, clq_id) pairs among the stage-filtered rows.
y = x[["chai_patient_id", "clq_id"]].drop_duplicates()
28
+
29
def filter_rows(group):
    """
    Keep the rows of ``group`` whose biomarker result matches the filter.

    A row is kept when either of the following holds:

    * ``biomarker_name`` is exactly ``"er"`` or ``"pr"`` and ``test_result``
      is one of 'positive', 'pos', '+';
    * ``biomarker_name`` is exactly ``"her2"`` and ``test_result`` is one of
      'negative', 'neg', '-'.

    ``test_result`` is compared case-insensitively; ``biomarker_name`` is
    compared as-is.

    NOTE(review): an identical ``filter_rows`` is already defined earlier in
    this file; this definition shadows it -- consider removing one of them.

    Args:
        group: DataFrame with ``biomarker_name`` and ``test_result`` columns.

    Returns:
        The matching rows of ``group``, with their original index labels.
    """
    names = group["biomarker_name"]
    # Cast before touching the .str accessor: a test_result column holding
    # no strings at all (e.g. all-NaN floats) would otherwise raise
    # AttributeError. Non-string values never match the allowed spellings.
    results = group["test_result"].astype(str).str.lower()

    positive = results.isin(['positive', 'pos', '+'])
    negative = results.isin(['negative', 'neg', '-'])

    # The original OR-ed two conditions that each repeated the HER2-negative
    # clause; stating it once yields the same row mask.
    er_positive = (names == "er") & positive
    pr_positive = (names == "pr") & positive
    her2_negative = (names == "her2") & negative
    return group[er_positive | pr_positive | her2_negative]
40
+
41
+
42
# Run filter_rows on each chai_patient_id group of df and stack the surviving
# rows into one frame with a fresh 0..n-1 index.
filtered_df = pd.concat(
    [
        filter_rows(patient_rows)
        for _patient_id, patient_rows in df.groupby("chai_patient_id")
    ],
    ignore_index=True,
)
43
+
44
  def list_files_in_bucket(bucket_name, prefix=''):
45
  """
46
  List all files in a given S3 bucket.