Spaces:

rcai
/

doctr_test

No application file

App Files Files Community

rcai committed on Oct 16, 2024

Commit

cd081ac

verified ·

1 Parent(s): 63ecc52

Update test.py

Browse files

Files changed (1) hide show

test.py +34 -0

test.py CHANGED Viewed

@@ -5,6 +5,40 @@ import json
 import pandas as pd
 import re
 def filter_medical_terms(lines):
     terms = ['er', 'pr', 'her2', 'mammaprint', 'oncotype']
     filtered_lines = []

 import pandas as pd
 import re
+# Sample rows taken from the snapshot: nine rows of five strings each; the loop below reads indices 0, 1, 3 and -1
+data = [
+    ['p.G6bS$ts', 'p.G6bS$ts | 8', '8 |', 'C.1994delG', 'p.G6bS$ts | 17'],
+    ['pS12/ifs', 'pS12/ifs | 16', '16 |', 'Â©3810dupC', 'pS12/ifs | 14'],
+    ['pAs042fs', 'pAs042fs | 48', '48 |', 'c.15124delG', 'pAs042fs | 6'],
+    ['â€ on', 'â€ on 2', 'C8Â§5â€”2A>G', 'â€ on', '64'],
+    ['p.Y628fs', 'p.Y628fs |', '', 'c.1882delT,c.2851â€”1G>T', 'p.Y628fs | 16'],
+    ['p.H1O4/R', 'p.H1O4 /R', '21', 'C.3140A>G', 'p.H1O4/R | 13'],
+    ['pK26/fs', 'pK26/fs |', '', 'c.800delA', 'pK26/fs | 6'],
+    ['O.T542fs', 'O.T542fs | 9', '9', 'C.1624delA', 'O.T542fs | 18'],
+    ['p.r224D', 'p.r224D | 6', '6', 'c6/2G>T', 'p.r224D | 16']
+]
+# Return the stripped text between the first and second '|' of value, or '' when value contains no '|'
+def extract_post_split(value):
+    parts = value.split('|')
+    return parts[1].strip() if len(parts) > 1 else ''  # parts[1] exists only when value has at least one '|'
+# Build one 4-item row per input row: 1st value, post-'|' part of the 2nd, 4th value, post-'|' part of the last
+extracted_data = []
+for row in data:
+    extracted_row = [
+        row[0],                          # 1st value as is
+        extract_post_split(row[1]),       # Split 2nd value on '|' and take second part ('' if no '|')
+        row[3],                          # 4th value as is
+        extract_post_split(row[-1])       # Split last value on '|' and take second part ('' if no '|')
+    ]
+    extracted_data.append(extracted_row)
+# Print each extracted row (a 4-item list) on its own line
+for row in extracted_data:
+    print(row)
 def filter_medical_terms(lines):
     terms = ['er', 'pr', 'her2', 'mammaprint', 'oncotype']
     filtered_lines = []