Spaces:
No application file
No application file
Update test.py
Browse files
test.py
CHANGED
|
@@ -5,6 +5,40 @@ import json
|
|
| 5 |
import pandas as pd
|
| 6 |
import re
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def filter_medical_terms(lines):
|
| 9 |
terms = ['er', 'pr', 'her2', 'mammaprint', 'oncotype']
|
| 10 |
filtered_lines = []
|
|
|
|
| 5 |
import pandas as pd
|
| 6 |
import re
|
| 7 |
|
| 8 |
+
# List containing data from the snapshot
|
| 9 |
+
# Each row holds five string fields. In most rows the 2nd and 5th fields carry
# a '|'-separated suffix (e.g. 'p.Y628fs | 16'); a few rows have no '|' at all.
# NOTE(review): the unusual characters ('©', '†', '§', '$', '—') look like
# recognition noise in the captured text; they are kept verbatim here --
# confirm against the original snapshot before normalising anything.
data = [
    ['p.G6bS$ts', 'p.G6bS$ts | 8', '8 |', 'C.1994delG', 'p.G6bS$ts | 17'],
    ['pS12/ifs', 'pS12/ifs | 16', '16 |', '©3810dupC', 'pS12/ifs | 14'],
    ['pAs042fs', 'pAs042fs | 48', '48 |', 'c.15124delG', 'pAs042fs | 6'],
    ['†on', '†on 2', 'C8§5—2A>G', '†on', '64'],
    ['p.Y628fs', 'p.Y628fs |', '', 'c.1882delT,c.2851—1G>T', 'p.Y628fs | 16'],
    ['p.H1O4/R', 'p.H1O4 /R', '21', 'C.3140A>G', 'p.H1O4/R | 13'],
    ['pK26/fs', 'pK26/fs |', '', 'c.800delA', 'pK26/fs | 6'],
    ['O.T542fs', 'O.T542fs | 9', '9', 'C.1624delA', 'O.T542fs | 18'],
    ['p.r224D', 'p.r224D | 6', '6', 'c6/2G>T', 'p.r224D | 16'],
]
|
| 20 |
+
|
| 21 |
+
# Function to split on '|' and return the second part
|
| 22 |
+
def extract_post_split(value: str) -> str:
    """Return the second '|'-separated field of *value*, stripped.

    Args:
        value: String to split on '|'.

    Returns:
        The text between the first and second '|' with surrounding
        whitespace removed, or '' when *value* contains no '|'.
    """
    parts = value.split('|')
    # A single part means there was no '|' at all, so no second field exists.
    return parts[1].strip() if len(parts) > 1 else ''
|
| 25 |
+
|
| 26 |
+
# Build one 4-field row per input row: the 1st and 4th values verbatim, plus
# the second '|'-separated part of the 2nd and last values.
extracted_data = [
    [
        row[0],                       # 1st value as is
        extract_post_split(row[1]),   # 2nd value: part after the first '|'
        row[3],                       # 4th value as is (the 3rd is not used)
        extract_post_split(row[-1]),  # last value: part after the first '|'
    ]
    for row in data
]

# Print the result, one extracted row per line
for row in extracted_data:
    print(row)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
def filter_medical_terms(lines):
|
| 43 |
terms = ['er', 'pr', 'her2', 'mammaprint', 'oncotype']
|
| 44 |
filtered_lines = []
|