rcai committed on
Commit
cd081ac
·
verified ·
1 Parent(s): 63ecc52

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +34 -0
test.py CHANGED
@@ -5,6 +5,40 @@ import json
5
  import pandas as pd
6
  import re
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def filter_medical_terms(lines):
9
  terms = ['er', 'pr', 'her2', 'mammaprint', 'oncotype']
10
  filtered_lines = []
 
5
  import pandas as pd
6
  import re
7
 
8
+ # List containing data from the snapshot
9
+ data = [
10
+ ['p.G6bS$ts', 'p.G6bS$ts | 8', '8 |', 'C.1994delG', 'p.G6bS$ts | 17'],
11
+ ['pS12/ifs', 'pS12/ifs | 16', '16 |', '©3810dupC', 'pS12/ifs | 14'],
12
+ ['pAs042fs', 'pAs042fs | 48', '48 |', 'c.15124delG', 'pAs042fs | 6'],
13
+ ['†on', '†on 2', 'C8§5—2A>G', '†on', '64'],
14
+ ['p.Y628fs', 'p.Y628fs |', '', 'c.1882delT,c.2851—1G>T', 'p.Y628fs | 16'],
15
+ ['p.H1O4/R', 'p.H1O4 /R', '21', 'C.3140A>G', 'p.H1O4/R | 13'],
16
+ ['pK26/fs', 'pK26/fs |', '', 'c.800delA', 'pK26/fs | 6'],
17
+ ['O.T542fs', 'O.T542fs | 9', '9', 'C.1624delA', 'O.T542fs | 18'],
18
+ ['p.r224D', 'p.r224D | 6', '6', 'c6/2G>T', 'p.r224D | 16']
19
+ ]
20
+
21
+ # Function to split on '|' and return the second part
22
+ def extract_post_split(value):
23
+ parts = value.split('|')
24
+ return parts[1].strip() if len(parts) > 1 else '' # Return second part if exists, else empty string
25
+
26
+ # Extract the 1st and 4th values as is, plus the part after '|' in the 2nd and last values
27
+ extracted_data = []
28
+ for row in data:
29
+ extracted_row = [
30
+ row[0], # 1st value as is
31
+ extract_post_split(row[1]), # Split 2nd value on '|' and take second part
32
+ row[3], # 4th value as is
33
+ extract_post_split(row[-1]) # Split last value on '|' and take second part
34
+ ]
35
+ extracted_data.append(extracted_row)
36
+
37
+ # Print the result
38
+ for row in extracted_data:
39
+ print(row)
40
+
41
+
42
  def filter_medical_terms(lines):
43
  terms = ['er', 'pr', 'her2', 'mammaprint', 'oncotype']
44
  filtered_lines = []