ready2drop commited on
Commit
b09b487
·
verified ·
1 Parent(s): d3c46ef

Upload 2 files

Browse files
Files changed (2) hide show
  1. util.py +85 -0
  2. view.py +56 -0
util.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.preprocessing import MinMaxScaler
3
+ from sklearn.model_selection import train_test_split
4
+ import os
5
+ from imblearn.over_sampling import SMOTE
6
+ import warnings
7
+ warnings.filterwarnings("ignore")
8
+
9
+
10
def load_data(data_dir: str,
              excel_file: str,
              mode: str = "train",
              scale: bool = True,
              smote: bool = True,
              ):
    """Load the stone dataset from CSV, preprocess it, and return one split.

    Steps, in order: read the CSV, replace missing values with 0.0, rename
    ``ID``/``REAL_STONE`` to ``patient_id``/``target``, keep the selected
    feature columns, optionally min-max scale the features, optionally
    oversample with SMOTE, then do a stratified split of roughly
    70% train / 18% validation / 12% test (0.3 hold-out, of which 0.4 is test).

    Args:
        data_dir: Directory that contains the data file.
        excel_file: File name inside ``data_dir``. Despite the name it is read
            with ``pandas.read_csv``.
        mode: ``"train"`` or ``"test"``; selects which split(s) are returned.
        scale: When truthy, min-max scale the feature columns.
        smote: When truthy, oversample with SMOTE before splitting.

    Returns:
        ``(train_data, valid_data)`` when ``mode == "train"``;
        ``test_data`` when ``mode == "test"``.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"test"``.
    """
    # Fail fast: reject a bad mode before any file I/O or resampling happens.
    if mode not in ("train", "test"):
        raise ValueError("Choose mode!")

    print("--------------Load RawData--------------")
    df = pd.read_csv(os.path.join(data_dir, excel_file))

    # Inclusion
    print("--------------Inclusion--------------")
    print('Total : ', len(df))

    print("--------------fillNA--------------")
    df.fillna(0.0, inplace=True)
    print(df['REAL_STONE'].value_counts())

    # Column rename
    df.rename(columns={'ID': 'patient_id', 'REAL_STONE': 'target'}, inplace=True)

    # backward (n=13) -- presumably the result of backward feature selection.
    feature_columns = [
        'SEX', 'AGE', 'DUCT_DILIATATION_10MM', 'DUCT_DILIATATION_8MM',
        'Hb', 'PLT', 'WBC', 'ALP', 'ALT', 'AST', 'GGT', 'BUN', 'CREATININE',
    ]

    # Explicit copy so the in-place scaling below writes to an independent
    # frame rather than to a slice of ``df``.
    data = df[['patient_id', *feature_columns, 'target']].copy()

    if scale:
        print("--------------Scaling--------------")
        columns_to_scale = [col for col in feature_columns if col in data.columns]

        if columns_to_scale:
            # NOTE(review): the scaler is fitted on the full dataset, i.e.
            # before the train/validation/test split below.
            scaler = MinMaxScaler()
            data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])
        else:
            print("No columns to scale.")

    if smote:  # Apply SMOTE if the flag is set
        print(data['target'].value_counts())
        print("Applying SMOTE...")
        # NOTE(review): resampling happens before the split, so synthetic rows
        # can land in the validation/test sets, and ``patient_id`` is part of
        # the resampled features. Kept as-is to preserve the existing splits;
        # confirm whether this is intended.
        sampler = SMOTE(sampling_strategy='all', random_state=42)
        features = data.drop(columns=['target'])
        labels = data['target']
        features_res, labels_res = sampler.fit_resample(features, labels)
        data = pd.DataFrame(features_res, columns=features.columns)
        data['target'] = labels_res
        print(data['target'].value_counts())

    # Fixed seeds keep the three splits identical across "train"/"test" calls.
    train_data, holdout = train_test_split(
        data, test_size=0.3, stratify=data['target'], random_state=123)
    valid_data, test_data = train_test_split(
        holdout, test_size=0.4, stratify=holdout['target'], random_state=123)

    if mode == 'train':
        print("Train set shape:", train_data.shape)
        print("Validation set shape:", valid_data.shape)
        return train_data, valid_data

    print("Test set shape:", test_data.shape)
    return test_data
83
+
84
+
85
+
view.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gradio
2
# Demo rows: each entry is [table, caption]. The table holds a single row of
# 13 string-encoded values, and the caption is a free-text label for that row.
# NOTE(review): captions embed PT_NO values -- confirm these are de-identified.
examples = [
    [
        [["1", "0", "0", "104", "24", "10.6", "171", "14.54", "236", "182", "12.33", "3.2", "72"]],
        "PT_NO = 10001862, VISIBLE_STONE_CT = True, REAL_STONE = True",
    ],
    [
        [["0", "1", "0", "106", "18", "13.6", "388", "21.13", "196", "118", "1.87", "2.7", "58"]],
        "PT_NO = 10007376, VISIBLE_STONE_CT = True, REAL_STONE = True",
    ],
    [
        [["1", "0", "1", "205", "18", "9.3", "103", "8.45", "440", "100", "4.21", "4.5", "63"]],
        "PT_NO = 10040285, VISIBLE_STONE_CT = False, REAL_STONE = True",
    ],
    [
        [["0", "1", "1", "130", "20", "12.1", "192", "8.63", "47", "59", "0.02", "0.4", "57"]],
        "PT_NO = 10005545, VISIBLE_STONE_CT = False, REAL_STONE = False",
    ],
]
20
+
21
# Column names for the demo input table, in display order (13 columns).
tabular_header = [
    "DUCT_DILIATATION_8MM",
    "DUCT_DILIATATION_10MM",
    "PANCREATITIS",
    "FIRST_SBP",
    "FIRST_RR",
    "Hb",
    "PLT",
    "WBC",
    "ALP",
    "AST",
    "CRP",
    "BILIRUBIN",
    "AGE",
]
22
+
23
+ description = """
24
+ GPU ๋ฆฌ์†Œ์Šค ์ œ์•ฝ์œผ๋กœ ์ธํ•ด, ์˜จ๋ผ์ธ ๋ฐ๋ชจ์—์„œ๋Š” NVIDIA RTX 3090 24GB๋ฅผ ์‚ฌ์šฉํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. \n
25
+
26
+ **Note**: ํ˜„์žฌ ์ €ํฌ ๋ชจ๋ธ์€ **์ด๋‹ด๊ด€๊ฒฐ์„์ฆ**์˜ ๋ถ„์„ ๋ฐ ์ง„๋‹จ์„ ์ค‘์‹ฌ์œผ๋กœ ์ตœ์ ํ™”๋˜์–ด ์žˆ์œผ๋ฉฐ, ์ •ํ™•ํ•˜๊ณ  ์‹ ๋ขฐํ•  ์ˆ˜ ์žˆ๋Š” ๊ฒฐ๊ณผ๋ฅผ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. \n
27
+ ๋ชจ๋ธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์€ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ๋ฅผ ์ฒ˜๋ฆฌํ•˜๋ฉฐ, ์•„๋ž˜์™€ ๊ฐ™์ด ๊ฐ๊ฐ **์ด์‚ฐํ˜•(discrete)** **์—ฐ์†ํ˜•(continuous)** ๋ฐ์ดํ„ฐ๋กœ ์ฒ˜๋ฆฌ๋ฉ๋‹ˆ๋‹ค. \n
28
+
29
+ - ์ด์‚ฐํ˜• ๋ณ€์ˆ˜:
30
+ - DUCT_DILIATATION_8MM
31
+ - DUCT_DILIATATION_10MM
32
+ - PANCREATITIS
33
+
34
+ - ์—ฐ์†ํ˜• ๋ณ€์ˆ˜:
35
+ - FIRST_SBP (Systolic blood pressure)
36
+ - FIRST_RR (Respiratory rate)
37
+ - Hb (Hemoglobin)
38
+ - PLT (Platelet)
39
+ - WBC (White Blood Cell)
40
+ - ALP (Alkaline Phosphatase)
41
+ - ALT (Alanine Aminotransferase)
42
+ - AST (Aspartate Aminotransferase)
43
+ - CRP (C-Reactive Protein)
44
+ - BILIRUBIN
45
+ - AGE
46
+
47
+ **์ค‘์š”**: ์ž…๋ ฅ ๋ฐ์ดํ„ฐ์˜ ์ปฌ๋Ÿผ์ด ๋ณ€๊ฒฝ(์ถ”๊ฐ€, ์‚ญ์ œ)๋  ๊ฒฝ์šฐ, ๋ชจ๋ธ์˜ ์˜ˆ์ธก ๊ฒฐ๊ณผ๊ฐ€ ๋‹ฌ๋ผ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. \n
48
+ ๋”ฐ๋ผ์„œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ์˜ ๊ตฌ์กฐ๋ฅผ ๋ณ€๊ฒฝํ•˜๊ธฐ ์ „์— ๋ชจ๋ธ์˜ ์žฌํ•™์Šต ๋˜๋Š” ์žฌ๊ฒ€์ฆ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. \n
49
+ """
50
+
51
# Markdown header for the demo page: Korean and English titles, a reference
# link, and the copyright line. Non-ASCII text restored from mis-decoded UTF-8.
title_markdown = ("""
# 임상 데이터 기반 머신러닝을 이용한 총담관석 예측 모델
## Development of a Common Bile Duct Stone Prediction Model Using Machine Learning Based on Clinical Data
[📖[Learn more about Common Bile Duct Stones (총담관결석증)](https://namu.wiki/w/%EC%B4%9D%EB%8B%B4%EA%B4%80%EA%B2%B0%EC%84%9D%EC%A6%9D)]
### Copyright © 2024 Dongguk University (DGU) and Dongguk University Medical Center (DUMC). All rights reserved.
""")