Spaces:

ready2drop
/

CalculatorCBD

Sleeping

App Files Community

ready2drop committed on Jan 13, 2025

Commit

bb9eeda

verified ·

1 Parent(s): ee42bae

test

Browse files

Files changed (1) hide show

app.py +140 -158

app.py CHANGED Viewed

@@ -1,16 +1,13 @@
 import argparse
 import os
-import io
-import base64
 import matplotlib.pyplot as plt
 import sys
-import bleach
 import gradio as gr
 import torch
-import numpy as np
 import pandas as pd
-import pickle
-from sklearn.preprocessing import StandardScaler
 from lime.lime_tabular import LimeTabularExplainer
 from pycaret.classification import *
 import warnings
@@ -102,20 +99,6 @@ def load_data(data_dir : str,
     #if only  tabular use
     if modality == 'tabular':
         train_df = data
-    print("--------------Scaling--------------")
-    if modality in ['mm', 'tabular']:
-        columns_to_scale = ['Hb', 'PLT', 'WBC', 'ALP', 'ALT',
-       'AST', 'CRP', 'BILIRUBIN', 'FIRST_SBP', 'FIRST_DBP', 'FIRST_HR', 'FIRST_RR',
-       'FIRST_BT','AGE']
-        columns_to_scale_existing = [col for col in columns_to_scale if col in train_df.columns]
-        if columns_to_scale_existing:
-            scaler = MinMaxScaler()
-            train_df[columns_to_scale_existing] = scaler.fit_transform(train_df[columns_to_scale_existing])
-        else:
-            print("No columns to scale.")
     if mode == 'train' or mode == 'test':
         print("--------------Class balance--------------")
@@ -219,11 +202,11 @@ def classify(tabular_data):
         # Convert input data to a pandas DataFrame
         input_data = pd.DataFrame([tabular_data], columns= tabular_header)
-        print(f"Input DataFrame:\n{input_data}")
         # Use PyCaret's predict_model to make predictions
         prediction = predict_model(model, data=input_data)
-        print('OK')
         # Extract predicted class and probability
         predicted_class = prediction.loc[0, "prediction_label"]
         class_probability = prediction.loc[0, "prediction_score"]
@@ -235,144 +218,143 @@ def classify(tabular_data):
     except Exception as e:
         return f"An error occurred during classification: {str(e)}"
-args = parse_args(sys.argv[1:])
-# x_train, y_train, x_val, y_val, x_test, y_test = load_data_and_prepare(args.data_dir, args.excel_file, args.modality, args.phase, args.smote)
-train = load_data_and_prepare(args.data_dir, args.excel_file, args.modality, args.phase, args.smote)
-model = load_model(args.model_name_or_path)
-device = torch.device(args.device)
-# Gradio
-examples = [
-    [
-        [['1', '0', '0', '104', '24', '10.6', '171', '14.54', '236', '182', '12.33', '3.2', '72']],
-        "PT_NO = 10001862, VISIBLE_STONE_CT = True, REAL_STONE = True",
-    ],
-    [
-        [['0', '1','0','106','18','13.6', '388', '21.13', '196', '118', '1.87', '2.7', '58']],
-        "PT_NO = 10007376, VISIBLE_STONE_CT = True, REAL_STONE = True",
-    ],
-    [
-        [['1', '0','1','205','18','9.3', '103', '8.45', '440', '100', '4.21', '4.5', '63']],
-        "PT_NO = 10040285, VISIBLE_STONE_CT = False, REAL_STONE = True",
-    ],
-    [
-        [['0', '1','1','130','20','12.1', '192', '8.63', '47', '59', '0.02', '0.4', '57']],
-        "PT_NO = 10005545, VISIBLE_STONE_CT = False, REAL_STONE = False",
-    ],
-]
-tabular_header = ['DUCT_DILIATATION_8MM', 'DUCT_DILIATATION_10MM','PANCREATITIS','FIRST_SBP','FIRST_RR','Hb', 'PLT', 'WBC', 'ALP', 'AST', 'CRP', 'BILIRUBIN', 'AGE']
-description = """
-GPU 리소스 제약으로 인해, 온라인 데모에서는 NVIDIA RTX 3090 24GB를 사용하고 있습니다. \n
-**Note**: 현재 저희 모델은 **총담관결석증**의 분석 및 진단을 중심으로 최적화되어 있으며, 정확하고 신뢰할 수 있는 결과를 제공합니다. \n
-모델은 다음과 같은 입력 데이터를 처리하며, 아래와 같이 각각 **이산형(discrete)** **연속형(continuous)** 데이터로 처리됩니다. \n
-- 이산형 변수:
-  - DUCT_DILIATATION_8MM
-  - DUCT_DILIATATION_10MM
-  - PANCREATITIS
-- 연속형 변수:
-  - FIRST_SBP (Systolic blood pressure)
-  - FIRST_RR (Respiratory rate)
-  - Hb (Hemoglobin)
-  - PLT (Platelet)
-  - WBC (White Blood Cell)
-  - ALP (Alkaline Phosphatase)
-  - ALT (Alanine Aminotransferase)
-  - AST (Aspartate Aminotransferase)
-  - CRP (C-Reactive Protein)
-  - BILIRUBIN
-  - AGE
-**중요**: 입력 데이터의 컬럼이 변경(추가, 삭제)될 경우, 모델의 예측 결과가 달라질 수 있습니다. \n
-따라서 입력 데이터의 구조를 변경하기 전에 모델의 재학습 또는 재검증이 필요합니다. \n
-"""
-title_markdown = ("""
-# 임상 데이터 기반 머신러닝을 이용한 총담관석 예측 모델
-## Development of a Common Bile Duct Stone Prediction Model Using Machine Learning Based on Clinical Data
-[📖[Learn more about Common Bile Duct Stones (총담관결석증)](https://namu.wiki/w/%EC%B4%9D%EB%8B%B4%EA%B4%80%EA%B2%B0%EC%84%9D%EC%A6%9D)]
-### Copyright © 2024 Dongguk University (DGU) and Dongguk University Medical Center (DUMC). All rights reserved.
-""")
-# def explain_with_lime(tabular_data):
-#     """
-#     Apply LIME to explain predictions.
-#     Args:
-#         tabular_data (list): List of input data points (e.g., rows in a dataframe)
-#     Returns:
-#         str: HTML or image showing LIME explanation
-#     """
-#     input_data = np.array(tabular_data, dtype=float)
-#     explainer = LimeTabularExplainer(
-#         training_data=x_train.values,  # Replace with your training data
-#         feature_names=tabular_header,
-#         class_names=['intermediate', 'High'],  # Replace with actual class names
-#         mode='classification'
-#     )
-#     explanation = explainer.explain_instance(
-#         input_data[0],  # Single instance to explain
-#         model.predict_proba,  # Probability prediction function
-#         num_features=len(tabular_header)
-#     )
-#     # Plot LIME explanation
-#     fig = explanation.as_pyplot_figure()
-#     fig.set_size_inches(25, 8)
-#     buf = io.BytesIO()
-#     fig.savefig(buf, format='png')
-#     buf.seek(0)
-#     encoded_image = base64.b64encode(buf.read()).decode('utf-8')
-#     buf.close()
-#     plt.close(fig)
-#     return f"<img src='data:image/png;base64,{encoded_image}'/>"
-tabular_header = ['DUCT_DILIATATION_8MM', 'DUCT_DILIATATION_10MM','PANCREATITIS','FIRST_SBP','FIRST_RR','Hb', 'PLT', 'WBC', 'ALP', 'AST', 'CRP', 'BILIRUBIN', 'AGE']
-tabular_dtype = ['number'] * len(tabular_header)
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(title_markdown)
-    gr.Markdown(description)
-    with gr.Row():
-        with gr.Column():
-            tabular_input = gr.Dataframe(headers= tabular_header, datatype= tabular_dtype, label="Tabular Input", type="array", interactive=True, row_count=1, col_count=13)
-            info = gr.Textbox(lines=1, label="Patient info", visible = False)
-            with gr.Accordion("Parameters", open=False) as parameter_row:
-                temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True,
-                                        label="Temperature", )
-                top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.1, interactive=True, label="Top P", )
-            with gr.Row():
-                # btn_c = gr.ClearButton([tabular_input])
-                btn_c = gr.Button("Clear")
-                btn = gr.Button("Run")
-    result_output = gr.Textbox(lines=2, label="Classification Result")
-    lime_output = gr.HTML(label="LIME Explanation")
-    gr.Examples(examples=examples, inputs=[tabular_input, info])
-    btn.click(fn=classify, inputs=tabular_input, outputs=result_output)
-    # btn.click(fn=explain_with_lime, inputs=tabular_input, outputs=lime_output)  # Add LIME button
-    # Clear functionality: resets inputs and outputs
-    def clear_fields():
-        return None, None, [[None] * len(tabular_header)]
-    btn_c.click(fn=clear_fields, inputs=[], outputs=[result_output, lime_output, tabular_input])
-demo.queue()
-demo.launch(share=True)

 import argparse
 import os
 import matplotlib.pyplot as plt
 import sys
 import gradio as gr
 import torch
 import pandas as pd
+import numpy as np
+import io
+import base64
 from lime.lime_tabular import LimeTabularExplainer
 from pycaret.classification import *
 import warnings
     #if only  tabular use
     if modality == 'tabular':
         train_df = data
     if mode == 'train' or mode == 'test':
         print("--------------Class balance--------------")
         # Convert input data to a pandas DataFrame
         input_data = pd.DataFrame([tabular_data], columns= tabular_header)
+        print(f"Original Input DataFrame:\n{input_data}")
         # Use PyCaret's predict_model to make predictions
         prediction = predict_model(model, data=input_data)
         # Extract predicted class and probability
         predicted_class = prediction.loc[0, "prediction_label"]
         class_probability = prediction.loc[0, "prediction_score"]
     except Exception as e:
         return f"An error occurred during classification: {str(e)}"
+if __name__ == '__main__':
+    args = parse_args(sys.argv[1:])
+    train = load_data_and_prepare(args.data_dir, args.excel_file, args.modality, args.phase, args.smote)
+    model = load_model(args.model_name_or_path)
+    device = torch.device(args.device)
+    # Gradio
+    examples = [
+        [
+            [['1', '0', '0', '104', '24', '10.6', '171', '14.54', '236', '182', '12.33', '3.2', '72']],
+            "PT_NO = 10001862, VISIBLE_STONE_CT = True, REAL_STONE = True",
+        ],
+        [
+            [['0', '1','0','106','18','13.6', '388', '21.13', '196', '118', '1.87', '2.7', '58']],
+            "PT_NO = 10007376, VISIBLE_STONE_CT = True, REAL_STONE = True",
+        ],
+        [
+            [['1', '0','1','205','18','9.3', '103', '8.45', '440', '100', '4.21', '4.5', '63']],
+            "PT_NO = 10040285, VISIBLE_STONE_CT = False, REAL_STONE = True",
+        ],
+        [
+            [['0', '1','1','130','20','12.1', '192', '8.63', '47', '59', '0.02', '0.4', '57']],
+            "PT_NO = 10005545, VISIBLE_STONE_CT = False, REAL_STONE = False",
+        ],
+    ]
+    tabular_header = ['DUCT_DILIATATION_8MM', 'DUCT_DILIATATION_10MM','PANCREATITIS','FIRST_SBP','FIRST_RR','Hb', 'PLT', 'WBC', 'ALP', 'AST', 'CRP', 'BILIRUBIN', 'AGE']
+    description = """
+    GPU 리소스 제약으로 인해, 온라인 데모에서는 NVIDIA RTX 3090 24GB를 사용하고 있습니다. \n
+    **Note**: 현재 저희 모델은 **총담관결석증**의 분석 및 진단을 중심으로 최적화되어 있으며, 정확하고 신뢰할 수 있는 결과를 제공합니다. \n
+    모델은 다음과 같은 입력 데이터를 처리하며, 아래와 같이 각각 **이산형(discrete)** **연속형(continuous)** 데이터로 처리됩니다. \n
+    - 이산형 변수:
+    - DUCT_DILIATATION_8MM
+    - DUCT_DILIATATION_10MM
+    - PANCREATITIS
+    - 연속형 변수:
+    - FIRST_SBP (Systolic blood pressure)
+    - FIRST_RR (Respiratory rate)
+    - Hb (Hemoglobin)
+    - PLT (Platelet)
+    - WBC (White Blood Cell)
+    - ALP (Alkaline Phosphatase)
+    - ALT (Alanine Aminotransferase)
+    - AST (Aspartate Aminotransferase)
+    - CRP (C-Reactive Protein)
+    - BILIRUBIN
+    - AGE
+    **중요**: 입력 데이터의 컬럼이 변경(추가, 삭제)될 경우, 모델의 예측 결과가 달라질 수 있습니다. \n
+    따라서 입력 데이터의 구조를 변경하기 전에 모델의 재학습 또는 재검증이 필요합니다. \n
+    """
+    title_markdown = ("""
+    # 임상 데이터 기반 머신러닝을 이용한 총담관석 예측 모델
+    ## Development of a Common Bile Duct Stone Prediction Model Using Machine Learning Based on Clinical Data
+    [📖[Learn more about Common Bile Duct Stones (총담관결석증)](https://namu.wiki/w/%EC%B4%9D%EB%8B%B4%EA%B4%80%EA%B2%B0%EC%84%9D%EC%A6%9D)]
+    ### Copyright © 2024 Dongguk University (DGU) and Dongguk University Medical Center (DUMC). All rights reserved.
+    """)
+    # def explain_with_lime(tabular_data):
+    #     """
+    #     Apply LIME to explain predictions.
+    #     Args:
+    #         tabular_data (list): List of input data points (e.g., rows in a dataframe)
+    #     Returns:
+    #         str: HTML or image showing LIME explanation
+    #     """
+    #     input_data = np.array(tabular_data, dtype=float)
+    #     explainer = LimeTabularExplainer(
+    #         training_data=x_train.values,  # Replace with your training data
+    #         feature_names=tabular_header,
+    #         class_names=['intermediate', 'High'],  # Replace with actual class names
+    #         mode='classification'
+    #     )
+    #     explanation = explainer.explain_instance(
+    #         input_data[0],  # Single instance to explain
+    #         model.predict_proba,  # Probability prediction function
+    #         num_features=len(tabular_header)
+    #     )
+    #     # Plot LIME explanation
+    #     fig = explanation.as_pyplot_figure()
+    #     fig.set_size_inches(25, 8)
+    #     buf = io.BytesIO()
+    #     fig.savefig(buf, format='png')
+    #     buf.seek(0)
+    #     encoded_image = base64.b64encode(buf.read()).decode('utf-8')
+    #     buf.close()
+    #     plt.close(fig)
+    #     return f"<img src='data:image/png;base64,{encoded_image}'/>"
+    tabular_header = ['DUCT_DILIATATION_8MM', 'DUCT_DILIATATION_10MM','PANCREATITIS','FIRST_SBP','FIRST_RR','Hb', 'PLT', 'WBC', 'ALP', 'AST', 'CRP', 'BILIRUBIN', 'AGE']
+    tabular_dtype = ['number'] * len(tabular_header)
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown(title_markdown)
+        gr.Markdown(description)
+        with gr.Row():
+            with gr.Column():
+                tabular_input = gr.Dataframe(headers= tabular_header, datatype= tabular_dtype, label="Tabular Input", type="array", interactive=True, row_count=1, col_count=13)
+                info = gr.Textbox(lines=1, label="Patient info", visible = False)
+                with gr.Accordion("Parameters", open=False) as parameter_row:
+                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True,
+                                            label="Temperature", )
+                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.1, interactive=True, label="Top P", )
+                with gr.Row():
+                    # btn_c = gr.ClearButton([tabular_input])
+                    btn_c = gr.Button("Clear")
+                    btn = gr.Button("Run")
+        result_output = gr.Textbox(lines=2, label="Classification Result")
+        lime_output = gr.HTML(label="LIME Explanation")
+        gr.Examples(examples=examples, inputs=[tabular_input, info])
+        btn.click(fn=classify, inputs=tabular_input, outputs=result_output)
+        # btn.click(fn=explain_with_lime, inputs=tabular_input, outputs=lime_output)  # Add LIME button
+        # Clear functionality: resets inputs and outputs
+        def clear_fields():
+            return None, None, [[None] * len(tabular_header)]
+        btn_c.click(fn=clear_fields, inputs=[], outputs=[result_output, lime_output, tabular_input])
+    demo.queue()
+    demo.launch(share=True)