File size: 9,464 Bytes
46e4a69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9617259
 
46e4a69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9617259
46e4a69
 
 
 
 
9617259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46e4a69
 
9617259
46e4a69
 
9617259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46e4a69
 
 
 
9617259
46e4a69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import gradio as gr
import numpy as np
import joblib
import lightgbm

import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Conv1D,
    LSTM,
    Bidirectional,
    Dense,
    Dropout,
    Concatenate,
    Layer,
    BatchNormalization,
)
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.saving import register_keras_serializable


# 🔹 Attention Layer
@register_keras_serializable()
class Attention(Layer):
    """Attention pooling that collapses axis 1 of its input.

    A single-unit linear ``Dense`` layer scores the input, the scores are
    softmax-normalised along axis 1, and the input is summed along that
    axis using the normalised scores as weights.
    """

    def __init__(self, **kwargs):
        """Create the layer; keyword arguments go to ``Layer.__init__``."""
        super().__init__(**kwargs)
        # NOTE(review): keep this attribute name stable -- presumably the
        # saved model's weights are matched through it; confirm before renaming.
        self.attention_dense = Dense(units=1, activation=None)

    def call(self, inputs):
        """Return ``inputs`` reduced over axis 1 by softmax weights."""
        scores = self.attention_dense(inputs)
        weights = tf.nn.softmax(scores, axis=1)
        weighted_inputs = inputs * weights
        return tf.reduce_sum(weighted_inputs, axis=1)


# Also publish the layer in Keras' global custom-object mapping under the
# key "Attention", on top of the decorator applied to the class itself.
get_custom_objects().update({"Attention": Attention})


# 🔹 Normalize Data Properly
def normalize(X):
    """Min-max scale ``X`` along axis 0.

    Every value is shifted by the minimum taken over axis 0 and divided by
    the range (maximum minus minimum) taken over axis 0. ``1e-8`` is added
    to the range so a constant column produces zeros instead of a division
    by zero.

    Args:
        X: Array-like accepted by ``np.min`` / ``np.max`` with ``axis=0``.

    Returns:
        Array with the same shape as ``X``, scaled to roughly the 0-1 range.
    """
    lowest = np.min(X, axis=0)
    highest = np.max(X, axis=0)
    span = highest - lowest + 1e-8
    return (X - lowest) / span


# Load the artifacts used by the numeric-feature tab once, at import time.
# Both paths are relative to the current working directory.
# NOTE(review): judging by the file name this is a LightGBM model despite the
# `lr_` prefix -- confirm before renaming anything.
lr_model = joblib.load("saved_model/best_lgbm_model.pkl")
# Scaler applied to the raw inputs before they are handed to `lr_model`
# (see `predict_tuberculosis`).
# NOTE(review): unpickling executes arbitrary code, so this file must only
# ever come from a trusted source.
with open("saved_model/scaler_xgb_lgbm.pkl", "rb") as s:
    scaler = pickle.load(s)

# Labels of the inputs on the numeric-feature tab; the Gradio layout further
# down builds one widget per entry. "Age", "CoughScale" and "WeightLoss(kg)"
# become free numeric fields, every other entry becomes a 0/1 radio choice.
# Order matters: the widget values are passed positionally to
# `predict_tuberculosis`, which feeds them to the scaler as a single row.
# NOTE(review): presumably this mirrors the training column order (dataset
# columns excluding 'id' and the target) -- confirm against the training code.
feature_names = [
    "Age",
    "CoughScale",
    "WeightLoss(kg)",
    "Gender_Male",
    "Gender_Other",
    "Fever_Yes",
    "NightSweats_Yes",
    "ChestPain_Yes",
    "Hemoptysis_Yes",
    "Breathlessness_Moderate",
    "Breathlessness_Severe",
    "ContactHistory_Yes",
    "TravelHistory_Yes",
    "HIVStatus_Positive",
    "PreviousTB_Yes",
]


# Define the prediction function
def predict_tuberculosis(*features):
    """Classify one patient record entered on the numeric-feature tab.

    The values are converted to a single float row, passed through the
    module-level ``scaler`` and then classified by ``lr_model``.

    Args:
        *features: The input values, one per form field, in the order the
            scaler and model expect.

    Returns:
        ``"Tuberculosis Positive"`` when the model predicts class 1,
        ``"Tuberculosis Negative"`` otherwise. Any failure is reported as a
        string starting with ``"Error: "`` so the UI can display it; the
        function does not raise for conversion, scaling or prediction errors.
    """
    # Gradio typically passes None for a field that was left empty or a radio
    # that was never selected. Catch that (and a call with no values at all)
    # up front so the user sees an actionable message rather than the raw
    # float()/shape error produced further down.
    if not features or any(value is None for value in features):
        return "Error: please provide a value for every input field."

    try:
        # Convert inputs to a single-row 2-D array, as the scaler expects.
        features_array = np.array(features, dtype=float).reshape(1, -1)
        print(features_array)  # debug trace of the raw inputs
        features_array_scaled = scaler.transform(features_array)
        print(features_array_scaled)  # debug trace of the scaled inputs
        prediction = lr_model.predict(features_array_scaled)
        print(prediction)  # debug trace of the model output

        return (
            "Tuberculosis Positive" if prediction[0] == 1 else "Tuberculosis Negative"
        )
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {e}"


# Load the Keras model used by the audio tab once, at import time (the path
# is relative to the current working directory).
# NOTE(review): presumably the saved model contains the custom `Attention`
# layer defined above, which is why that class is registered before this
# call -- confirm.
model = load_model("saved_model/best_model.keras")


# Function to extract audio features
def _fit_frame_count(feature, target_len):
    """Crop or zero-pad ``feature`` along axis 1 to exactly ``target_len``."""
    shortfall = target_len - feature.shape[1]
    if shortfall < 0:
        return feature[:, :target_len]
    return np.pad(feature, ((0, 0), (0, shortfall)), mode="constant")


def _rescale_globally(values):
    """Min-max scale ``values`` using the min and max of the whole array."""
    lowest = np.min(values)
    highest = np.max(values)
    return (values - lowest) / (highest - lowest + 1e-8)


def extract_features(audio_path, sr=22050, max_len=157):
    """Turn an audio file into the four feature batches fed to the model.

    The file is loaded with librosa, then MFCC (20 coefficients), chroma,
    mel-spectrogram (128 bands) and spectral-contrast matrices are computed.
    Each matrix is cropped or zero-padded to ``max_len`` frames, transposed
    so frames come first, min-max scaled over the whole matrix and wrapped
    in a leading batch axis of size 1.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sampling rate requested from ``librosa.load``.
        max_len: Number of frames kept per feature matrix.

    Returns:
        A 4-tuple ``(mfcc, chroma, mel, spec_contrast)`` of arrays shaped
        ``(1, max_len, n_features)``. If anything fails, the error is
        printed and ``(None, None, None, None)`` is returned instead.
    """
    try:
        signal, rate = librosa.load(audio_path, sr=sr)

        # Each matrix comes back as (n_features, n_frames).
        per_feature = (
            librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20),
            librosa.feature.chroma_stft(y=signal, sr=rate),
            librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=128),
            librosa.feature.spectral_contrast(y=signal, sr=rate),
        )

        # Fix the frame count, put frames first, scale, add the batch axis.
        return tuple(
            np.array([_rescale_globally(_fit_frame_count(matrix, max_len).T)])
            for matrix in per_feature
        )

    except Exception as e:
        print(f"Error processing audio: {e}")
        return None, None, None, None


# Function to predict from audio
def predict_from_audio(audio_path):
    """Classify an audio file with the module-level Keras ``model``.

    Args:
        audio_path: Path of the audio file, forwarded to ``extract_features``.

    Returns:
        ``"Error processing audio file."`` when feature extraction failed,
        ``"Tuberculosis Detected"`` when the highest-scoring output index is
        1, and ``"No Tuberculosis Detected"`` otherwise.
    """
    feature_batches = extract_features(audio_path)
    # extract_features signals failure by returning None in every slot.
    if feature_batches[0] is None:
        return "Error processing audio file."

    scores = model.predict(list(feature_batches))
    # Index of the largest score in the first (and only) row of the batch.
    predicted_class = np.argmax(scores, axis=1)[0]
    print(predicted_class)

    if predicted_class == 1:
        return "Tuberculosis Detected"
    return "No Tuberculosis Detected"


# Description texts shown above the tabs. The indentation of the continuation
# lines is part of the strings and is kept exactly as it was.
_DESCRIPTION_ENGLISH = """This project, developed by ehealth.co.id, leverages advanced machine learning models 
        to predict tuberculosis (TB) based on patient data. You can predict TB status using 
        numeric health features or by uploading an audio file that may contain cough sounds.
        
        Cough Scale 1-10:

        Scale 1-2 (Mild):

        1. Cough occurs rarely, only occasionally.
        1. No sputum or blood in the cough.
        1. Does not interfere with daily activities.
        
        Scale 3-4 (Slight):

        1. Cough occurs occasionally, especially in the morning.
        1. Cough is not accompanied by sputum or blood.
        1. Does not interfere with activities but is still noticeable.
        
        Scale 5-6 (Moderate):

        1. Cough occurs several times a day, with some sputum.
        1. The cough may worsen at night or in the morning.
        1. No blood in the sputum, but it disrupts normal activities.
        
        Scale 7-8 (Severe):

        Cough occurs every day with a lot of sputum, including possible blood in the sputum.
        May cause disturbances in sleep or daily activities.
        May indicate a more serious lung infection, such as tuberculosis (TB).
        
        Scale 9-10 (Very Severe):

        Continuous and highly productive cough with a lot of sputum, possibly with blood.
        Cough severely disrupts sleep, activities, and overall quality of life.
        These symptoms likely indicate active TB infection that should be promptly examined by a doctor.

        """

_DESCRIPTION_INDONESIAN = """Proyek ini, dikembangkan oleh ehealth.co.id, memanfaatkan model pembelajaran mesin canggih 
        untuk memprediksi tuberkulosis (TB) berdasarkan data pasien. Anda dapat memprediksi status TB menggunakan 
        fitur kesehatan numerik atau dengan mengunggah file audio yang mungkin berisi suara batuk.
        
        Skala Batuk 1-10:
        
        Skala 1-2 (Ringan):
        Batuk jarang terjadi, hanya sekali-sekali.
        Tidak ada batuk berdahak atau darah.
        Tidak mengganggu aktivitas sehari-hari.
        
        Skala 3-4 (Sedikit):
        Batuk terjadi sesekali, terutama di pagi hari.
        Batuk tidak disertai dahak atau darah.
        Tidak mengganggu aktivitas, tetapi tetap terasa.
        
        Skala 5-6 (Sedang):
        Batuk terjadi beberapa kali sehari, dengan sedikit dahak.
        Mungkin batuk lebih parah saat malam hari atau pagi hari.
        Tidak ada darah pada dahak, tetapi mengganggu kegiatan normal.
        
        Skala 7-8 (Berat):
        Batuk terjadi setiap hari dengan banyak dahak, termasuk kemungkinan darah pada dahak.
        Dapat menyebabkan gangguan tidur atau aktivitas sehari-hari.
        Dapat mengindikasikan infeksi paru yang lebih serius, seperti TB.
        
        Skala 9-10 (Sangat Berat):
        Batuk terus-menerus dan sangat produktif dengan banyak dahak, kemungkinan disertai darah.
        Batuk sangat mengganggu tidur, aktivitas, dan kualitas hidup secara keseluruhan.
        Gejala ini mengindikasikan kemungkinan besar ada infeksi TB aktif yang harus segera diperiksa lebih lanjut oleh dokter.
        """


def get_description(language):
    """Return the project description and cough-scale guide as text.

    Args:
        language: Selected language name. Exactly ``"English"`` selects the
            English text; any other value selects the Indonesian text.

    Returns:
        The description string for the selected language.
    """
    return _DESCRIPTION_ENGLISH if language == "English" else _DESCRIPTION_INDONESIAN


# Language shown when the page first loads; it is also the first entry of the
# language dropdown below.
DEFAULT_LANG = "English"
# Create the Gradio interface. Components are declared in the order they
# should appear inside the Blocks context.
with gr.Blocks() as demo:

    gr.Markdown("# Tuberculosis Prediction Model")

    # Language selection dropdown
    language_select = gr.Dropdown(
        choices=[DEFAULT_LANG, "Indonesian"],
        value=DEFAULT_LANG,
        label="Select Language",
        interactive=True,
    )

    # Display dynamic description based on selected language
    description_output = gr.Markdown(get_description(DEFAULT_LANG))

    # Update description when language is toggled: the dropdown value is
    # passed to get_description and its return value replaces the Markdown.
    language_select.change(
        fn=get_description,
        inputs=language_select,
        outputs=description_output,
    )

    with gr.Tab("Predict with Numeric Features"):
        # One widget per entry of `feature_names`, in list order: the three
        # continuous features get a number field, all others a 0/1 radio.
        # NOTE(review): the radios have no default value, so an unselected
        # radio presumably reaches predict_tuberculosis as None -- confirm.
        inputs = [
            (
                gr.Number(label=feature)
                if feature in ["Age", "CoughScale", "WeightLoss(kg)"]
                else gr.Radio([0, 1], label=feature)
            )
            for feature in feature_names
        ]
        predict_button = gr.Button("Predict Tuberculosis")
        output_text = gr.Textbox(label="Prediction Result")
        # The widget values are passed positionally, in `inputs` order.
        predict_button.click(predict_tuberculosis, inputs, output_text)

    with gr.Tab("Predict with Audio File"):
        # type="filepath" hands predict_from_audio a path rather than raw samples.
        audio_input = gr.Audio(type="filepath", label="Upload Audio File")
        predict_audio_button = gr.Button("Predict from Audio")
        audio_output = gr.Textbox(label="Audio Prediction Result")
        predict_audio_button.click(predict_from_audio, audio_input, audio_output)

# Run Gradio only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()