# Source: Hugging Face Space "spriambada3/tbdetection" (status at capture: Sleeping)
# File size: 9,464 Bytes

import gradio as gr
import numpy as np
import joblib
import lightgbm

import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Conv1D,
    LSTM,
    Bidirectional,
    Dense,
    Dropout,
    Concatenate,
    Layer,
    BatchNormalization,
)
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.saving import register_keras_serializable


# 🔹 Attention Layer
@register_keras_serializable()
class Attention(Layer):
    """Attention pooling along axis 1 of the input tensor.

    A single-unit linear ``Dense`` layer scores every position along axis 1,
    the scores are softmax-normalised over that axis, and the inputs are
    summed with those weights, so axis 1 is reduced away in the output.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # NOTE(review): presumably saved weights are keyed by this attribute
        # name, so it is kept unchanged — confirm before renaming.
        self.attention_dense = Dense(1, activation=None)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1."""
        scores = self.attention_dense(inputs)
        weights = tf.nn.softmax(scores, axis=1)
        weighted_inputs = inputs * weights
        return tf.reduce_sum(weighted_inputs, axis=1)


# Also expose the layer under the plain "Attention" key in Keras' global
# custom-object registry, in addition to the decorator-based registration.
get_custom_objects().update({"Attention": Attention})


# 🔹 Normalize Data Properly
def normalize(X):
    """Min-max scale ``X`` column-wise (statistics taken along axis 0).

    A small epsilon (1e-8) is added to the range so that constant columns
    divide safely and come out as zeros.
    """
    column_min = np.min(X, axis=0)
    column_max = np.max(X, axis=0)
    column_range = column_max - column_min + 1e-8
    return (X - column_min) / column_range


# Load the tabular classifier (stored as best_lgbm_model.pkl) and the scaler
# that predict_tuberculosis() applies to its inputs before predicting.
# NOTE: joblib/pickle execute arbitrary code on load — only ship trusted files.
lr_model = joblib.load("saved_model/best_lgbm_model.pkl")
with open("saved_model/scaler_xgb_lgbm.pkl", mode="rb") as scaler_file:
    scaler = pickle.load(scaler_file)

# Input features of the tabular model (dataset columns without 'id' and the
# target). The UI builds one input per entry, in this order.
feature_names = (
    # Free numeric measurements
    ["Age", "CoughScale", "WeightLoss(kg)"]
    # Gender indicator columns
    + ["Gender_Male", "Gender_Other"]
    # Symptom flags
    + ["Fever_Yes", "NightSweats_Yes", "ChestPain_Yes", "Hemoptysis_Yes"]
    # Breathlessness severity indicator columns
    + ["Breathlessness_Moderate", "Breathlessness_Severe"]
    # History and risk-factor flags
    + [
        "ContactHistory_Yes",
        "TravelHistory_Yes",
        "HIVStatus_Positive",
        "PreviousTB_Yes",
    ]
)


# Define the prediction function
def predict_tuberculosis(*features):
    """Classify one patient record with the scaler + tabular model.

    Args:
        *features: One value per model input, passed positionally in the
            order of the UI inputs. ``None`` marks a field the user left
            unanswered.

    Returns:
        ``"Tuberculosis Positive"`` when the model predicts class 1,
        ``"Tuberculosis Negative"`` otherwise, or a string starting with
        ``"Error:"`` when the inputs are incomplete or prediction fails.
    """
    if not features:
        return "Error: no input values provided"

    # np.array(..., dtype=float) silently converts None to NaN rather than
    # raising, so a NaN-tolerant scaler/model would produce a prediction from
    # an incomplete form. Reject unanswered fields explicitly instead.
    unanswered = [
        f"#{position}"
        for position, value in enumerate(features, start=1)
        if value is None
    ]
    if unanswered:
        return f"Error: missing value for input(s) {', '.join(unanswered)}"

    try:
        # Convert inputs to a single-row 2-D array for the scaler and model
        features_array = np.array(features, dtype=float).reshape(1, -1)
        print(features_array)
        features_array_scaled = scaler.transform(features_array)
        print(features_array_scaled)
        prediction = lr_model.predict(features_array_scaled)
        print(prediction)

        return (
            "Tuberculosis Positive" if prediction[0] == 1 else "Tuberculosis Negative"
        )
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {e}"


# Load the Keras model that predict_from_audio() calls with the four audio
# feature arrays (MFCC, chroma, mel spectrogram, spectral contrast).
# NOTE(review): presumably the saved model contains the custom Attention
# layer defined in this file — confirm against the training code.
model = load_model("saved_model/best_model.keras")


# Function to extract audio features
def extract_features(audio_path, sr=22050, max_len=157):
    """Turn an audio file into the four feature arrays the audio model takes.

    MFCC (20 coefficients), chroma, mel spectrogram (128 bands) and spectral
    contrast are computed with librosa, padded with zeros or cropped to
    ``max_len`` frames, transposed to (frames, features), min-max scaled over
    the whole matrix, and wrapped in a leading batch axis of size 1.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        sr: Sample rate the audio is loaded at.
        max_len: Number of time frames every feature is padded/cropped to.

    Returns:
        A 4-tuple ``(mfcc, chroma, mel, spec_contrast)`` of arrays, or
        ``(None, None, None, None)`` if anything fails along the way.
    """

    def fit_length(feature, n_frames):
        # Crop surplus frames, otherwise zero-pad on the right of the time axis.
        current = feature.shape[1]
        if current > n_frames:
            return feature[:, :n_frames]
        return np.pad(
            feature,
            ((0, 0), (0, n_frames - current)),
            mode="constant",
        )

    def min_max_scale(values):
        # Global (whole-matrix) min-max scaling; epsilon guards a zero range.
        lowest = np.min(values)
        return (values - lowest) / (np.max(values) - lowest + 1e-8)

    try:
        signal, sr = librosa.load(audio_path, sr=sr)

        # Each raw feature is shaped (n_features, time).
        raw_features = (
            librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=20),
            librosa.feature.chroma_stft(y=signal, sr=sr),
            librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128),
            librosa.feature.spectral_contrast(y=signal, sr=sr),
        )

        # Fix the length, put time first, scale, then add the batch axis.
        return tuple(
            np.array([min_max_scale(fit_length(raw, max_len).T)])
            for raw in raw_features
        )
    except Exception as e:
        print(f"Error processing audio: {e}")
        return None, None, None, None


# Function to predict from audio
def predict_from_audio(audio_path):
    """Run the audio model on a recording and return a verdict string.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        ``"Tuberculosis Detected"`` when the highest-scoring class is 1,
        ``"No Tuberculosis Detected"`` otherwise, or
        ``"Error processing audio file."`` when feature extraction failed.
    """
    feature_arrays = extract_features(audio_path)
    # extract_features signals failure by returning None for every feature.
    if feature_arrays[0] is None:
        return "Error processing audio file."

    # The model takes the four feature arrays as a list of inputs.
    scores = model.predict(list(feature_arrays))
    predicted_class = np.argmax(scores, axis=1)[0]
    print(predicted_class)

    if predicted_class == 1:
        return "Tuberculosis Detected"
    else:
        return "No Tuberculosis Detected"


def get_description(language):
    """Return the project description and cough-scale guide as Markdown text.

    The English text is returned only when ``language`` is exactly
    ``"English"``; every other value yields the Indonesian text.
    """
    if language != "English":
        return """Proyek ini, dikembangkan oleh ehealth.co.id, memanfaatkan model pembelajaran mesin canggih 
        untuk memprediksi tuberkulosis (TB) berdasarkan data pasien. Anda dapat memprediksi status TB menggunakan 
        fitur kesehatan numerik atau dengan mengunggah file audio yang mungkin berisi suara batuk.
        
        Skala Batuk 1-10:
        
        Skala 1-2 (Ringan):
        Batuk jarang terjadi, hanya sekali-sekali.
        Tidak ada batuk berdahak atau darah.
        Tidak mengganggu aktivitas sehari-hari.
        
        Skala 3-4 (Sedikit):
        Batuk terjadi sesekali, terutama di pagi hari.
        Batuk tidak disertai dahak atau darah.
        Tidak mengganggu aktivitas, tetapi tetap terasa.
        
        Skala 5-6 (Sedang):
        Batuk terjadi beberapa kali sehari, dengan sedikit dahak.
        Mungkin batuk lebih parah saat malam hari atau pagi hari.
        Tidak ada darah pada dahak, tetapi mengganggu kegiatan normal.
        
        Skala 7-8 (Berat):
        Batuk terjadi setiap hari dengan banyak dahak, termasuk kemungkinan darah pada dahak.
        Dapat menyebabkan gangguan tidur atau aktivitas sehari-hari.
        Dapat mengindikasikan infeksi paru yang lebih serius, seperti TB.
        
        Skala 9-10 (Sangat Berat):
        Batuk terus-menerus dan sangat produktif dengan banyak dahak, kemungkinan disertai darah.
        Batuk sangat mengganggu tidur, aktivitas, dan kualitas hidup secara keseluruhan.
        Gejala ini mengindikasikan kemungkinan besar ada infeksi TB aktif yang harus segera diperiksa lebih lanjut oleh dokter.
        """

    return """This project, developed by ehealth.co.id, leverages advanced machine learning models 
        to predict tuberculosis (TB) based on patient data. You can predict TB status using 
        numeric health features or by uploading an audio file that may contain cough sounds.
        
        Cough Scale 1-10:

        Scale 1-2 (Mild):

        1. Cough occurs rarely, only occasionally.
        1. No sputum or blood in the cough.
        1. Does not interfere with daily activities.
        
        Scale 3-4 (Slight):

        1. Cough occurs occasionally, especially in the morning.
        1. Cough is not accompanied by sputum or blood.
        1. Does not interfere with activities but is still noticeable.
        
        Scale 5-6 (Moderate):

        1. Cough occurs several times a day, with some sputum.
        1. The cough may worsen at night or in the morning.
        1. No blood in the sputum, but it disrupts normal activities.
        
        Scale 7-8 (Severe):

        Cough occurs every day with a lot of sputum, including possible blood in the sputum.
        May cause disturbances in sleep or daily activities.
        May indicate a more serious lung infection, such as tuberculosis (TB).
        
        Scale 9-10 (Very Severe):

        Continuous and highly productive cough with a lot of sputum, possibly with blood.
        Cough severely disrupts sleep, activities, and overall quality of life.
        These symptoms likely indicate active TB infection that should be promptly examined by a doctor.

        """


# Language shown when the page first loads; also the first dropdown choice.
DEFAULT_LANG = "English"
# Build the Gradio interface. Components appear in the order they are created
# inside the Blocks context, so statement order here is the page layout.
with gr.Blocks() as demo:

    gr.Markdown("# Tuberculosis Prediction Model")

    # Language selection dropdown
    language_select = gr.Dropdown(
        choices=[DEFAULT_LANG, "Indonesian"],
        value=DEFAULT_LANG,
        label="Select Language",
        interactive=True,
    )

    # Description text, initially rendered in the default language
    description_output = gr.Markdown(get_description(DEFAULT_LANG))

    # Re-render the description whenever the dropdown value changes
    language_select.change(
        fn=get_description,
        inputs=language_select,
        outputs=description_output,
    )

    with gr.Tab("Predict with Numeric Features"):
        # One input per entry of feature_names, in the same order: the three
        # numeric measurements get a Number box, every other feature a 0/1
        # radio. The list order is the positional order in which
        # predict_tuberculosis receives the values.
        inputs = [
            (
                gr.Number(label=feature)
                if feature in ["Age", "CoughScale", "WeightLoss(kg)"]
                else gr.Radio([0, 1], label=feature)
            )
            for feature in feature_names
        ]
        predict_button = gr.Button("Predict Tuberculosis")
        output_text = gr.Textbox(label="Prediction Result")
        predict_button.click(predict_tuberculosis, inputs, output_text)

    with gr.Tab("Predict with Audio File"):
        # type="filepath": the callback is given a path, which
        # predict_from_audio forwards to extract_features.
        audio_input = gr.Audio(type="filepath", label="Upload Audio File")
        predict_audio_button = gr.Button("Predict from Audio")
        audio_output = gr.Textbox(label="Audio Prediction Result")
        predict_audio_button.click(predict_from_audio, audio_input, audio_output)

# Start the Gradio server only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()