Spaces:

Mohammedelhakim
/

KidGuard_Crydetection_cryClassification

Running

App Files Files Community

Mohammedelhakim committed on Feb 10

Commit

c846a2e

1 Parent(s): 4fa298b

Add application file

Browse files

Files changed (16) hide show

Dockerfile +13 -0
app.py +187 -0
classification_models/babycry_ensemble.pkl +3 -0
classification_models/feature_selector.pkl +3 -0
classification_models/label_encoder.pkl +3 -0
classification_models/scaler.pkl +3 -0
detection_models/emb_test.npy +3 -0
detection_models/emb_train.npy +3 -0
detection_models/emb_val.npy +3 -0
detection_models/pca_yamnet.pkl +3 -0
detection_models/scaler_yamnet.pkl +3 -0
detection_models/y_test.npy +3 -0
detection_models/y_train.npy +3 -0
detection_models/y_val.npy +3 -0
detection_models/yamnet_lr_model.joblib +3 -0
requirements.txt +14 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Container image for the FastAPI service defined in app.py.
+FROM python:3.13
+# Create an unprivileged account (UID 1000, with a home directory) and switch
+# to it, so every following instruction and the server itself run as "user".
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin on PATH; presumably this is where pip places
+# console scripts such as uvicorn when run as this non-root user - confirm.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# requirements.txt is copied and installed before the rest of the sources,
+# so the dependency install step is separate from the application copy.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the application code and the model artifacts, owned by "user".
+COPY --chown=user . /app
+# Serve the "app" object from app.py on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,187 @@

+# app.py
+from fastapi import FastAPI, UploadFile, File, HTTPException
+import traceback
+import numpy as np
+import librosa
+import joblib
+import tempfile
+import os
+import tensorflow as tf
+import tensorflow_hub as hub
+# =========================
+# Configuration: sample rate and relative paths of the serialized model artifacts
+# =========================
+SR = 16000  # sample rate (Hz) passed to every librosa.load call in this module
+DETECTOR_MODEL_PATH = "detection_models/yamnet_lr_model.joblib"  # detector whose predict_proba is thresholded in detect_is_cry
+DETECTOR_SCALER_PATH = "detection_models/scaler_yamnet.pkl"  # scaler applied to the pooled YAMNet features
+DETECTOR_PCA_PATH = "detection_models/pca_yamnet.pkl"  # PCA applied after the detector scaler
+CLASS_ENSEMBLE_PATH = "classification_models/babycry_ensemble.pkl"  # classifier whose predict_proba is used in classify_cry
+CLASS_SCALER_PATH = "classification_models/scaler.pkl"  # scaler applied to the hand-crafted features
+CLASS_SELECTOR_PATH = "classification_models/feature_selector.pkl"  # feature selector applied after the classifier scaler
+CLASS_LE_PATH = "classification_models/label_encoder.pkl"  # maps the winning class index back to a label
+# =========================
+# Load models (ONCE): everything below runs at import time and is shared by all requests
+# =========================
+yamnet = hub.load("https://tfhub.dev/google/yamnet/1")  # loaded by URL; presumably needs network access on first start unless cached - confirm
+det_model = joblib.load(DETECTOR_MODEL_PATH)  # NOTE(review): joblib/pickle files can execute code when loaded; only load artifacts you trust
+det_scaler = joblib.load(DETECTOR_SCALER_PATH)
+det_pca = joblib.load(DETECTOR_PCA_PATH)
+ensemble = joblib.load(CLASS_ENSEMBLE_PATH)
+cls_scaler = joblib.load(CLASS_SCALER_PATH)
+feature_selector = joblib.load(CLASS_SELECTOR_PATH)
+label_encoder = joblib.load(CLASS_LE_PATH)
+# =========================
+# Feature Extraction
+# =========================
+def extract_yamnet_embedding(path):
+    wav, _ = librosa.load(path, sr=SR, mono=True)
+    waveform = tf.convert_to_tensor(wav, dtype=tf.float32)
+    _, embeddings, _ = yamnet(waveform)
+    emb = embeddings.numpy()
+    mean_emb = np.mean(emb, axis=0)
+    std_emb = np.std(emb, axis=0)
+    return np.concatenate([mean_emb, std_emb]).reshape(1, -1)
+def extract_classification_features(path):
+    y, sr = librosa.load(path, sr=SR)
+    stft = np.abs(librosa.stft(y))
+    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40), axis=1)
+    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr), axis=1)
+    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr), axis=1)
+    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr), axis=1)
+    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr), axis=1)
+        # Time-domain features (ensure 1D)
+    zero_crossing = np.mean(librosa.feature.zero_crossing_rate(y))
+    energy = np.mean(librosa.feature.rms(y=y))
+        # Spectral features (ensure 1D)
+    spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
+    spec_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
+    spec_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
+    spec_flatness = np.mean(librosa.feature.spectral_flatness(y=y))
+    combined_features = np.concatenate([
+            mfcc[:40],              # First 40 MFCCs
+            chroma[:12],            # 12 chroma features
+            mel[:40],               # First 40 mel features
+            contrast[:7],           # 7 contrast features
+            tonnetz[:6],            # 6 tonnetz features
+            [zero_crossing],        # 1 feature
+            [energy],               # 1 feature
+            [spec_centroid],        # 1 feature
+            [spec_bandwidth],       # 1 feature
+            [spec_rolloff],         # 1 feature
+            [spec_flatness]         # 1 feature
+        ])
+    return combined_features.reshape(1,-1)
+# =========================
+# Detection & Classification
+# =========================
+def detect_is_cry(path, threshold):
+    feat = extract_yamnet_embedding(path)
+    feat = det_scaler.transform(feat)
+    feat = det_pca.transform(feat)
+    prob = det_model.predict_proba(feat)[0][0]
+    is_cry = bool(prob >= threshold)   # 🔥 الحل هنا
+    return is_cry, float(prob)
+def classify_cry(path, conf_threshold):
+    feat = extract_classification_features(path)
+    current_len = feat.shape[1]
+    expected_len = getattr(cls_scaler, "n_features_in_", None)
+    if expected_len is not None and current_len != expected_len:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Feature length mismatch: got {current_len}, expected {expected_len}"
+        )
+    print("feat shape at classify_cry:", feat.shape)  # should be (1, 111)
+    print("scaler expects:", cls_scaler.n_features_in_)  # should be 111
+    feat_scaled = cls_scaler.transform(feat)
+    feat_selector = feature_selector.transform(feat_scaled)
+    probs = ensemble.predict_proba(feat_selector)[0]
+    max_prob = float(np.max(probs))
+    if max_prob < conf_threshold:
+        return "Normal / Not a Cry", None, max_prob
+    label = label_encoder.inverse_transform([np.argmax(probs)])[0]
+    return label, probs.tolist(), max_prob
+# =========================
+# FastAPI App: the ASGI application object that the route decorators attach to
+# =========================
+app = FastAPI(  # served as "app:app" by the uvicorn command in the Dockerfile
+    title="Baby Cry Detection & Classification API",
+    version="1.0"
+)
+@app.post("/predict")
+async def predict(
+    file: UploadFile = File(...),
+    detection_threshold: float = 0.212,
+    classification_threshold: float = 0.6
+):
+    if not file.filename.lower().endswith((".wav", ".mp3", ".flac", ".ogg")):
+        raise HTTPException(status_code=400, detail="Invalid audio format")
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        tmp.write(await file.read())
+        tmp_path = tmp.name
+    try:
+        try:
+            is_cry, cry_prob = detect_is_cry(tmp_path, detection_threshold)
+            response = {
+                "filename": file.filename,
+                "cry_probability": cry_prob,
+                "is_cry": is_cry,
+            }
+            if not is_cry:
+                response["result"] = "Not a cry"
+                return response
+            label, probs, confidence = classify_cry(
+                tmp_path,
+                classification_threshold
+            )
+            response.update({
+                "result": label,
+                "confidence": confidence,
+                "class_probabilities": probs,
+            })
+            return response
+        except Exception as e:
+            # Log full traceback to the server console
+            traceback.print_exc()
+            # Return the error message so you see it in the client
+            raise HTTPException(
+                status_code=500,
+                detail=f"Prediction failed: {e}"
+            )
+    finally:
+        os.remove(tmp_path)

classification_models/babycry_ensemble.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0a01a2aa3a97291870a49b6588debc071458e471d88d7b6395ce337e6f7711a
+size 67580166

classification_models/feature_selector.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03d498641f16a348d33e5a02b9a82a2971df660327ae3d47ebccab6f0cd2bf09
+size 19013111

classification_models/label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71981ceb1bd0cb5640bb42385899834f2d0686c6b453f4cddda9e2710f63ed1f
+size 527

classification_models/scaler.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88d831a8a7d9411bc67dc1a756f2d470fc6a73a4b6957153f2accb280fff4bb4
+size 3231

detection_models/emb_test.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:099f6632bc63a5ddb94cd3c0614bebca33a14d46a597b953464710387ebd09a4
+size 2424960

detection_models/emb_train.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc02a2176f4e07b682e3fe50e1704b9c2b1d90688a2179c6dcdc29d377fca04a
+size 11296896

detection_models/emb_val.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27e58492b2bc629328a306a1c6b923b23b82b4867c67d4bcd74889cd2477a5bb
+size 2416768

detection_models/pca_yamnet.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:215e13aff4ff1477e1380bbd8b619fc022458d9d3df061dea4b4b1b389c44614
+size 2109354

detection_models/scaler_yamnet.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f3944740c79f5418f0a9cc950a17a62a5112cca206e3aa781a9c2c80a949cb7
+size 49735

detection_models/y_test.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ae89854a1c5ee482237593e8dddb7ffb4b3ef29cc91731347fec99ef5358140
+size 1312

detection_models/y_train.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac93eb0c226983fc38c5791c455d57087161fd9c8f5960f16c6c343bc65b6ca9
+size 5644

detection_models/y_val.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95ec7190d712d7e97f39ddd58361b209d7b7d680eb5fc6552ec2c9ec3dacde52
+size 1308

detection_models/yamnet_lr_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0150d00aeffbe724b53039a072c1ddfc42288607c0ed11e5d46c8ed81b4cc8
+size 1835

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+fastapi==0.128.6
+uvicorn==0.40.0
+python-multipart==0.0.22
+numpy==2.3.5
+scipy==1.17.0
+joblib==1.4.2
+scikit-learn==1.8.0
+librosa==0.11.0
+soundfile==0.13.1
+audioread==3.1.0
+soxr==1.0.0
+tensorflow==2.20.0
+tensorflow-hub==0.16.1