Spaces:

Devved11
/

RestAPI_AI_VOICE_DETECTION

Sleeping

App Files Files Community

Devved11 commited on Feb 5

Commit

b7efc93

verified ·

1 Parent(s): f639be2

Update models.py

Browse files

Files changed (1) hide show

models.py +77 -70

models.py CHANGED Viewed

@@ -1,71 +1,78 @@
-from transformers import pipeline
-import librosa
-import numpy as np
-classifier = pipeline(
-    "audio-classification",
-    model="./model",
-    device=-1
-)
-def detect_audio(y: np.ndarray) -> tuple[str, float, str]:
-    """
-    Detect if audio is AI_GENERATED or HUMAN.
-    Returns: classification, confidenceScore (0-1), explanation
-    """
-    try:
-        result = classifier(y)
-        if not result:
-            return "HUMAN", 0.50, "Insufficient audio features detected."
-        # Take top prediction
-        top = result[0]
-        label_lower = top['label'].lower()
-        top_score = top['score']
-        # Flexible mapping for common labels
-        if any(word in label_lower for word in ['ai', 'fake', 'synthetic', 'aivoice']):
-            classification = "AI_GENERATED"
-            confidence = round(top_score, 3)
-        else:
-            classification = "HUMAN"
-            confidence = round(top_score, 3)
-        # Feature-based explanation (judge-friendly)
-        flatness = librosa.feature.spectral_flatness(y=y).mean()
-        pitch = librosa.yin(y, fmin=75, fmax=300)
-        pitch_std = np.std(pitch) if len(pitch) > 0 else 0.0
-        cues = []
-        if flatness > 0.5:
-            cues.append("unnatural high spectral flatness (robotic)")
-        else:
-            cues.append("natural spectral variation")
-        if pitch_std < 10:
-            cues.append("unnatural pitch consistency")
-        else:
-            cues.append("natural pitch variation")
-        # Decide feature-based tendency
-        feature_vote = "AI_GENERATED" if (flatness > 0.5 and pitch_std < 10) else "HUMAN"
-        cues_text = " and ".join(cues)
-        if feature_vote == classification:
-            explanation = (
-                f"{cues_text}, which aligns with the model prediction "
-                f"of {classification.lower()} voice."
-            )
-        else:
-            explanation = (
-                f"{cues_text}. However, the deep learning model detected "
-                f"patterns consistent with {classification.lower()} voice."
-            )
-        explanation = explanation.capitalize()
-        return classification, confidence, explanation
-    except Exception as e:
-        # Fallback on error
         return "HUMAN", 0.50, f"Analysis error: {str(e)}. Treated as human."

+from transformers import pipeline
+import librosa
+import numpy as np
+classifier = None
+def load_model():
+    global classifier
+    if classifier is None:
+        classifier = pipeline(
+            "audio-classification",
+            model="Hemgg/Deepfake-audio-detection",
+            device=-1
+        )
+    return classifier
+def detect_audio(y: np.ndarray) -> tuple[str, float, str]:
+    """
+    Detect if audio is AI_GENERATED or HUMAN.
+    Returns: classification, confidenceScore (0-1), explanation
+    """
+    try:
+        result = load_model()
+        if not result:
+            return "HUMAN", 0.50, "Insufficient audio features detected."
+        # Take top prediction
+        top = result[0]
+        label_lower = top['label'].lower()
+        top_score = top['score']
+        # Flexible mapping for common labels
+        if any(word in label_lower for word in ['ai', 'fake', 'synthetic', 'aivoice']):
+            classification = "AI_GENERATED"
+            confidence = round(top_score, 3)
+        else:
+            classification = "HUMAN"
+            confidence = round(top_score, 3)
+        # Feature-based explanation (judge-friendly)
+        flatness = librosa.feature.spectral_flatness(y=y).mean()
+        pitch = librosa.yin(y, fmin=75, fmax=300)
+        pitch_std = np.std(pitch) if len(pitch) > 0 else 0.0
+        cues = []
+        if flatness > 0.5:
+            cues.append("unnatural high spectral flatness (robotic)")
+        else:
+            cues.append("natural spectral variation")
+        if pitch_std < 10:
+            cues.append("unnatural pitch consistency")
+        else:
+            cues.append("natural pitch variation")
+        # Decide feature-based tendency
+        feature_vote = "AI_GENERATED" if (flatness > 0.5 and pitch_std < 10) else "HUMAN"
+        cues_text = " and ".join(cues)
+        if feature_vote == classification:
+            explanation = (
+                f"{cues_text}, which aligns with the model prediction "
+                f"of {classification.lower()} voice."
+            )
+        else:
+            explanation = (
+                f"{cues_text}. However, the deep learning model detected "
+                f"patterns consistent with {classification.lower()} voice."
+            )
+        explanation = explanation.capitalize()
+        return classification, confidence, explanation
+    except Exception as e:
+        # Fallback on error
         return "HUMAN", 0.50, f"Analysis error: {str(e)}. Treated as human."