Devved11 committed on
Commit
b7efc93
·
verified ·
1 Parent(s): f639be2

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +77 -70
models.py CHANGED
@@ -1,71 +1,78 @@
1
- from transformers import pipeline
2
- import librosa
3
- import numpy as np
4
-
5
- classifier = pipeline(
6
- "audio-classification",
7
- model="./model",
8
- device=-1
9
- )
10
-
11
- def detect_audio(y: np.ndarray) -> tuple[str, float, str]:
12
- """
13
- Detect if audio is AI_GENERATED or HUMAN.
14
- Returns: classification, confidenceScore (0-1), explanation
15
- """
16
- try:
17
- result = classifier(y)
18
- if not result:
19
- return "HUMAN", 0.50, "Insufficient audio features detected."
20
-
21
- # Take top prediction
22
- top = result[0]
23
- label_lower = top['label'].lower()
24
- top_score = top['score']
25
-
26
- # Flexible mapping for common labels
27
- if any(word in label_lower for word in ['ai', 'fake', 'synthetic', 'aivoice']):
28
- classification = "AI_GENERATED"
29
- confidence = round(top_score, 3)
30
- else:
31
- classification = "HUMAN"
32
- confidence = round(top_score, 3)
33
-
34
- # Feature-based explanation (judge-friendly)
35
- flatness = librosa.feature.spectral_flatness(y=y).mean()
36
- pitch = librosa.yin(y, fmin=75, fmax=300)
37
- pitch_std = np.std(pitch) if len(pitch) > 0 else 0.0
38
-
39
- cues = []
40
- if flatness > 0.5:
41
- cues.append("unnatural high spectral flatness (robotic)")
42
- else:
43
- cues.append("natural spectral variation")
44
- if pitch_std < 10:
45
- cues.append("unnatural pitch consistency")
46
- else:
47
- cues.append("natural pitch variation")
48
-
49
- # Decide feature-based tendency
50
- feature_vote = "AI_GENERATED" if (flatness > 0.5 and pitch_std < 10) else "HUMAN"
51
-
52
- cues_text = " and ".join(cues)
53
-
54
- if feature_vote == classification:
55
- explanation = (
56
- f"{cues_text}, which aligns with the model prediction "
57
- f"of {classification.lower()} voice."
58
- )
59
- else:
60
- explanation = (
61
- f"{cues_text}. However, the deep learning model detected "
62
- f"patterns consistent with {classification.lower()} voice."
63
- )
64
-
65
- explanation = explanation.capitalize()
66
-
67
- return classification, confidence, explanation
68
-
69
- except Exception as e:
70
- # Fallback on error
 
 
 
 
 
 
 
71
  return "HUMAN", 0.50, f"Analysis error: {str(e)}. Treated as human."
 
1
+ from transformers import pipeline
2
+ import librosa
3
+ import numpy as np
4
+
5
# Module-level cache for the audio-classification pipeline; stays None
# until load_model() is called for the first time.
classifier = None


def load_model():
    """
    Return the shared audio-classification pipeline, building it on first use.

    The pipeline is stored in the module-level ``classifier`` variable so
    later calls reuse the same object instead of constructing a new one.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "audio-classification" task and the
        "Hemgg/Deepfake-audio-detection" model.
    """
    global classifier

    # Fast path: the pipeline was already created by an earlier call.
    if classifier is not None:
        return classifier

    # device=-1 is the transformers convention for running on CPU.
    classifier = pipeline(
        "audio-classification",
        model="Hemgg/Deepfake-audio-detection",
        device=-1,
    )
    return classifier
16
+
17
+
18
def detect_audio(y: np.ndarray) -> tuple[str, float, str]:
    """
    Detect if audio is AI_GENERATED or HUMAN.

    Runs the lazily loaded audio-classification pipeline on ``y``, maps the
    first returned label to one of the two classes, and builds a
    human-readable explanation from two librosa features (mean spectral
    flatness and the spread of the YIN pitch track).

    Args:
        y: Audio samples as a NumPy array.
            NOTE(review): no sampling rate is passed to the pipeline or to
            librosa, so each uses its own default -- confirm that callers
            resample the audio accordingly.

    Returns:
        A ``(classification, confidenceScore, explanation)`` tuple where
        ``classification`` is "AI_GENERATED" or "HUMAN", ``confidenceScore``
        is the model score rounded to 3 decimals, and ``explanation`` is a
        sentence describing the acoustic cues.

        Any ``Exception`` raised during analysis is caught and reported as
        ``("HUMAN", 0.50, "Analysis error: ... Treated as human.")``.
    """
    try:
        # load_model() only returns the pipeline object; it has to be called
        # on the audio to obtain predictions. (Previously the pipeline itself
        # was stored in `result`, so indexing it always raised and every call
        # fell through to the error fallback below.)
        result = load_model()(y)
        if not result:
            return "HUMAN", 0.50, "Insufficient audio features detected."

        # Take the first prediction (expected to be the highest-scoring one).
        top = result[0]
        label_lower = top['label'].lower()
        confidence = round(top['score'], 3)

        # Flexible mapping for common labels: any label containing one of
        # these substrings counts as AI-generated, everything else as human.
        ai_keywords = ('ai', 'fake', 'synthetic', 'aivoice')
        if any(word in label_lower for word in ai_keywords):
            classification = "AI_GENERATED"
        else:
            classification = "HUMAN"

        # Feature-based explanation (judge-friendly)
        flatness = librosa.feature.spectral_flatness(y=y).mean()
        pitch = librosa.yin(y, fmin=75, fmax=300)
        pitch_std = np.std(pitch) if len(pitch) > 0 else 0.0

        is_flat = flatness > 0.5
        is_monotone = pitch_std < 10

        cues = [
            "unnatural high spectral flatness (robotic)"
            if is_flat else "natural spectral variation",
            "unnatural pitch consistency"
            if is_monotone else "natural pitch variation",
        ]
        cues_text = " and ".join(cues)

        # The features vote AI only when BOTH cues look synthetic.
        feature_vote = "AI_GENERATED" if (is_flat and is_monotone) else "HUMAN"

        if feature_vote == classification:
            explanation = (
                f"{cues_text}, which aligns with the model prediction "
                f"of {classification.lower()} voice."
            )
        else:
            explanation = (
                f"{cues_text}. However, the deep learning model detected "
                f"patterns consistent with {classification.lower()} voice."
            )

        # Upper-case only the first character. str.capitalize() would also
        # lower-case the rest of the text, turning ". However," into
        # ". however,".
        explanation = explanation[:1].upper() + explanation[1:]

        return classification, confidence, explanation

    except Exception as e:
        # Fallback on error: report the failure in the explanation instead of
        # propagating it to the caller.
        return "HUMAN", 0.50, f"Analysis error: {str(e)}. Treated as human."