RJ40under40 committed
Commit 7e73c0d · verified · 1 Parent(s): 9d9e15e

Update app.py

Files changed (1)
  1. app.py +39 -44
app.py CHANGED
@@ -1,5 +1,5 @@
  # ======================================================
- # HCL AI VOICE DETECTION API – FINAL WORKING VERSION
+ # HCL AI VOICE DETECTION API – CRASH-PROOF VERSION
  # ======================================================

  import base64
@@ -42,8 +42,6 @@ feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
  model = AutoModelForAudioClassification.from_pretrained(MODEL_ID).to(DEVICE)
  model.eval()

- logger.info("Model loaded successfully")
-
  # ======================================================
  # FASTAPI APP
  # ======================================================
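
Note: the hunk context references MODEL_ID, DEVICE, TARGET_SR, and logger, which are defined earlier in app.py and are not visible in this diff. A minimal sketch of what that setup plausibly looks like, with MODEL_ID as a hypothetical placeholder:

# Sketch of the module-level setup implied by the hunk context above.
# MODEL_ID is a hypothetical placeholder; the real checkpoint name is
# not shown in this diff.
import logging

import torch
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("app")

MODEL_ID = "org/ai-voice-detector"  # placeholder, not the actual model
TARGET_SR = 16000                   # implied by the resample-to-16kHz logic
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = AutoModelForAudioClassification.from_pretrained(MODEL_ID).to(DEVICE)
model.eval()
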
@@ -73,56 +71,57 @@ async def verify_api_key(api_key: str = Security(api_key_header)):
      return api_key

  # ======================================================
- # AUDIO DECODING (ROBUST – AUTO FIXES SAMPLE RATE)
+ # AUDIO DECODING (SAFE)
  # ======================================================
  def decode_audio(b64_audio: str):
-     try:
-         # Decode Base64
-         audio_bytes = base64.b64decode(b64_audio.split(",")[-1])
-
-         # Read audio
-         audio, sr = sf.read(io.BytesIO(audio_bytes))
-
-         # Stereo → mono
-         if audio.ndim > 1:
-             audio = np.mean(audio, axis=1)
-
-         # Resample ANY rate → 16kHz
-         if sr != TARGET_SR:
-             audio = librosa.resample(
-                 audio.astype(float),
-                 orig_sr=sr,
-                 target_sr=TARGET_SR
-             )
-
-         return audio
-
-     except Exception as e:
-         raise HTTPException(
-             status_code=400,
-             detail=f"Audio decode failed: {str(e)}"
-         )
+     audio_bytes = base64.b64decode(b64_audio.split(",")[-1])
+     audio, sr = sf.read(io.BytesIO(audio_bytes))
+
+     if audio.ndim > 1:
+         audio = np.mean(audio, axis=1)
+
+     if sr != TARGET_SR:
+         audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=TARGET_SR)
+
+     audio = np.nan_to_num(audio)
+
+     if len(audio) < TARGET_SR:
+         audio = np.pad(audio, (0, TARGET_SR - len(audio)))
+
+     return audio.astype(np.float32)

  # ======================================================
- # INFERENCE
+ # INFERENCE (CRASH-PROOF)
  # ======================================================
  def analyze_voice(audio):
-     inputs = feature_extractor(
-         audio,
-         sampling_rate=TARGET_SR,
-         return_tensors="pt"
-     )
-
-     inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-
-     with torch.inference_mode():
-         logits = model(**inputs).logits
-         probs = torch.softmax(logits, dim=-1)
-
-     confidence, pred = torch.max(probs, dim=-1)
-     label = "AI_GENERATED" if pred.item() == 1 else "HUMAN"
-
-     return label, round(confidence.item(), 4)
+     try:
+         inputs = feature_extractor(
+             audio,
+             sampling_rate=TARGET_SR,
+             return_tensors="pt",
+             padding=True
+         )
+         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
+
+         with torch.inference_mode():
+             logits = model(**inputs).logits
+             probs = torch.softmax(logits, dim=-1)
+
+         score, pred = torch.max(probs, dim=-1)
+
+         return {
+             "classification": "UNKNOWN",
+             "confidence_score": round(score.item(), 4),
+             "raw_label_index": int(pred.item())
+         }
+
+     except Exception as e:
+         logger.exception("Model inference failed")
+         return {
+             "classification": "MODEL_ERROR",
+             "confidence_score": 0.0,
+             "error": str(e)
+         }

  # ======================================================
  # ENDPOINTS
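
A quick way to sanity-check the rewritten decode_audio() (run in app.py's namespace, or with the function imported from it): synthesize a short tone at a non-target sample rate, round-trip it through WAV and Base64, and assert the guarantees the new version makes. The tone parameters below are arbitrary.

# Sanity check for the new decode_audio(): a 0.5 s tone at 8 kHz should come
# back mono, float32, resampled to 16 kHz, and zero-padded to one second.
import base64
import io

import numpy as np
import soundfile as sf

sr = 8000
tone = 0.1 * np.sin(2 * np.pi * 440.0 * np.arange(sr // 2) / sr)

buf = io.BytesIO()
sf.write(buf, tone, sr, format="WAV")
b64 = base64.b64encode(buf.getvalue()).decode()

audio = decode_audio(b64)   # the function changed in this hunk
assert audio.dtype == np.float32
assert audio.ndim == 1
assert len(audio) == 16000  # 4000 samples -> 8000 at 16 kHz -> padded to 16000
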
@@ -137,9 +136,5 @@ async def predict(
      _: str = Depends(verify_api_key)
  ):
      audio = decode_audio(request.audio_base64)
-     label, score = analyze_voice(audio)
-
-     return {
-         "classification": label,
-         "confidence_score": score
-     }
+     result = analyze_voice(audio)
+     return result
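
Since the endpoint now returns analyze_voice()'s dict directly, a client call looks roughly like this. The route path (/predict) and the API-key header name (x-api-key) are assumptions; the @app.post decorator and the APIKeyHeader definition sit outside this diff.

# Hypothetical client call; route path and header name are assumptions,
# since the decorator and APIKeyHeader lines are not shown in the diff.
import base64

import requests

with open("sample.wav", "rb") as f:
    payload = {"audio_base64": base64.b64encode(f.read()).decode()}

resp = requests.post(
    "http://localhost:7860/predict",        # assumed route
    json=payload,
    headers={"x-api-key": "YOUR_API_KEY"},  # assumed header name
    timeout=30,
)
print(resp.json())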
 