RJ40under40 committed on
Commit
e8d09f3
·
verified ·
1 Parent(s): 972137d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -1,12 +1,14 @@
1
  # ======================================================
2
- # HCL AI VOICE DETECTION API – HF SPACES (STABLE)
3
  # ======================================================
4
 
5
  import base64
6
  import io
7
  import logging
 
8
  import torch
9
  import soundfile as sf
 
10
 
11
  from fastapi import FastAPI, HTTPException, Depends, Security
12
  from fastapi.middleware.cors import CORSMiddleware
@@ -21,7 +23,6 @@ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
21
  API_KEY_NAME = "access_token"
22
  API_KEY_VALUE = "HCL_SECURE_KEY_2026"
23
 
24
- # ✅ VERIFIED audio-classification model
25
  MODEL_ID = "superb/wav2vec2-base-superb-ks"
26
  TARGET_SR = 16000
27
 
@@ -63,7 +64,6 @@ app.add_middleware(
63
  class AudioRequest(BaseModel):
64
  audio_base64: str
65
 
66
-
67
  # ======================================================
68
  # SECURITY
69
  # ======================================================
@@ -72,21 +72,40 @@ async def verify_api_key(api_key: str = Security(api_key_header)):
72
  raise HTTPException(status_code=403, detail="Invalid API Key")
73
  return api_key
74
 
75
-
76
  # ======================================================
77
- # AUDIO + INFERENCE
78
  # ======================================================
79
  def decode_audio(b64_audio: str):
80
  try:
 
81
  audio_bytes = base64.b64decode(b64_audio.split(",")[-1])
 
 
82
  audio, sr = sf.read(io.BytesIO(audio_bytes))
 
 
 
 
 
 
83
  if sr != TARGET_SR:
84
- raise ValueError("Audio must be 16kHz")
 
 
 
 
 
85
  return audio
86
- except Exception as e:
87
- raise HTTPException(status_code=400, detail=f"Audio decode failed: {e}")
88
 
 
 
 
 
 
89
 
 
 
 
90
  def analyze_voice(audio):
91
  inputs = feature_extractor(
92
  audio,
@@ -105,7 +124,6 @@ def analyze_voice(audio):
105
 
106
  return label, round(confidence.item(), 4)
107
 
108
-
109
  # ======================================================
110
  # ENDPOINTS
111
  # ======================================================
@@ -113,7 +131,6 @@ def analyze_voice(audio):
113
  def health():
114
  return {"status": "ok", "device": DEVICE}
115
 
116
-
117
  @app.post("/predict")
118
  async def predict(
119
  request: AudioRequest,
 
1
  # ======================================================
2
+ # HCL AI VOICE DETECTION API – FINAL WORKING VERSION
3
  # ======================================================
4
 
5
  import base64
6
  import io
7
  import logging
8
+ import numpy as np
9
  import torch
10
  import soundfile as sf
11
+ import librosa
12
 
13
  from fastapi import FastAPI, HTTPException, Depends, Security
14
  from fastapi.middleware.cors import CORSMiddleware
 
23
  API_KEY_NAME = "access_token"
24
  API_KEY_VALUE = "HCL_SECURE_KEY_2026"
25
 
 
26
  MODEL_ID = "superb/wav2vec2-base-superb-ks"
27
  TARGET_SR = 16000
28
 
 
64
  class AudioRequest(BaseModel):
65
  audio_base64: str
66
 
 
67
  # ======================================================
68
  # SECURITY
69
  # ======================================================
 
72
  raise HTTPException(status_code=403, detail="Invalid API Key")
73
  return api_key
74
 
 
75
  # ======================================================
76
+ # AUDIO DECODING (ROBUST – AUTO FIXES SAMPLE RATE)
77
  # ======================================================
78
  def decode_audio(b64_audio: str):
79
  try:
80
+ # Decode Base64
81
  audio_bytes = base64.b64decode(b64_audio.split(",")[-1])
82
+
83
+ # Read audio
84
  audio, sr = sf.read(io.BytesIO(audio_bytes))
85
+
86
+ # Stereo → mono
87
+ if audio.ndim > 1:
88
+ audio = np.mean(audio, axis=1)
89
+
90
+ # Resample ANY rate → 16kHz
91
  if sr != TARGET_SR:
92
+ audio = librosa.resample(
93
+ audio.astype(float),
94
+ orig_sr=sr,
95
+ target_sr=TARGET_SR
96
+ )
97
+
98
  return audio
 
 
99
 
100
+ except Exception as e:
101
+ raise HTTPException(
102
+ status_code=400,
103
+ detail=f"Audio decode failed: {str(e)}"
104
+ )
105
 
106
+ # ======================================================
107
+ # INFERENCE
108
+ # ======================================================
109
  def analyze_voice(audio):
110
  inputs = feature_extractor(
111
  audio,
 
124
 
125
  return label, round(confidence.item(), 4)
126
 
 
127
  # ======================================================
128
  # ENDPOINTS
129
  # ======================================================
 
131
  def health():
132
  return {"status": "ok", "device": DEVICE}
133
 
 
134
  @app.post("/predict")
135
  async def predict(
136
  request: AudioRequest,