RJ40under40 committed on
Commit
c336244
·
verified ·
1 Parent(s): 18828c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -60
app.py CHANGED
@@ -6,37 +6,29 @@ import numpy as np
6
  import torch
7
  import librosa
8
  import uvicorn
9
- from fastapi import FastAPI, HTTPException, Security, Depends
10
- from fastapi.middleware.cors import CORSMiddleware
11
- from fastapi.security.api_key import APIKeyHeader
12
  from pydantic import BaseModel
13
  from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
14
 
15
  # ======================================================
16
  # CONFIG & HACKATHON SETTINGS
17
  # ======================================================
18
- # Use the Secret "HF_Token" if the model ever becomes restricted
19
  HF_TOKEN = os.getenv("HF_Token")
20
- API_KEY_NAME = "access_token"
21
- API_KEY_VALUE = "HCL_SECURE_KEY_2026"
22
 
23
- # A stable, high-accuracy public model for synthetic voice detection
24
  MODEL_ID = "Hemgg/Deepfake-audio-detection"
25
  TARGET_SR = 16000
26
-
27
- # Mapping model output indices to required Hackathon strings
28
- # Note: Verified against Hemgg model config (0: Fake/AI, 1: Real/Human)
29
  LABEL_MAP = {0: "AI_GENERATED", 1: "HUMAN"}
30
 
31
  logging.basicConfig(level=logging.INFO)
32
- logger = logging.getLogger("hcl-voice-safety")
 
 
33
 
34
  # ======================================================
35
  # MODEL LOADING
36
  # ======================================================
37
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
38
- logger.info(f"Loading model {MODEL_ID} to {DEVICE}...")
39
-
40
  try:
41
  feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID, token=HF_TOKEN)
42
  model = AutoModelForAudioClassification.from_pretrained(MODEL_ID, token=HF_TOKEN).to(DEVICE)
@@ -50,90 +42,89 @@ except Exception as e:
50
  # API SETUP
51
  # ======================================================
52
  app = FastAPI(title="HCL AI Voice Detection API")
53
- api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
54
-
55
- app.add_middleware(
56
- CORSMiddleware,
57
- allow_origins=["*"],
58
- allow_methods=["*"],
59
- allow_headers=["*"],
60
- )
61
 
62
- class AudioRequest(BaseModel):
63
- audio_base64: str
 
 
64
 
65
- # Security layer
66
- async def verify_api_key(api_key: str = Security(api_key_header)):
67
- if api_key != API_KEY_VALUE:
68
- raise HTTPException(status_code=403, detail="Invalid API Key")
69
- return api_key
 
70
 
71
  # ======================================================
72
  # CORE LOGIC
73
  # ======================================================
74
  def preprocess_audio(b64_string: str):
75
- """Processes base64 audio into a normalized 16kHz waveform."""
76
  try:
77
- # Strip potential data URL prefix
78
  if "," in b64_string:
79
  b64_string = b64_string.split(",")[1]
80
 
81
- # Ensure correct padding for base64
82
- missing_padding = len(b64_string) % 4
83
- if missing_padding:
84
- b64_string += "=" * (4 - missing_padding)
85
-
86
  audio_bytes = base64.b64decode(b64_string)
87
 
88
- # Load audio using librosa (backed by ffmpeg for MP3 support)
89
  with io.BytesIO(audio_bytes) as bio:
90
  audio, sr = librosa.load(bio, sr=TARGET_SR)
91
 
92
- # Padding/Stability: Ensure at least 1 second of audio
93
  if len(audio) < TARGET_SR:
94
  audio = np.pad(audio, (0, TARGET_SR - len(audio)))
95
 
96
  return audio.astype(np.float32)
97
  except Exception as e:
98
- logger.error(f"Audio Preprocessing Failed: {e}")
99
- raise ValueError("Decoding failed. Ensure valid Base64 MP3/WAV.")
100
 
101
- @app.get("/")
102
- def root():
103
- return {"status": "online", "model": MODEL_ID}
 
104
 
105
- @app.post("/predict")
106
- async def predict(request: AudioRequest, _: str = Depends(verify_api_key)):
 
 
 
 
 
 
107
  if model is None:
108
- raise HTTPException(status_code=503, detail="Model unavailable.")
109
 
110
  try:
111
- # 1. Convert B64 to raw waveform
112
- waveform = preprocess_audio(request.audio_base64)
113
 
114
- # 2. Extract features and move to GPU/CPU
115
  inputs = feature_extractor(waveform, sampling_rate=TARGET_SR, return_tensors="pt").to(DEVICE)
116
-
117
- # 3. Model Inference (No Gradient Tracking)
118
  with torch.no_grad():
119
  logits = model(**inputs).logits
120
  probs = torch.softmax(logits, dim=-1)
121
 
122
- # 4. Map result to confidence and label
123
  confidence, pred_idx = torch.max(probs, dim=-1)
124
- label = LABEL_MAP.get(int(pred_idx.item()), "UNKNOWN")
 
125
 
 
126
  return {
127
- "classification": label,
128
- "confidence_score": round(float(confidence.item()), 4)
 
 
 
129
  }
130
 
131
- except ValueError as ve:
132
- raise HTTPException(status_code=400, detail=str(ve))
133
  except Exception as e:
134
- logger.exception("Inference error occurred")
135
- raise HTTPException(status_code=500, detail="Internal server error.")
 
 
 
136
 
137
  if __name__ == "__main__":
138
- # Standard port for Hugging Face Spaces
139
  uvicorn.run("app:app", host="0.0.0.0", port=7860)
 
6
  import torch
7
  import librosa
8
  import uvicorn
9
+ from fastapi import FastAPI, HTTPException, Security, Depends, Header
 
 
10
  from pydantic import BaseModel
11
  from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
12
 
13
  # ======================================================
14
  # CONFIG & HACKATHON SETTINGS
15
  # ======================================================
 
16
HF_TOKEN = os.getenv("HF_Token")
# Read the API key from the environment when provided; fall back to the
# original hardcoded default so existing deployments keep working.
# NOTE(review): a secret should never live in source — set API_KEY in the
# Space secrets and remove the fallback once all clients are migrated.
API_KEY_VALUE = os.getenv("API_KEY", "sk_test_123456789")

# Using the high-accuracy deepfake detection model
MODEL_ID = "Hemgg/Deepfake-audio-detection"
TARGET_SR = 16000  # model expects 16 kHz mono input

# Mapping of model output indices to the response label strings.
LABEL_MAP = {0: "AI_GENERATED", 1: "HUMAN"}

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("hcl-voice-detection")

# Prefer GPU when available; falls back to CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
28
 
29
  # ======================================================
30
  # MODEL LOADING
31
  # ======================================================
 
 
 
32
  try:
33
  feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID, token=HF_TOKEN)
34
  model = AutoModelForAudioClassification.from_pretrained(MODEL_ID, token=HF_TOKEN).to(DEVICE)
 
42
  # API SETUP
43
  # ======================================================
44
  app = FastAPI(title="HCL AI Voice Detection API")
 
 
 
 
 
 
 
 
45
 
46
class VoiceRequest(BaseModel):
    """Request body for the /api/voice-detection endpoint.

    Field names are camelCase — presumably mandated by the hackathon API
    spec; do not rename without checking the client contract.
    """
    # e.g. "en"; echoed back verbatim in the response, never validated here
    language: str
    # declared format of the payload (e.g. "mp3"/"wav"); not read by the server
    audioFormat: str
    # base64-encoded audio bytes, optionally with a "data:...," URL prefix
    audioBase64: str
50
 
51
# Security Layer: Checks for 'x-api-key' in headers
async def verify_api_key(x_api_key: str = Header(None)):
    """FastAPI dependency that validates the 'x-api-key' request header.

    Raises HTTPException 403 when the header is missing or does not match
    API_KEY_VALUE; returns the key on success so it can be injected via
    Depends(). Uses a constant-time comparison to avoid leaking key bytes
    through response timing.
    """
    import secrets  # local import: stdlib constant-time comparison

    # Explicit None guard: compare_digest raises TypeError on None.
    if x_api_key is None or not secrets.compare_digest(x_api_key, API_KEY_VALUE):
        # Standard Hackathon error response for auth
        raise HTTPException(status_code=403, detail="Invalid API key or malformed request")
    return x_api_key
57
 
58
  # ======================================================
59
  # CORE LOGIC
60
  # ======================================================
61
def preprocess_audio(b64_string: str) -> np.ndarray:
    """Decode base64 audio into a mono 16 kHz float32 waveform.

    Accepts raw base64 or a data-URL ("data:audio/...;base64,..."), repairs
    missing base64 padding, decodes via librosa (resampled to TARGET_SR),
    and right-pads clips shorter than one second with silence.

    Raises:
        ValueError: on any decode/load failure (original cause chained).
    """
    try:
        # Clean potential data prefixes
        if "," in b64_string:
            b64_string = b64_string.split(",")[1]

        # Repair base64 padding — clients frequently strip trailing '='.
        missing_padding = len(b64_string) % 4
        if missing_padding:
            b64_string += "=" * (4 - missing_padding)

        # Base64 Decoding
        audio_bytes = base64.b64decode(b64_string)

        # Load via librosa for robust MP3 support
        with io.BytesIO(audio_bytes) as bio:
            audio, _ = librosa.load(bio, sr=TARGET_SR)

        # Padding/Normalization: guarantee at least 1 s so the feature
        # extractor always sees a usable window.
        if len(audio) < TARGET_SR:
            audio = np.pad(audio, (0, TARGET_SR - len(audio)))

        return audio.astype(np.float32)
    except Exception as e:
        logger.error("Preprocessing error: %s", e)
        raise ValueError("Invalid audio data") from e
82
 
83
def generate_explanation(classification: str, confidence: float):
    """Return a canned human-readable rationale for a classification label.

    The `confidence` argument is accepted for interface stability but does
    not influence the message selected.
    """
    ai_text = "Unnatural pitch consistency and robotic speech patterns detected in the spectral analysis."
    human_text = "Natural prosody and human-like frequency variance identified."
    return ai_text if classification == "AI_GENERATED" else human_text
87
 
88
# ======================================================
# ENDPOINTS
# ======================================================
@app.post("/api/voice-detection")
async def voice_detection(
    request: VoiceRequest,
    auth: str = Depends(verify_api_key)
):
    """Classify a base64-encoded audio clip as AI-generated or human speech.

    Auth is enforced by the verify_api_key dependency ('x-api-key' header).
    On success returns {status, language, classification, confidenceScore,
    explanation}. Failures are reported in-band as {"status": "error", ...}
    with HTTP 200 — presumably the hackathon-mandated response contract;
    confirm before changing to HTTP error codes.
    """
    # Model may be None if startup loading failed; degrade gracefully.
    if model is None:
        return {"status": "error", "message": "Model not available"}

    try:
        # 1. Audio Processing
        waveform = preprocess_audio(request.audioBase64)

        # 2. Inference
        inputs = feature_extractor(waveform, sampling_rate=TARGET_SR, return_tensors="pt").to(DEVICE)

        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)

        # Highest-probability class wins; index mapped via LABEL_MAP.
        confidence, pred_idx = torch.max(probs, dim=-1)
        classification = LABEL_MAP.get(int(pred_idx.item()), "UNKNOWN")
        score = round(float(confidence.item()), 2)

        # 3. Response Generation (Matches Hackathon Format)
        return {
            "status": "success",
            "language": request.language,
            "classification": classification,
            "confidenceScore": score,
            "explanation": generate_explanation(classification, score)
        }

    except Exception as e:
        # Catch-all: ValueError from preprocessing and any inference failure
        # are both reported identically, in-band.
        logger.error(f"Inference error: {e}")
        return {
            "status": "error",
            "message": "Malformed request or processing error"
        }
128
 
129
if __name__ == "__main__":
    # Port 7860 is the standard exposed port on Hugging Face Spaces.
    uvicorn.run("app:app", host="0.0.0.0", port=7860)