RJ40under40 committed on
Commit
2a2e4a4
·
verified ·
1 Parent(s): c7314ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -43
app.py CHANGED
@@ -1,28 +1,27 @@
1
  # ======================================================
2
- # HCL AI VOICE DETECTION API
3
- # Hugging Face Spaces (FastAPI)
4
  # ======================================================
5
 
6
  import base64
7
  import io
8
  import logging
9
- import librosa
10
  import torch
 
11
 
12
  from fastapi import FastAPI, HTTPException, Depends, Security
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from fastapi.security.api_key import APIKeyHeader
15
  from pydantic import BaseModel
16
 
17
- from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
18
 
19
  # ======================================================
20
- # CONFIGURATION
21
  # ======================================================
22
  API_KEY_NAME = "access_token"
23
  API_KEY_VALUE = "HCL_SECURE_KEY_2026"
24
 
25
- MODEL_ID = "facebook/wav2vec2-base-960h"
26
  TARGET_SR = 16000
27
 
28
  # ======================================================
@@ -32,27 +31,21 @@ logging.basicConfig(level=logging.INFO)
32
  logger = logging.getLogger("voice-detection")
33
 
34
  # ======================================================
35
- # DEVICE & MODEL LOADING (RUNS ON STARTUP)
36
  # ======================================================
37
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
38
  logger.info(f"Using device: {DEVICE}")
39
 
40
- feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
41
- model = AutoModelForAudioClassification.from_pretrained(
42
- MODEL_ID,
43
- num_labels=2
44
- ).to(DEVICE)
45
-
46
  model.eval()
 
47
  logger.info("Model loaded successfully")
48
 
49
  # ======================================================
50
  # FASTAPI APP
51
  # ======================================================
52
- app = FastAPI(
53
- title="HCL AI Voice Detection API",
54
- version="1.0.0"
55
- )
56
 
57
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
58
 
@@ -70,11 +63,6 @@ class AudioRequest(BaseModel):
70
  audio_base64: str
71
 
72
 
73
- class PredictionResponse(BaseModel):
74
- classification: str
75
- confidence_score: float
76
-
77
-
78
  # ======================================================
79
  # SECURITY
80
  # ======================================================
@@ -85,23 +73,21 @@ async def verify_api_key(api_key: str = Security(api_key_header)):
85
 
86
 
87
  # ======================================================
88
- # CORE LOGIC
89
  # ======================================================
90
- def decode_audio(b64_audio: str) -> bytes:
91
  try:
92
- return base64.b64decode(b64_audio.split(",")[-1])
93
- except Exception:
94
- raise HTTPException(status_code=400, detail="Invalid Base64 audio")
95
-
 
 
 
96
 
97
- def analyze_voice(audio_bytes: bytes) -> tuple[str, float]:
98
- audio, _ = librosa.load(
99
- io.BytesIO(audio_bytes),
100
- sr=TARGET_SR,
101
- mono=True
102
- )
103
 
104
- inputs = feature_extractor(
 
105
  audio,
106
  sampling_rate=TARGET_SR,
107
  return_tensors="pt"
@@ -113,9 +99,9 @@ def analyze_voice(audio_bytes: bytes) -> tuple[str, float]:
113
  logits = model(**inputs).logits
114
  probs = torch.softmax(logits, dim=-1)
115
 
116
- confidence, prediction = torch.max(probs, dim=-1)
117
- label = "AI_GENERATED" if prediction.item() == 1 else "HUMAN"
118
 
 
119
  return label, round(confidence.item(), 4)
120
 
121
 
@@ -127,16 +113,13 @@ def health():
127
  return {"status": "ok", "device": DEVICE}
128
 
129
 
130
- @app.post(
131
- "/predict",
132
- response_model=PredictionResponse
133
- )
134
  async def predict(
135
  request: AudioRequest,
136
  _: str = Depends(verify_api_key)
137
  ):
138
- audio_bytes = decode_audio(request.audio_base64)
139
- label, score = analyze_voice(audio_bytes)
140
 
141
  return {
142
  "classification": label,
 
1
  # ======================================================
2
+ # HCL AI VOICE DETECTION API – HF SPACES SAFE
 
3
  # ======================================================
4
 
5
  import base64
6
  import io
7
  import logging
 
8
  import torch
9
+ import soundfile as sf
10
 
11
  from fastapi import FastAPI, HTTPException, Depends, Security
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from fastapi.security.api_key import APIKeyHeader
14
  from pydantic import BaseModel
15
 
16
+ from transformers import AutoProcessor, AutoModelForAudioClassification
17
 
18
  # ======================================================
19
+ # CONFIG
20
  # ======================================================
21
  API_KEY_NAME = "access_token"
22
  API_KEY_VALUE = "HCL_SECURE_KEY_2026"
23
 
24
+ MODEL_ID = "superb/wav2vec2-base-superb-ks" # ✅ VERIFIED, EXISTS
25
  TARGET_SR = 16000
26
 
27
  # ======================================================
 
31
  logger = logging.getLogger("voice-detection")
32
 
33
  # ======================================================
34
+ # DEVICE & MODEL
35
  # ======================================================
36
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
37
  logger.info(f"Using device: {DEVICE}")
38
 
39
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
40
+ model = AutoModelForAudioClassification.from_pretrained(MODEL_ID).to(DEVICE)
 
 
 
 
41
  model.eval()
42
+
43
  logger.info("Model loaded successfully")
44
 
45
  # ======================================================
46
  # FASTAPI APP
47
  # ======================================================
48
+ app = FastAPI(title="HCL AI Voice Detection API")
 
 
 
49
 
50
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
51
 
 
63
  audio_base64: str
64
 
65
 
 
 
 
 
 
66
  # ======================================================
67
  # SECURITY
68
  # ======================================================
 
73
 
74
 
75
  # ======================================================
76
+ # AUDIO + INFERENCE
77
  # ======================================================
78
+ def decode_audio(b64_audio: str):
79
  try:
80
+ audio_bytes = base64.b64decode(b64_audio.split(",")[-1])
81
+ audio, sr = sf.read(io.BytesIO(audio_bytes))
82
+ if sr != TARGET_SR:
83
+ raise ValueError("Audio must be 16kHz")
84
+ return audio
85
+ except Exception as e:
86
+ raise HTTPException(status_code=400, detail=f"Audio decode failed: {e}")
87
 
 
 
 
 
 
 
88
 
89
+ def analyze_voice(audio):
90
+ inputs = processor(
91
  audio,
92
  sampling_rate=TARGET_SR,
93
  return_tensors="pt"
 
99
  logits = model(**inputs).logits
100
  probs = torch.softmax(logits, dim=-1)
101
 
102
+ confidence, pred = torch.max(probs, dim=-1)
 
103
 
104
+ label = "AI_GENERATED" if pred.item() == 1 else "HUMAN"
105
  return label, round(confidence.item(), 4)
106
 
107
 
 
113
  return {"status": "ok", "device": DEVICE}
114
 
115
 
116
+ @app.post("/predict")
 
 
 
117
  async def predict(
118
  request: AudioRequest,
119
  _: str = Depends(verify_api_key)
120
  ):
121
+ audio = decode_audio(request.audio_base64)
122
+ label, score = analyze_voice(audio)
123
 
124
  return {
125
  "classification": label,