Spaces:

ROSHANNN123
/

voicedetectionapi

Sleeping

App Files Community

ROSHANNN123 committed on Feb 5

Commit

922c67e

verified ·

1 Parent(s): 045c92b

Update model_service.py

Browse files

Files changed (1) hide show

model_service.py +10 -15

model_service.py CHANGED Viewed

@@ -11,25 +11,23 @@ MODEL_NAME = "Hemgg/Deepfake-audio-detection"
 class ModelService:
     def __init__(self):
-        print(f"Loading model: {MODEL_NAME}...")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
         self.model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME).to(self.device)
-        print(f"Model loaded on {self.device}")
     def preprocess_audio(self, audio_bytes):
-        # Using a temporary file is the most robust way to handle MP3/WAV with FFmpeg
         fd, tmp_path = tempfile.mkstemp(suffix=".audio")
         try:
             with os.fdopen(fd, 'wb') as tmp:
                 tmp.write(audio_bytes)
-            # Load and resample to 16kHz
             speech, _ = librosa.load(tmp_path, sr=16000)
             return speech
         except Exception as e:
-            print(f"Error processing audio: {e}")
-            raise ValueError(f"Invalid audio format: {str(e)}")
         finally:
             if os.path.exists(tmp_path):
                 os.remove(tmp_path)
@@ -45,16 +43,13 @@ class ModelService:
         probs = F.softmax(logits, dim=-1)
         id2label = self.model.config.id2label
         predicted_id = torch.argmax(probs, dim=-1).item()
-        predicted_label = id2label[predicted_id]
-        confidence = probs[0][predicted_id].item()
-        lower_label = predicted_label.lower()
-        if "real" in lower_label or "human" in lower_label or "bonafide" in lower_label:
-            return "HUMAN", confidence
-        else:
-            return "AI_GENERATED", confidence
-# Singleton
 model_service = None
 def get_model_service():
     global model_service

 class ModelService:
     def __init__(self):
+        print("Loading AI Model...")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
         self.model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME).to(self.device)
     def preprocess_audio(self, audio_bytes):
+        # Temp file is the safest way to read MP3/WAV/OGG on cloud servers
         fd, tmp_path = tempfile.mkstemp(suffix=".audio")
         try:
             with os.fdopen(fd, 'wb') as tmp:
                 tmp.write(audio_bytes)
+            # Load and resample to 16kHz (Standard for Wav2Vec2)
             speech, _ = librosa.load(tmp_path, sr=16000)
             return speech
         except Exception as e:
+            raise ValueError(f"Audio processing failed: {str(e)}")
         finally:
             if os.path.exists(tmp_path):
                 os.remove(tmp_path)
         probs = F.softmax(logits, dim=-1)
         id2label = self.model.config.id2label
         predicted_id = torch.argmax(probs, dim=-1).item()
+        # Mapping to Portal Labels
+        lbl = id2label[predicted_id].lower()
+        if "real" in lbl or "human" in lbl or "bonafide" in lbl:
+            return "HUMAN", probs[0][predicted_id].item()
+        return "AI_GENERATED", probs[0][predicted_id].item()
 model_service = None
 def get_model_service():
     global model_service