Spaces:

mariam-ahmed15
/

Deepfake-audio-detection

Running

App Files Files Community

mariam-ahmed15 committed about 22 hours ago

Commit

3fc49d8

verified ·

1 Parent(s): b7e88e7

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -15

app.py CHANGED Viewed

@@ -5,34 +5,31 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtra
 # 1. CONFIGURATION
 MODEL_ID = "facebook/wav2vec2-xls-r-300m"
-QUANTIZED_MODEL_PATH = "quantized_model.pth"
-# 2. LOAD MODEL
 print("Loading model architecture...")
-# A. Load the skeleton (empty weights)
 model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID, num_labels=2)
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
-# B. Apply the quantization structure (Must happen BEFORE loading weights)
-# This changes the Linear layers to INT8 format so the keys match
 model = torch.quantization.quantize_dynamic(
     model, {torch.nn.Linear}, dtype=torch.qint8
 )
-# C. Load your trained quantized weights
 print("Loading quantized weights...")
 model.load_state_dict(torch.load(QUANTIZED_MODEL_PATH, map_location=torch.device('cpu')))
 model.eval()
-# 3. DEFINE PREDICTION FUNCTION
 def predict_audio(audio_path):
     if audio_path is None:
         return "No Audio Provided"
-    # Load and resample audio to 16kHz
     speech_array, sr = librosa.load(audio_path, sr=16000)
-    # Process inputs
     inputs = feature_extractor(
         speech_array,
         sampling_rate=16000,
@@ -43,10 +40,9 @@ def predict_audio(audio_path):
     with torch.no_grad():
         logits = model(**inputs).logits
-    # Convert logits to probabilities
     probs = torch.nn.functional.softmax(logits, dim=-1)
-    # Assuming Label 0 = Real, Label 1 = Deepfake (Adjust based on your training!)
     fake_prob = probs[0][1].item()
     real_prob = probs[0][0].item()
@@ -55,14 +51,17 @@ def predict_audio(audio_path):
         "Real": real_prob
     }
-# 4. CREATE API INTERFACE
-# This creates a visual UI *and* a hidden API endpoint
 iface = gr.Interface(
     fn=predict_audio,
-    inputs=gr.Audio(type="filepath"),
     outputs=gr.Label(num_top_classes=2),
     title="Deepfake Audio Detection API",
-    description="Upload an audio file to check if it's real or fake."
 )
 iface.launch()

 # 1. CONFIGURATION
 MODEL_ID = "facebook/wav2vec2-xls-r-300m"
+QUANTIZED_MODEL_PATH = "quantized_model.pth"
+# 2. LOAD MODEL
 print("Loading model architecture...")
 model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID, num_labels=2)
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
+# Apply quantization structure
 model = torch.quantization.quantize_dynamic(
     model, {torch.nn.Linear}, dtype=torch.qint8
 )
+# Load weights
 print("Loading quantized weights...")
 model.load_state_dict(torch.load(QUANTIZED_MODEL_PATH, map_location=torch.device('cpu')))
 model.eval()
+# 3. PREDICTION FUNCTION
 def predict_audio(audio_path):
     if audio_path is None:
         return "No Audio Provided"
+    # Load and resample
     speech_array, sr = librosa.load(audio_path, sr=16000)
     inputs = feature_extractor(
         speech_array,
         sampling_rate=16000,
     with torch.no_grad():
         logits = model(**inputs).logits
     probs = torch.nn.functional.softmax(logits, dim=-1)
+    # Label 0 = Real, Label 1 = Deepfake (Double check your own labels!)
     fake_prob = probs[0][1].item()
     real_prob = probs[0][0].item()
         "Real": real_prob
     }
+# 4. CREATE INTERFACE (Modified for Upload Only)
 iface = gr.Interface(
     fn=predict_audio,
+    inputs=gr.Audio(
+        sources=["upload"],
+        type="filepath",
+        label="Upload Audio File"
+    ),
     outputs=gr.Label(num_top_classes=2),
     title="Deepfake Audio Detection API",
+    description="Upload an audio file (WAV/MP3) to check if it's real or fake."
 )
 iface.launch()