Spaces:
Sleeping
Sleeping
File size: 2,712 Bytes
e633ada |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# -*- coding: utf-8 -*-
"""app_voice.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1op-dtpDLHXAJm53Q-2S04nNsQGjcz18G
"""
import os
import numpy as np
import librosa
import gradio as gr
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")
# Trained Keras classifier, read from the working directory at import time.
model = load_model("voice_verifier_model.h5")

# Index -> label mapping. The encoder is not fitted here; its classes are
# assigned by hand, so position 0 decodes to 'Fake' and position 1 to 'Real'.
# Adjust if your label order is different.
encoder = LabelEncoder()
encoder.classes_ = np.asarray(["Fake", "Real"])
# Feature extraction
def extract_features(file_path, *, duration=3, offset=0.5, n_mfcc=40):
    """Return the time-averaged MFCC vector of an audio file, or ``None``.

    Loads at most ``duration`` seconds of audio starting ``offset`` seconds
    into the file, computes ``n_mfcc`` MFCCs per frame and averages them
    over the frames.

    Args:
        file_path: Path of the audio file to analyse. ``None`` or an empty
            string (nothing uploaded) yields ``None`` without calling the
            decoder.
        duration: Maximum number of seconds of audio to load.
        offset: Number of seconds to skip at the start of the file.
        n_mfcc: Number of MFCC coefficients to compute. The default matches
            the value this function previously hard-coded; presumably the
            saved model was trained on that width -- confirm before changing.

    Returns:
        The per-coefficient mean of the MFCC matrix as a NumPy array, or
        ``None`` if the file could not be processed, contained no samples
        after the offset, or produced non-finite values.
    """
    if not file_path:
        print("Audio processing error: no audio file provided")
        return None

    try:
        audio, sample_rate = librosa.load(
            file_path, duration=duration, offset=offset
        )
        if audio.size == 0:
            # Nothing left after skipping `offset` seconds.
            print("Audio processing error: audio clip is empty or too short")
            return None
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        features = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Deliberately broad: any decoding or feature failure is reported to
        # the caller as "no features" instead of crashing the UI callback.
        print("Audio processing error:", e)
        return None

    if not np.all(np.isfinite(features)):
        # NaN/inf features would only yield a meaningless prediction.
        print("Audio processing error: non-finite MFCC features")
        return None
    return features
# Prediction function
def predict_audio(file):
    """Classify an audio file and return the verdict as Markdown.

    Args:
        file: Path of the audio file, as handed over by the Gradio audio
            input.

    Returns:
        A Markdown string. When features could be extracted it contains the
        predicted label ("Real" or AI-generated), the confidence in percent
        and a line of advice; otherwise it is an error message asking for a
        clearer ``.wav`` file.
    """
    features = extract_features(file)
    if features is None:
        return "⚠️ **Oops! Couldn't understand the audio. Try again with a clear `.wav` file.**"

    # Add a leading batch axis of size 1 for the model, then take the single
    # row of scores back out of the result.
    probs = model.predict(features.reshape(1, -1))[0]
    index = int(np.argmax(probs))
    label = encoder.inverse_transform([index])[0]
    # Convert to a plain float so a Python number, not a NumPy scalar, is
    # rounded and formatted into the message.
    confidence = round(float(probs[index]) * 100, 2)

    if label.lower() == "real":
        emoji = "🧠🗣️"
        msg = f"{emoji} **Real Human Voice Detected!**\n🟢 Confidence: **{confidence}%**"
        advice = "✅ No robots here. It's a real person!"
    else:
        emoji = "🤖🎙️"
        msg = f"{emoji} **AI-Generated Voice Detected!**\n🔴 Confidence: **{confidence}%**"
        advice = "⚠️ Synthetic voice detected. Be cautious!"
    return f"{msg}\n\n{advice}"
# App description
# Markdown shown under the app title. Blank lines separate the paragraphs,
# headings and horizontal rules so that a line followed directly by `---`
# is not rendered as a heading.
# NOTE(review): the emoji on the "Detect", "Why Use This?", "Useful for" and
# "How to Use" lines were reconstructed from mis-decoded text whose
# distinguishing bytes were lost -- confirm against the original file.
description = """
🎙️ Welcome to **Voice Verifier 3000**

🔍 Detect whether a voice is **REAL** or **AI-generated** using a deep learning model trained on human vs synthetic audio.

---

### 🤔 Why Use This?

- 🛡️ Catch deepfake voices in seconds
- 🎙️ Validate voiceovers, interviews, or online calls
- 📚 Useful for researchers, content moderators, or just curious minds

---

### 🚀 How to Use:

1. Upload a `.wav` file (3–5 seconds)
2. Click **Submit**
3. Instantly see the voice verdict with confidence level!

---

🔥 Built with ❤️ using TensorFlow + Librosa + Gradio
"""
# Gradio UI
# One audio input (handed to predict_audio as a file path) and one Markdown
# output. The interface is bound to `demo`, the conventional name Gradio's
# tooling looks for, and launched as soon as this module runs.
# NOTE(review): the emoji in `label` was reconstructed from mis-decoded text
# whose distinguishing bytes were lost -- confirm against the original file.
demo = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath", label="🔊 Upload your voice (.wav only)"),
    outputs="markdown",
    title="🧠 Voice Verifier 3000: Human vs AI Voice Detector",
    description=description,
    theme="default",
)
demo.launch()