File size: 2,712 Bytes
e633ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""app_voice.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1op-dtpDLHXAJm53Q-2S04nNsQGjcz18G
"""

import os
import numpy as np
import librosa
import gradio as gr
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")

# Load trained Keras classifier from disk (expects the .h5 file next to this script).
model = load_model("voice_verifier_model.h5")

# Rebuild the label encoder by hand instead of un-pickling a fitted one:
# classes_ is assigned directly, so index 0 -> 'Fake', index 1 -> 'Real'.
# This MUST match the label order used at training time.
encoder = LabelEncoder()
encoder.classes_ = np.array(['Fake', 'Real'])  # Adjust if your label order is different

# Feature extraction
def extract_features(file_path):
    """Extract a fixed-size MFCC feature vector from an audio file.

    Loads up to 3 seconds of audio (skipping the first 0.5 s) and returns
    the per-coefficient mean of 40 MFCCs, i.e. a 1-D array of length 40.
    Returns None when no file was provided or the audio cannot be decoded,
    so the caller can show a friendly message instead of crashing the UI.
    """
    # Gradio passes None when the user hits Submit without uploading a
    # file; bail out early instead of relying on librosa to raise.
    if file_path is None:
        return None
    try:
        # offset=0.5 skips the leading half-second (clicks/silence);
        # duration=3 keeps the input length consistent with training.
        audio, sample_rate = librosa.load(file_path, duration=3, offset=0.5)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Average over time frames -> fixed (40,) vector for the model.
        return np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort by design: report the problem and signal failure
        # with None rather than propagate into the web UI.
        print("Audio processing error:", e)
        return None

# Prediction function
def predict_audio(file):
    """Classify an uploaded voice clip as real or AI-generated.

    Returns a Markdown-formatted verdict with a confidence percentage,
    or a warning message when the audio could not be processed.
    """
    feature_vector = extract_features(file)
    if feature_vector is None:
        return "⚠️ **Oops! Couldn't understand the audio. Try again with a clear `.wav` file.**"

    # The model expects a batch dimension: (1, 40).
    probs = model.predict(feature_vector.reshape(1, -1))[0]
    best = int(np.argmax(probs))
    label = encoder.inverse_transform([best])[0]
    confidence = round(probs[best] * 100, 2)

    if label.lower() == "real":
        msg = f"πŸ§ πŸ—£οΈ **Real Human Voice Detected!**\n🟒 Confidence: **{confidence}%**"
        advice = "βœ… No robots here. It's a real person!"
    else:
        msg = f"πŸ€–πŸŽ™οΈ **AI-Generated Voice Detected!**\nπŸ”΄ Confidence: **{confidence}%**"
        advice = "⚠️ Synthetic voice detected. Be cautious!"

    return f"{msg}\n\n{advice}"

# App description shown in the Gradio UI header (rendered as Markdown).
# NOTE: this is a runtime string displayed to users — keep content as-is.
description = """
πŸŽ™οΈ Welcome to **Voice Verifier 3000**
πŸ” Detect whether a voice is **REAL** or **AI-generated** using a deep learning model trained on human vs synthetic audio.

---

### πŸ€– Why Use This?
- πŸ›‘οΈ Catch deepfake voices in seconds
- πŸŽ™οΈ Validate voiceovers, interviews, or online calls
- πŸ” Useful for researchers, content moderators, or just curious minds

---

### πŸ“‚ How to Use:
1. Upload a `.wav` file (3–5 seconds)
2. Click **Submit**
3. Instantly see the voice verdict with confidence level!

---

πŸ”₯ Built with ❀️ using TensorFlow + Librosa + Gradio
"""

# Gradio UI: wire predict_audio to a file-upload input and a Markdown output.
# launch() blocks and starts the local web server as a module-level side
# effect (typical for Colab/Spaces scripts).
gr.Interface(
    fn=predict_audio,
    # type="filepath" makes Gradio hand predict_audio a path string,
    # which extract_features passes straight to librosa.load.
    inputs=gr.Audio(type="filepath", label="πŸ“‚ Upload your voice (.wav only)"),
    outputs="markdown",
    title="🧠 Voice Verifier 3000: Human vs AI Voice Detector",
    description=description,
    theme="default"
).launch()