Spaces:
Running
Running
eaysu committed on
Commit ·
92a5582
1
Parent(s): 03996b3
initial commit
Browse files- .gitignore +41 -0
- .gradio/certificate.pem +31 -0
- README.md +66 -5
- app.py +166 -0
- requirements.txt +10 -0
- speech_brain_text.py +17 -0
- voice_temp_1.wav +1 -0
- voice_temp_2.wav +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
venv/
|
| 9 |
+
ENV/
|
| 10 |
+
build/
|
| 11 |
+
develop-eggs/
|
| 12 |
+
dist/
|
| 13 |
+
downloads/
|
| 14 |
+
eggs/
|
| 15 |
+
.eggs/
|
| 16 |
+
lib/
|
| 17 |
+
lib64/
|
| 18 |
+
parts/
|
| 19 |
+
sdist/
|
| 20 |
+
var/
|
| 21 |
+
wheels/
|
| 22 |
+
*.egg-info/
|
| 23 |
+
.installed.cfg
|
| 24 |
+
*.egg
|
| 25 |
+
|
| 26 |
+
# Model cache
|
| 27 |
+
ecapa/
|
| 28 |
+
|
| 29 |
+
# Gradio
|
| 30 |
+
flagged/
|
| 31 |
+
|
| 32 |
+
# IDE
|
| 33 |
+
.vscode/
|
| 34 |
+
.idea/
|
| 35 |
+
*.swp
|
| 36 |
+
*.swo
|
| 37 |
+
*~
|
| 38 |
+
|
| 39 |
+
# OS
|
| 40 |
+
.DS_Store
|
| 41 |
+
Thumbs.db
|
.gradio/certificate.pem
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN CERTIFICATE-----
|
| 2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
| 3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
| 4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
| 5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
| 6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
| 7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
| 8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
| 9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
| 10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
| 11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
| 12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
| 13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
| 14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
| 15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
| 16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
| 17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
| 18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
| 19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
| 20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
| 21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
| 22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
| 23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
| 24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
| 25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
| 26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
| 27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
| 28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
| 29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
| 30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
| 31 |
+
-----END CERTIFICATE-----
|
README.md
CHANGED
|
@@ -1,12 +1,73 @@
|
|
| 1 |
---
|
| 2 |
title: Voice Similarity Checker
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Voice Similarity Checker
|
| 3 |
+
emoji: 🎙️
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: violet
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 🎙️ Voice Similarity Checker
|
| 14 |
+
|
| 15 |
+
Compare two voice samples using advanced speaker recognition AI powered by SpeechBrain's ECAPA-TDNN model.
|
| 16 |
+
|
| 17 |
+
## 🚀 Quick Start
|
| 18 |
+
|
| 19 |
+
### Run Locally
|
| 20 |
+
```bash
|
| 21 |
+
# Install dependencies
|
| 22 |
+
pip install -r requirements.txt
|
| 23 |
+
|
| 24 |
+
# Launch the Gradio interface
|
| 25 |
+
python app.py
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
### Run Original Script
|
| 29 |
+
```bash
|
| 30 |
+
python speech_brain_text.py
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## ✨ Features
|
| 34 |
+
|
| 35 |
+
- 🎤 **Compare Any Audio Files**: Upload two voice samples in any format (WAV, MP3, FLAC, etc.)
|
| 36 |
+
- 📊 **Similarity Score**: Get a precise similarity score and speaker verification
|
| 37 |
+
- ⚡ **Performance Metrics**: View detailed metrics including:
|
| 38 |
+
- Elapsed processing time
|
| 39 |
+
- Memory usage statistics
|
| 40 |
+
- CPU utilization
|
| 41 |
+
- 🎨 **Modern UI**: Beautiful, responsive Gradio interface with gradient themes
|
| 42 |
+
|
| 43 |
+
## 📖 How to Use
|
| 44 |
+
|
| 45 |
+
1. Upload your first audio file in the "Voice Sample 1" section
|
| 46 |
+
2. Upload your second audio file in the "Voice Sample 2" section
|
| 47 |
+
3. Click the "Compare Voices" button
|
| 48 |
+
4. View the similarity results and performance metrics
|
| 49 |
+
|
| 50 |
+
## 🤖 Model Information
|
| 51 |
+
|
| 52 |
+
This application uses the **SpeechBrain ECAPA-TDNN** model:
|
| 53 |
+
- Model: `speechbrain/spkrec-ecapa-voxceleb`
|
| 54 |
+
- Architecture: ECAPA-TDNN
|
| 55 |
+
- Training Data: VoxCeleb corpus
|
| 56 |
+
|
| 57 |
+
## 📦 Project Structure
|
| 58 |
+
|
| 59 |
+
- `app.py` - Modern Gradio web interface
|
| 60 |
+
- `speech_brain_text.py` - Original CLI script
|
| 61 |
+
- `requirements.txt` - Python dependencies
|
| 62 |
+
- `README.md` - This file
|
| 63 |
+
|
| 64 |
+
## 🌐 Deploy to Hugging Face Spaces
|
| 65 |
+
|
| 66 |
+
This app is ready to deploy to Hugging Face Spaces. Simply:
|
| 67 |
+
1. Create a new Space on Hugging Face
|
| 68 |
+
2. Upload all files from this repository
|
| 69 |
+
3. The app will automatically launch!
|
| 70 |
+
|
| 71 |
+
## 📄 License
|
| 72 |
+
|
| 73 |
+
MIT License
|
app.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
from speechbrain.inference.speaker import SpeakerRecognition
import time
import psutil
import os
import tracemalloc
import tempfile
import shutil

# Initialize the model globally
# Loaded once at import time so every request reuses the same weights.
# NOTE(review): the first run downloads the checkpoint from the Hugging Face
# Hub into ./ecapa — presumably acceptable for a Space; confirm for offline use.
print("Loading SpeechBrain model...")
model = SpeakerRecognition.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",  # pretrained ECAPA-TDNN speaker model
    savedir="ecapa"  # local cache directory for downloaded model files
)
print("Model loaded successfully!")
|
| 17 |
+
|
| 18 |
+
def format_bytes(bytes_value):
    """Render a raw byte count as a human-readable string (B/KB/MB/GB/TB)."""
    size = bytes_value
    for suffix in ('B', 'KB', 'MB', 'GB'):
        if size < 1024.0:
            return f"{size:.2f} {suffix}"
        size = size / 1024.0
    # Anything that survived all four divisions is reported in terabytes.
    return f"{size:.2f} TB"
|
| 25 |
+
|
| 26 |
+
def compare_voices(audio1, audio2):
    """
    Compare two voice recordings with the global ECAPA-TDNN model.

    Parameters
    ----------
    audio1, audio2 : str | None
        Filesystem paths to the uploaded clips (Gradio ``type="filepath"``).

    Returns
    -------
    tuple[str, ...]
        Nine display strings: similarity score, same-speaker verdict,
        interpretation, the two file names, elapsed time, memory used,
        peak traced memory, and CPU usage.  On error the first element
        carries the message and the remaining eight are empty.
    """
    if audio1 is None or audio2 is None:
        return "Please upload both audio files", "", "", "", "", "", "", "", ""

    temp_file1 = None
    temp_file2 = None

    try:
        # Keep the original names only for display in the results panel.
        original_name1 = os.path.basename(audio1)
        original_name2 = os.path.basename(audio2)

        # Work on private copies so the model never touches Gradio's own
        # upload files.  mkstemp gives every request a unique path, so two
        # concurrent users cannot clobber each other (the previous fixed
        # names voice_temp_1/voice_temp_2 in the shared temp dir could).
        fd1, temp_file1 = tempfile.mkstemp(
            suffix=os.path.splitext(audio1)[1], prefix="voice_cmp_")
        os.close(fd1)
        fd2, temp_file2 = tempfile.mkstemp(
            suffix=os.path.splitext(audio2)[1], prefix="voice_cmp_")
        os.close(fd2)

        shutil.copy2(audio1, temp_file1)
        shutil.copy2(audio2, temp_file2)

        # Start tracking performance metrics around the model call only.
        tracemalloc.start()
        process = psutil.Process(os.getpid())
        mem_before = process.memory_info().rss
        start_time = time.time()

        # Perform the speaker verification on the temp copies.
        score, prediction = model.verify_files(temp_file1, temp_file2)

        # Collect metrics.
        elapsed_time = time.time() - start_time
        _, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        mem_used = process.memory_info().rss - mem_before

        similarity_score = score.item()
        is_same_speaker = "Yes" if prediction.item() else "No"

        # 0.25 is the decision threshold used by the pretrained model's
        # same/different-speaker prediction.
        if similarity_score > 0.25:
            interpretation = f"✅ Same Speaker (Score above threshold: {similarity_score:.4f} > 0.25)"
        else:
            interpretation = f"❌ Different Speakers (Score below threshold: {similarity_score:.4f} ≤ 0.25)"

        return (
            f"{similarity_score:.4f}",
            is_same_speaker,
            interpretation,
            original_name1,
            original_name2,
            f"{elapsed_time:.3f} seconds",
            format_bytes(mem_used),
            format_bytes(peak),
            # NOTE(review): the first cpu_percent() call on a fresh Process
            # returns 0.0 by design (no prior sample) — indicative only.
            f"{process.cpu_percent():.1f}%"
        )

    except Exception as e:
        # Surface the failure in the UI instead of crashing the worker.
        return f"Error: {str(e)}", "", "", "", "", "", "", "", ""

    finally:
        # Best-effort cleanup of the temp copies; swallow only filesystem
        # errors, not everything (the old bare `except:` also masked
        # KeyboardInterrupt/SystemExit).
        for path in (temp_file1, temp_file2):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError:
                    pass
|
| 110 |
+
|
| 111 |
+
# ---------------------------------------------------------------------------
# Gradio user interface
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Header
    gr.Markdown("# Voice Similarity Checker")
    gr.Markdown("Compare two voice samples using SpeechBrain ECAPA-TDNN speaker recognition")

    # Inputs: two file-path audio widgets side by side.
    with gr.Row():
        audio1 = gr.Audio(label="Voice Sample 1", type="filepath")
        audio2 = gr.Audio(label="Voice Sample 2", type="filepath")

    compare_btn = gr.Button("Compare Voices", variant="primary")

    # Static explanation of how to read the score.
    gr.Markdown("""
    ## Score Interpretation Guide
    The model uses **cosine similarity** with a threshold of **0.25**:
    - **Score > 0.25**: ✅ **Same Speaker** (voices match)
    - **Score ≤ 0.25**: ❌ **Different Speakers** (voices don't match)

    *Higher scores indicate greater similarity. Scores range from -1 to 1, but typically fall between 0 and 1 for voice comparisons.*
    """)

    gr.Markdown("## Results")

    # Output widgets — one column for the analysis, one for the metrics.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Similarity Analysis")
            similarity_score = gr.Textbox(label="Similarity Score", interactive=False)
            same_speaker = gr.Textbox(label="Same Speaker (Model Prediction)", interactive=False)
            interpretation = gr.Textbox(label="Interpretation", interactive=False)
            file1_name = gr.Textbox(label="Audio File 1", interactive=False)
            file2_name = gr.Textbox(label="Audio File 2", interactive=False)

        with gr.Column():
            gr.Markdown("### Performance Metrics")
            elapsed_time = gr.Textbox(label="Elapsed Time", interactive=False)
            memory_used = gr.Textbox(label="Memory Used", interactive=False)
            peak_memory = gr.Textbox(label="Peak Memory", interactive=False)
            cpu_usage = gr.Textbox(label="CPU Usage", interactive=False)

    # Wire the button to the comparison function; output order must match
    # the tuple returned by compare_voices.
    compare_btn.click(
        fn=compare_voices,
        inputs=[audio1, audio2],
        outputs=[similarity_score, same_speaker, interpretation, file1_name, file2_name,
                 elapsed_time, memory_used, peak_memory, cpu_usage],
    )

# Launch the app when run as a script (Spaces imports and launches it too).
if __name__ == "__main__":
    demo.launch(
        share=True,  # Creates a public link for Hugging Face Spaces
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
speechbrain==1.0.0
|
| 3 |
+
torch==2.1.0
|
| 4 |
+
torchaudio==2.1.0
|
| 5 |
+
numpy<2.0.0
|
| 6 |
+
huggingface-hub<1.0.0
|
| 7 |
+
psutil>=5.9.0
|
| 8 |
+
soundfile>=0.12.1
|
| 9 |
+
librosa>=0.10.0
|
| 10 |
+
requests
|
speech_brain_text.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from speechbrain.inference.speaker import SpeakerRecognition
import os

# Download (on first run) and load the pretrained ECAPA-TDNN speaker model
# into the local ./ecapa cache directory.
model = SpeakerRecognition.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
    savedir="ecapa"
)

# NOTE(review): hard-coded developer-machine paths — these files will not
# exist on other machines or on Spaces; consider taking them from sys.argv.
file1 = "/Users/enesaysu/Desktop/voices/enes_angry.wav"
file2 = "/Users/enesaysu/Desktop/voices/enes_sad.wav"

# verify_files returns (similarity score tensor, boolean prediction tensor).
score, prediction = model.verify_files(file1, file2)

print(f"\nVoice File 1: {os.path.basename(file1)}")
print(f"Voice File 2: {os.path.basename(file2)}")
print(f"\nSimilarity Score: {score.item():.4f}")
print(f"Same Speaker: {'Yes' if prediction.item() else 'No'}")
|
voice_temp_1.wav
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/var/folders/gt/qsgfz6rd2rz8ssq4jtdxsgdc0000gn/T/voice_temp_1.wav
|
voice_temp_2.wav
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/var/folders/gt/qsgfz6rd2rz8ssq4jtdxsgdc0000gn/T/voice_temp_2.wav
|