Spaces:

eaysu
/

Voice_Similarity_Checker

Running

App Files Files Community

eaysu committed on Dec 6, 2025

Commit

92a5582

1 Parent(s): 03996b3

initial commit

Browse files

Files changed (8) hide show

.gitignore +41 -0
.gradio/certificate.pem +31 -0
README.md +66 -5
app.py +166 -0
requirements.txt +10 -0
speech_brain_text.py +17 -0
voice_temp_1.wav +1 -0
voice_temp_2.wav +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,41 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Model cache
+ecapa/
+# Gradio
+flagged/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

README.md CHANGED Viewed

@@ -1,12 +1,73 @@
 ---
 title: Voice Similarity Checker
-emoji: ⚡
-colorFrom: yellow
-colorTo: gray
 sdk: gradio
-sdk_version: 6.0.2
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Voice Similarity Checker
+emoji: 🎙️
+colorFrom: purple
+colorTo: violet
 sdk: gradio
+sdk_version: 4.0.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# 🎙️ Voice Similarity Checker
+Compare two voice samples using advanced speaker recognition AI powered by SpeechBrain's ECAPA-TDNN model.
+## 🚀 Quick Start
+### Run Locally
+```bash
+# Install dependencies
+pip install -r requirements.txt
+# Launch the Gradio interface
+python app.py
+```
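+### Run Original Script
+The script compares the two audio files whose paths are set in `file1` and `file2` inside `speech_brain_text.py`; point them at your own recordings before running: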
+```bash
+python speech_brain_text.py
+```
+## ✨ Features
+- 🎤 **Compare Audio Files**: Upload two voice samples in common audio formats (WAV, MP3, FLAC, etc.)
+- 📊 **Similarity Score**: Get a cosine-similarity score and a same-speaker / different-speaker decision (threshold 0.25)
+- ⚡ **Performance Metrics**: View detailed metrics including:
+  - Elapsed processing time
+  - Memory usage statistics
+  - CPU utilization
+- 🎨 **Modern UI**: Clean, responsive Gradio interface
+## 📖 How to Use
+1. Upload your first audio file in the "Voice Sample 1" section
+2. Upload your second audio file in the "Voice Sample 2" section
+3. Click the "Compare Voices" button
+4. View the similarity results and performance metrics
+## 🤖 Model Information
+This application uses the **SpeechBrain ECAPA-TDNN** model:
+- Model: `speechbrain/spkrec-ecapa-voxceleb`
+- Architecture: ECAPA-TDNN
+- Training Data: VoxCeleb corpus
+## 📦 Project Structure
+- `app.py` - Modern Gradio web interface
+- `speech_brain_text.py` - Original CLI script
+- `requirements.txt` - Python dependencies
+- `README.md` - This file
+## 🌐 Deploy to Hugging Face Spaces
+This app is ready to deploy to Hugging Face Spaces. Simply:
+1. Create a new Space on Hugging Face
+2. Upload all files from this repository
+3. The app will automatically launch!
+## 📄 License
+MIT License

app.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import gradio as gr
+from speechbrain.inference.speaker import SpeakerRecognition
+import time
+import psutil
+import os
+import tracemalloc
+import tempfile
+import shutil
+# Load the speaker-verification model once, at import time, so that every
+# comparison request reuses the same instance. `savedir` points at the local
+# "ecapa" directory (listed as the model cache in .gitignore).
+print("Loading SpeechBrain model...")
+model = SpeakerRecognition.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="ecapa")
+print("Model loaded successfully!")
+def format_bytes(bytes_value):
+    """Convert a byte count to a human-readable string.
+    Args:
+        bytes_value: Number of bytes (int or float). May be negative, e.g. a
+            memory delta computed as "after - before".
+    Returns:
+        The value scaled to the largest fitting unit among B, KB, MB, GB and
+        TB (1024-based), formatted with two decimals, e.g. "1.50 KB".
+        Negative inputs keep their sign: -2048 -> "-2.00 KB".
+    """
+    # Scale by magnitude: comparing a negative value against 1024 directly
+    # would always succeed on the first unit and report raw bytes.
+    sign = "-" if bytes_value < 0 else ""
+    magnitude = abs(bytes_value)
+    for unit in ['B', 'KB', 'MB', 'GB']:
+        if magnitude < 1024.0:
+            return f"{sign}{magnitude:.2f} {unit}"
+        magnitude /= 1024.0
+    return f"{sign}{magnitude:.2f} TB"
+def compare_voices(audio1, audio2):
+    """
+    Compare two voice files and return similarity score with metrics.
+    Args:
+        audio1: Path of the first audio file, or None when nothing was provided.
+        audio2: Path of the second audio file, or None when nothing was provided.
+    Returns:
+        A 9-tuple of strings: similarity score, same-speaker verdict
+        ("Yes"/"No"), interpretation, first file name, second file name,
+        elapsed time, resident-memory change, peak traced memory and CPU
+        usage. When an input is missing or an exception occurs, the first
+        element carries the message and the other eight are empty strings.
+    """
+    if audio1 is None or audio2 is None:
+        return "Please upload both audio files", "", "", "", "", "", "", "", ""
+    temp_files = []
+    try:
+        temp_dir = tempfile.gettempdir()
+        # Get original filenames for display
+        original_name1 = os.path.basename(audio1)
+        original_name2 = os.path.basename(audio2)
+        # Work on copies that use fixed base names but keep the original extensions.
+        # NOTE(review): because the names are fixed, two requests running at the
+        # same time would overwrite each other's copies -- confirm that requests
+        # are serialized before raising the app's concurrency.
+        temp_file1 = os.path.join(temp_dir, f"voice_temp_1{os.path.splitext(audio1)[1]}")
+        temp_file2 = os.path.join(temp_dir, f"voice_temp_2{os.path.splitext(audio2)[1]}")
+        # Registered before copying so a partially created copy is still cleaned up.
+        temp_files = [temp_file1, temp_file2]
+        shutil.copy2(audio1, temp_file1)
+        shutil.copy2(audio2, temp_file2)
+        # Start tracking metrics
+        tracemalloc.start()
+        process = psutil.Process(os.getpid())
+        # Prime the CPU counter: the first non-blocking cpu_percent() call on a
+        # Process object returns a meaningless 0.0; the next call reports usage
+        # since this one (and may exceed 100% on multi-core machines).
+        process.cpu_percent(interval=None)
+        mem_before = process.memory_info().rss
+        start_time = time.perf_counter()
+        # Perform voice comparison using temp files
+        score, prediction = model.verify_files(temp_file1, temp_file2)
+        # Calculate metrics
+        elapsed_time = time.perf_counter() - start_time
+        cpu_usage = process.cpu_percent(interval=None)
+        _, peak = tracemalloc.get_traced_memory()
+        mem_used = process.memory_info().rss - mem_before
+        # Format results
+        similarity_score = score.item()
+        is_same_speaker = "Yes" if prediction.item() else "No"
+        # Determine interpretation based on score
+        # The model uses 0.25 as threshold for same/different speaker decision
+        if similarity_score > 0.25:
+            interpretation = f"✅ Same Speaker (Score above threshold: {similarity_score:.4f} > 0.25)"
+        else:
+            interpretation = f"❌ Different Speakers (Score below threshold: {similarity_score:.4f} ≤ 0.25)"
+        # Return individual values
+        return (
+            f"{similarity_score:.4f}",
+            is_same_speaker,
+            interpretation,
+            original_name1,
+            original_name2,
+            f"{elapsed_time:.3f} seconds",
+            format_bytes(mem_used),
+            format_bytes(peak),
+            f"{cpu_usage:.1f}%"
+        )
+    except Exception as e:
+        # UI boundary: report the failure in the first output field instead of raising.
+        return f"Error: {e}", "", "", "", "", "", "", "", ""
+    finally:
+        # Stop tracing even when the comparison failed, so a failed request
+        # does not leave allocation tracing switched on.
+        if tracemalloc.is_tracing():
+            tracemalloc.stop()
+        # Clean up temporary files (best effort: a missing or locked file is not an error here)
+        for temp_path in temp_files:
+            try:
+                os.remove(temp_path)
+            except OSError:
+                pass
+# Build the web UI: two audio inputs, a compare button, a short guide to the
+# score, and two result columns (similarity analysis / performance metrics).
+with gr.Blocks() as demo:
+    gr.Markdown("# Voice Similarity Checker")
+    gr.Markdown("Compare two voice samples using SpeechBrain ECAPA-TDNN speaker recognition")
+    # Inputs are handed to the callback as file paths (type="filepath").
+    with gr.Row():
+        audio1 = gr.Audio(label="Voice Sample 1", type="filepath")
+        audio2 = gr.Audio(label="Voice Sample 2", type="filepath")
+    compare_btn = gr.Button("Compare Voices", variant="primary")
+    gr.Markdown("""
+    ## Score Interpretation Guide
+    The model uses **cosine similarity** with a threshold of **0.25**:
+    - **Score > 0.25**: ✅ **Same Speaker** (voices match)
+    - **Score ≤ 0.25**: ❌ **Different Speakers** (voices don't match)
+    *Higher scores indicate greater similarity. Scores range from -1 to 1, but typically fall between 0 and 1 for voice comparisons.*
+    """)
+    gr.Markdown("## Results")
+    with gr.Row():
+        # Left column: what the model concluded.
+        with gr.Column():
+            gr.Markdown("### Similarity Analysis")
+            similarity_score = gr.Textbox(label="Similarity Score", interactive=False)
+            same_speaker = gr.Textbox(label="Same Speaker (Model Prediction)", interactive=False)
+            interpretation = gr.Textbox(label="Interpretation", interactive=False)
+            file1_name = gr.Textbox(label="Audio File 1", interactive=False)
+            file2_name = gr.Textbox(label="Audio File 2", interactive=False)
+        # Right column: how much the comparison cost.
+        with gr.Column():
+            gr.Markdown("### Performance Metrics")
+            elapsed_time = gr.Textbox(label="Elapsed Time", interactive=False)
+            memory_used = gr.Textbox(label="Memory Used", interactive=False)
+            peak_memory = gr.Textbox(label="Peak Memory", interactive=False)
+            cpu_usage = gr.Textbox(label="CPU Usage", interactive=False)
+    # Wire the button: the nine outputs are listed in the same order as the
+    # nine values returned by compare_voices.
+    compare_btn.click(
+        fn=compare_voices,
+        inputs=[audio1, audio2],
+        outputs=[
+            similarity_score,
+            same_speaker,
+            interpretation,
+            file1_name,
+            file2_name,
+            elapsed_time,
+            memory_used,
+            peak_memory,
+            cpu_usage,
+        ],
+    )
+# Start the server only when this file is executed directly.
+if __name__ == "__main__":
+    # NOTE(review): share=True asks Gradio for a public share link. A Hugging Face
+    # Space already serves the app publicly, so confirm this flag is still wanted;
+    # on a local run it exposes the app outside the machine.
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        show_error=True,
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+gradio>=4.0.0
+speechbrain==1.0.0
+torch==2.1.0
+torchaudio==2.1.0
+numpy<2.0.0
+huggingface-hub<1.0.0
+psutil>=5.9.0
+soundfile>=0.12.1
+librosa>=0.10.0
+requests

speech_brain_text.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import os
+import sys
+from speechbrain.inference.speaker import SpeakerRecognition
+# Command-line check: compare two recordings and print the similarity score
+# together with the model's same-speaker decision.
+# Usage: python speech_brain_text.py [FILE1 FILE2]
+# With no arguments, the two paths below are used.
+if len(sys.argv) not in (1, 3):
+    sys.exit(f"usage: python {os.path.basename(sys.argv[0])} [FILE1 FILE2]")
+model = SpeakerRecognition.from_hparams(
+    source="speechbrain/spkrec-ecapa-voxceleb",
+    savedir="ecapa"
+)
+if len(sys.argv) == 3:
+    file1, file2 = sys.argv[1], sys.argv[2]
+else:
+    # Defaults kept from the original script; they only exist on the author's machine.
+    file1 = "/Users/enesaysu/Desktop/voices/enes_angry.wav"
+    file2 = "/Users/enesaysu/Desktop/voices/enes_sad.wav"
+score, prediction = model.verify_files(file1, file2)
+print(f"\nVoice File 1: {os.path.basename(file1)}")
+print(f"Voice File 2: {os.path.basename(file2)}")
+print(f"\nSimilarity Score: {score.item():.4f}")
+print(f"Same Speaker: {'Yes' if prediction.item() else 'No'}")

voice_temp_1.wav ADDED Viewed

	@@ -0,0 +1 @@


1	+ /var/folders/gt/qsgfz6rd2rz8ssq4jtdxsgdc0000gn/T/voice_temp_1.wav

voice_temp_2.wav ADDED Viewed

	@@ -0,0 +1 @@


1	+ /var/folders/gt/qsgfz6rd2rz8ssq4jtdxsgdc0000gn/T/voice_temp_2.wav