"""Gradio app that compares two voice recordings using Resemblyzer embeddings."""

import os
import time

import gradio as gr
import numpy as np
import psutil
from resemblyzer import VoiceEncoder, preprocess_wav

# Cosine-similarity cut-off at or above which two recordings are reported as
# coming from the same speaker. The UI help text below is rendered from this
# value so the explanation cannot drift from the actual decision rule.
SAME_SPEAKER_THRESHOLD = 0.80

# Initialize the VoiceEncoder once at import time; the handler below reuses it.
encoder = VoiceEncoder()

HOW_IT_WORKS_MD = f"""
## How It Works

1. Upload two voice recordings
2. Click "Analyze Voice Similarity"
3. The app will extract voice embeddings using Resemblyzer
4. The similarity score is calculated using cosine similarity
5. A score >= {SAME_SPEAKER_THRESHOLD:.2f} indicates the same speaker

## Performance Metrics

- Memory usage shows how much RAM is being used
- Execution time measures how long the comparison takes
"""


def _error_result(message):
    """Return the five-output tuple for a failed analysis: blank results plus *message*."""
    return "", "", "", "", message


def _cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity of two vectors, or NaN if either has zero norm."""
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        # Avoid a divide-by-zero warning; the caller treats NaN as a failure.
        return float("nan")
    return float(np.dot(vec_a, vec_b) / denominator)


def analyze_voice_similarity(audio_file1, audio_file2, progress=gr.Progress()):
    """Compare two audio files and report whether they share a speaker.

    Args:
        audio_file1: Path to the first recording (the UI supplies a filepath);
            a falsy value means nothing was uploaded.
        audio_file2: Path to the second recording, same convention.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is how
            Gradio is told to inject a tracker, so it is intentionally an
            instance default rather than ``None``.

    Returns:
        A 5-tuple of strings matching the click handler's outputs:
        ``(similarity, conclusion, memory_usage, execution_time, error)``.
        On success ``error`` is ``""``; on failure the first four entries are
        ``""`` and ``error`` holds a human-readable message.
    """
    progress(0, desc="Starting analysis...")

    # Validate inputs
    if not audio_file1 or not audio_file2:
        return _error_result("Please upload both audio files")

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during the run.
    start_time = time.perf_counter()

    # Handle on the current process, used for the memory reading below.
    process = psutil.Process(os.getpid())

    # Preprocess audio files
    try:
        progress(0.2, desc="Processing first audio file...")
        wav1 = preprocess_wav(audio_file1)
        progress(0.4, desc="Processing second audio file...")
        wav2 = preprocess_wav(audio_file2)
    except Exception as exc:  # UI boundary: report the failure instead of crashing
        return _error_result(f"Error processing audio files: {exc}")

    # Extract speaker embeddings. Guarded like the preprocessing step so a
    # failure here is shown in the UI rather than raised out of the handler.
    progress(0.6, desc="Extracting voice embeddings...")
    try:
        embed1 = encoder.embed_utterance(wav1)
        embed2 = encoder.embed_utterance(wav2)
    except Exception as exc:  # UI boundary: report the failure instead of crashing
        return _error_result(f"Error extracting voice embeddings: {exc}")

    # Calculate cosine similarity between embeddings
    progress(0.8, desc="Calculating similarity...")
    similarity = _cosine_similarity(embed1, embed2)
    if not np.isfinite(similarity):
        # A NaN score would otherwise fall through the comparison below and be
        # reported as "DIFFERENT PEOPLE", which is misleading.
        return _error_result(
            "Could not compute a similarity score: one of the recordings "
            "produced an empty or invalid voice embedding"
        )

    # Determine if voices are from the same source
    if similarity >= SAME_SPEAKER_THRESHOLD:
        result = "SAME PERSON"
    else:
        result = "DIFFERENT PEOPLE"

    # Resident set size of this process, converted from bytes to MB
    memory_usage = process.memory_info().rss / (1024 * 1024)

    execution_time = time.perf_counter() - start_time

    progress(1.0, desc="Analysis complete!")

    # Return the individual values, one per output component
    return (
        f"{similarity:.4f}",
        result,
        f"{memory_usage:.2f} MB",
        f"{execution_time:.4f} seconds",
        "",  # No error
    )


# Create Gradio interface
with gr.Blocks(title="Voice Similarity Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎤 Voice Similarity Analyzer")
    gr.Markdown(
        "Upload two audio files to check if they're from the same person. "
        f"A similarity score >= {SAME_SPEAKER_THRESHOLD:.2f} indicates the same speaker."
    )

    with gr.Row():
        with gr.Column():
            audio_input1 = gr.Audio(label="Voice Sample 1", type="filepath")
        with gr.Column():
            audio_input2 = gr.Audio(label="Voice Sample 2", type="filepath")

    analyze_button = gr.Button("Analyze Voice Similarity", variant="primary")

    # Error message display
    error_message = gr.Markdown(visible=True)

    # Results display section
    with gr.Group(visible=True):
        gr.Markdown("## Results will appear here after analysis")
        with gr.Row():
            with gr.Column():
                similarity_display = gr.Textbox(label="Similarity Score", value="")
            with gr.Column():
                conclusion_display = gr.Textbox(label="Conclusion", value="")
        with gr.Row():
            with gr.Column():
                memory_display = gr.Textbox(label="Memory Usage", value="")
            with gr.Column():
                time_display = gr.Textbox(label="Execution Time", value="")

    # Handler for the analyze button; the output order must match the tuple
    # returned by analyze_voice_similarity.
    analyze_button.click(
        fn=analyze_voice_similarity,
        inputs=[audio_input1, audio_input2],
        outputs=[
            similarity_display,
            conclusion_display,
            memory_display,
            time_display,
            error_message,
        ],
    )

    gr.Markdown(HOW_IT_WORKS_MD)

# Launch the app
if __name__ == "__main__":
    demo.launch()