Spaces:
Running
on
Zero
Running
on
Zero
| """ | |
| Integration tests for speaker separation workflow | |
| Tests end-to-end speaker separation from M4A input to separated outputs. | |
| """ | |
| import os | |
| from pathlib import Path | |
| import pytest | |
| from src.services.speaker_separation import SpeakerSeparationService | |
class TestSpeakerSeparationWorkflow:
    """Integration tests for the full speaker separation workflow.

    Every test requests the ``service`` fixture, which skips the test when the
    ``HUGGINGFACE_TOKEN`` environment variable is not set. Tests that need a
    sample recording skip when the expected file is missing from the audio
    fixtures directory.
    """

    @pytest.fixture
    def test_audio_dir(self):
        """Return the directory holding the multi-speaker audio fixtures.

        The path is relative, so it is resolved against the working directory
        pytest is started from.
        """
        return Path("audio_fixtures/multi_speaker")

    @pytest.fixture
    def service(self):
        """Create the speaker separation service, or skip without a token."""
        hf_token = os.getenv("HUGGINGFACE_TOKEN")
        if not hf_token:
            pytest.skip("HUGGINGFACE_TOKEN not set")
        return SpeakerSeparationService(hf_token=hf_token)

    @staticmethod
    def _require_audio(audio_dir, filename):
        """Return ``audio_dir / filename``, skipping the test if it is absent."""
        input_file = audio_dir / filename
        if not input_file.exists():
            pytest.skip(f"Test audio not found: {input_file}")
        return input_file

    @staticmethod
    def _make_output_dir(tmp_path):
        """Create and return a fresh ``separated`` directory under *tmp_path*."""
        output_dir = tmp_path / "separated"
        output_dir.mkdir()
        return output_dir

    def test_separate_two_speakers_end_to_end(self, service, test_audio_dir, tmp_path):
        """
        Test complete workflow: M4A input -> speaker separation -> M4A outputs.

        Success Criteria (SC-001): 85%+ speaker separation accuracy
        Success Criteria (SC-002): Processing time <= 2x audio duration

        Accuracy is not measured directly here; the test checks the detected
        speaker count, the exported files and the processing-time bound.
        """
        input_file = self._require_audio(test_audio_dir, "two_speakers.m4a")
        output_dir = self._make_output_dir(tmp_path)

        # Run full separation workflow
        result = service.separate_and_export(
            input_file=str(input_file),
            output_dir=str(output_dir),
            min_speakers=2,
            max_speakers=2,
        )

        # Verify results
        assert result["speakers_detected"] == 2
        assert result["processing_time_seconds"] > 0

        # Check output files exist
        assert len(result["output_files"]) == 2
        for output_info in result["output_files"]:
            output_path = Path(output_info["file"])
            assert output_path.exists()
            assert output_path.suffix == ".m4a"
            assert output_info["duration"] > 0

        # Verify processing time constraint (SC-002)
        audio_duration = result["input_duration_seconds"]
        processing_time = result["processing_time_seconds"]
        max_allowed = audio_duration * 2
        assert processing_time <= max_allowed, (
            f"Processing took {processing_time}s for {audio_duration}s audio (max: {max_allowed}s)"
        )

    def test_separate_three_speakers(self, service, test_audio_dir, tmp_path):
        """Test separation of audio with 3 speakers."""
        input_file = self._require_audio(test_audio_dir, "three_speakers.m4a")
        output_dir = self._make_output_dir(tmp_path)

        result = service.separate_and_export(
            input_file=str(input_file),
            output_dir=str(output_dir),
            min_speakers=2,
            max_speakers=5,  # Allow detection of 3 speakers
        )

        # Should detect 3 speakers
        assert result["speakers_detected"] == 3
        assert len(result["output_files"]) == 3

    def test_separation_report_generation(self, service, test_audio_dir, tmp_path):
        """Test that the separation report contains the expected keys."""
        input_file = self._require_audio(test_audio_dir, "two_speakers.m4a")
        output_dir = self._make_output_dir(tmp_path)

        result = service.separate_and_export(
            input_file=str(input_file), output_dir=str(output_dir)
        )

        # Verify report structure
        for key in (
            "input_file",
            "speakers_detected",
            "processing_time_seconds",
            "output_files",
            "quality_metrics",
        ):
            assert key in result

        # Check quality metrics
        assert "average_confidence" in result["quality_metrics"]
        assert result["quality_metrics"]["average_confidence"] > 0.5

    def test_quality_preservation(self, service, test_audio_dir, tmp_path):
        """
        Test that output quality matches input (SC-007).

        Success Criteria (SC-007): No voice quality degradation
        """
        # Only this test needs the audio inspection helper.
        from src.lib.audio_io import get_audio_info

        input_file = self._require_audio(test_audio_dir, "two_speakers.m4a")
        output_dir = self._make_output_dir(tmp_path)

        # Get input audio info
        input_info = get_audio_info(str(input_file))

        # Run separation
        result = service.separate_and_export(
            input_file=str(input_file), output_dir=str(output_dir)
        )

        # Without this guard the loop below would pass vacuously when the
        # service exports nothing.
        assert result["output_files"], "No output files were produced"

        # Check output quality
        for output_info in result["output_files"]:
            output_audio_info = get_audio_info(output_info["file"])
            # Sample rate should match or be higher
            assert output_audio_info["sample_rate"] >= input_info["sample_rate"]
            # Format should be M4A
            assert output_audio_info["format"] == "M4A"

    def test_progress_reporting(self, service, test_audio_dir, tmp_path):
        """Test that progress is reported during processing."""
        input_file = self._require_audio(test_audio_dir, "two_speakers.m4a")
        output_dir = self._make_output_dir(tmp_path)

        progress_updates = []

        def progress_callback(message, progress):
            progress_updates.append((message, progress))

        # Run with progress callback; only the callback invocations matter,
        # so the returned report is not kept.
        service.separate_and_export(
            input_file=str(input_file),
            output_dir=str(output_dir),
            progress_callback=progress_callback,
        )

        # Should have received progress updates
        assert len(progress_updates) > 0

    def test_error_handling_missing_file(self, service, tmp_path):
        """Test appropriate error handling for missing input file."""
        # Both paths live under tmp_path: the input is guaranteed not to exist
        # and nothing is written outside the per-test sandbox.
        missing_file = tmp_path / "nonexistent.m4a"
        output_dir = tmp_path / "output"
        output_dir.mkdir()

        with pytest.raises(Exception, match="not found|does not exist"):
            service.separate_and_export(
                input_file=str(missing_file), output_dir=str(output_dir)
            )

    def test_error_handling_invalid_format(self, service, tmp_path):
        """Test appropriate error handling for invalid audio format."""
        # Create non-audio file
        invalid_file = tmp_path / "invalid.m4a"
        invalid_file.write_text("not audio data")
        output_dir = tmp_path / "output"
        output_dir.mkdir()

        with pytest.raises(Exception, match="invalid|cannot read|failed"):
            service.separate_and_export(
                input_file=str(invalid_file), output_dir=str(output_dir)
            )
class TestSpeakerSeparationPerformance:
    """Placeholder performance tests for speaker separation.

    Both tests skip unconditionally; neither exercises the service yet.
    """

    def test_large_file_processing(self):
        """Placeholder for processing large audio files (>1 hour).

        Success Criteria (SC-008): Handle 2-hour files without errors
        """
        # NOTE(review): this test is meant to be opt-in via @pytest.mark.slow,
        # but no such decorator is applied here - confirm whether one should
        # be added once a 2-hour sample exists.
        skip_reason = "Requires 2-hour test audio file"
        pytest.skip(skip_reason)

    def test_memory_usage(self):
        """Placeholder for checking that memory usage stays within bounds."""
        skip_reason = "Requires memory profiling setup"
        pytest.skip(skip_reason)