"""Tests for voice cache filename parsing and resolution.""" from app.services.voice_cache import ( active_model_tag, cache_is_stale, known_model_tags, list_voice_stems, parse_safetensors_name, resolve_voice_path, ) def test_active_model_tag_returns_raw_when_not_aliased(): assert active_model_tag('german_24l') == 'german_24l' def test_active_model_tag_canonicalizes_english_alias(): assert active_model_tag('english') == 'english_2026-04' def test_active_model_tag_canonicalizes_english_2026_01(): assert active_model_tag('english_2026-01') == 'english_2026-04' def test_active_model_tag_handles_windows_path(): """A custom --model-path value (full Windows file path) must produce a filename-safe tag — not a literal path with `:` and `\\` that breaks safetensors serialization on Windows (issue #13).""" tag = active_model_tag(r'C:\PocketTTS-Server\model\languages\english\english.yaml') # Tag must contain no characters disallowed in Windows filenames. assert not any(c in tag for c in r'\/:*?"<>|') # Stem-derived: english.yaml -> english is the natural choice. assert tag == 'english' def test_active_model_tag_handles_posix_path(): """A POSIX absolute path to a custom model yaml should also produce a filename-safe tag derived from the file stem.""" tag = active_model_tag('/opt/pocket-tts/models/german_24l.yaml') assert '/' not in tag assert tag == 'german_24l' def test_active_model_tag_idempotent_for_plain_language(): """Plain language identifiers must be unaffected by sanitization so the existing tag/cache lookup logic continues to work for built-in models.""" assert active_model_tag('german_24l') == 'german_24l' assert active_model_tag('french_24l') == 'french_24l' def test_active_model_tag_strips_dots_from_derived_tag(): """A model path like 'english.v2.yaml' must NOT yield a tag with internal dots — `parse_safetensors_name` uses `.` to split ..safetensors, so a dotted tag would be mis-parsed and break voice listing.""" tag = active_model_tag('/opt/models/english.v2.yaml') assert '.' not in tag def test_active_model_tag_handles_bundled_hash_filename(): """The bundled model is named like 'b6369a24.yaml' — the derived tag should be the hex stem, no dots.""" tag = active_model_tag('/app/model/b6369a24.yaml') assert tag == 'b6369a24' def test_known_model_tags_includes_all_supported_plus_alias_targets(): tags = known_model_tags() assert 'english_2026-04' in tags assert 'german_24l' in tags assert 'french_24l' in tags # Alias keys are also included so users can name files with them. assert 'english' in tags def test_parse_safetensors_name_recognizes_tagged_file(): tags = known_model_tags() stem, tag = parse_safetensors_name('Emma Watson.english_2026-04.safetensors', tags) assert stem == 'Emma Watson' assert tag == 'english_2026-04' def test_parse_safetensors_name_legacy_unlabeled(): tags = known_model_tags() stem, tag = parse_safetensors_name('legacy.safetensors', tags) assert stem == 'legacy' assert tag is None def test_parse_safetensors_name_dot_in_stem_but_unknown_tag(): """A filename like 'my.voice.safetensors' has 'voice' as the final segment — but 'voice' isn't a known tag, so we treat the whole thing as the stem.""" tags = known_model_tags() stem, tag = parse_safetensors_name('my.voice.safetensors', tags) assert stem == 'my.voice' assert tag is None def test_resolve_voice_path_prefers_tagged_cache(tmp_voices, tmp_cache): """Preference: cache_dir tagged > voices_dir tagged > raw audio > legacy.""" (tmp_voices / 'emma.wav').write_bytes(b'fake-audio') (tmp_cache / 'emma.english_2026-04.safetensors').write_bytes(b'fake-st') result = resolve_voice_path( 'emma', active_model='english_2026-04', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_cache / 'emma.english_2026-04.safetensors' def test_resolve_voice_path_falls_back_to_raw_audio(tmp_voices, tmp_cache): (tmp_voices / 'emma.wav').write_bytes(b'fake-audio') result = resolve_voice_path( 'emma', active_model='english_2026-04', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_voices / 'emma.wav' def test_resolve_voice_path_legacy_unlabeled(tmp_voices, tmp_cache): (tmp_voices / 'emma.safetensors').write_bytes(b'fake-st') result = resolve_voice_path( 'emma', active_model='english_2026-04', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_voices / 'emma.safetensors' def test_resolve_voice_path_passthrough_for_builtin_name(tmp_voices, tmp_cache): """Built-in names (no matching file anywhere) pass through untouched — pocket-tts handles them via HuggingFace.""" result = resolve_voice_path( 'alba', active_model='english_2026-04', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == 'alba' def test_resolve_voice_path_respects_alias(tmp_voices, tmp_cache): """Asking for 'english' should find a cache tagged 'english_2026-04'.""" (tmp_cache / 'emma.english_2026-04.safetensors').write_bytes(b'fake-st') result = resolve_voice_path( 'emma', active_model='english', # alias voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_cache / 'emma.english_2026-04.safetensors' def test_resolve_voice_path_finds_alias_tagged_file(tmp_voices, tmp_cache): """Files tagged with the alias itself (e.g. emma.english.safetensors) should resolve when canonical-tagged file is absent — supports caches written by older versions or by external tools using the alias.""" (tmp_cache / 'emma.english.safetensors').write_bytes(b'fake-st') result = resolve_voice_path( 'emma', active_model='english', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_cache / 'emma.english.safetensors' def test_resolve_voice_path_canonical_preferred_over_alias(tmp_voices, tmp_cache): """When both canonical and alias-tagged files exist, prefer canonical.""" (tmp_cache / 'emma.english.safetensors').write_bytes(b'fake-st') (tmp_cache / 'emma.english_2026-04.safetensors').write_bytes(b'fake-st') result = resolve_voice_path( 'emma', active_model='english', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_cache / 'emma.english_2026-04.safetensors' def test_resolve_voice_path_voices_dir_tagged_cache(tmp_voices, tmp_cache): """A tagged cache dropped directly into voices_dir (e.g. by WingmanAI) is honored.""" (tmp_voices / 'emma.german_24l.safetensors').write_bytes(b'fake-st') result = resolve_voice_path( 'emma', active_model='german_24l', voices_dir=tmp_voices, cache_dir=tmp_cache, ) assert result == tmp_voices / 'emma.german_24l.safetensors' def test_list_voice_stems_collapses_duplicates(tmp_voices, tmp_cache): (tmp_voices / 'emma.wav').write_bytes(b'a') (tmp_voices / 'emma.safetensors').write_bytes(b'a') (tmp_cache / 'emma.english_2026-04.safetensors').write_bytes(b'a') (tmp_cache / 'emma.german_24l.safetensors').write_bytes(b'a') (tmp_voices / 'morgan.mp3').write_bytes(b'a') stems = list_voice_stems(voices_dir=tmp_voices, cache_dir=tmp_cache) assert stems == ['emma', 'morgan'] def test_list_voice_stems_empty(tmp_voices, tmp_cache): assert list_voice_stems(voices_dir=tmp_voices, cache_dir=tmp_cache) == [] def test_list_voice_stems_ignores_unknown_extensions(tmp_voices, tmp_cache): (tmp_voices / 'notes.txt').write_bytes(b'a') (tmp_voices / 'emma.wav').write_bytes(b'a') stems = list_voice_stems(voices_dir=tmp_voices, cache_dir=tmp_cache) assert stems == ['emma'] def test_cache_is_stale_true_when_source_newer(tmp_voices, tmp_cache): import os import time cache = tmp_cache / 'emma.english_2026-04.safetensors' cache.write_bytes(b'old') old_time = time.time() - 100 os.utime(cache, (old_time, old_time)) source = tmp_voices / 'emma.wav' source.write_bytes(b'new') # mtime = now assert cache_is_stale(cache_path=cache, source_path=source) is True def test_cache_is_stale_false_when_cache_newer(tmp_voices, tmp_cache): source = tmp_voices / 'emma.wav' source.write_bytes(b'old') import os import time old_time = time.time() - 100 os.utime(source, (old_time, old_time)) cache = tmp_cache / 'emma.english_2026-04.safetensors' cache.write_bytes(b'new') assert cache_is_stale(cache_path=cache, source_path=source) is False def test_cache_is_stale_false_when_source_missing(tmp_voices, tmp_cache): cache = tmp_cache / 'emma.english_2026-04.safetensors' cache.write_bytes(b'cached') source = tmp_voices / 'emma.wav' # does not exist assert cache_is_stale(cache_path=cache, source_path=source) is False