Spaces:
Sleeping
Sleeping
Fix: NIGHTWAVE TTS spelling, caller echo, ON AIR sign, San Francisco default
Browse files- .pytest_cache/v/cache/nodeids +3 -0
- proxy.py +26 -7
- radio.html +25 -14
- tests/test_proxy.py +65 -0
.pytest_cache/v/cache/nodeids
CHANGED
|
@@ -25,6 +25,8 @@
|
|
| 25 |
"tests/test_content.py::test_song_recs_well_formed",
|
| 26 |
"tests/test_content.py::test_songs_have_required_fields_and_valid_enums",
|
| 27 |
"tests/test_proxy.py::test_broadcast_turn_mock_shape_and_sanitized_text",
|
|
|
|
|
|
|
| 28 |
"tests/test_proxy.py::test_call_turn_mock_returns_caller_text_and_meter_delta",
|
| 29 |
"tests/test_proxy.py::test_clean_dj_text_rejects_degenerate_text_as_none",
|
| 30 |
"tests/test_proxy.py::test_clean_dj_text_strips_label_leakage",
|
|
@@ -37,6 +39,7 @@
|
|
| 37 |
"tests/test_proxy.py::test_song_intro_template_includes_handle",
|
| 38 |
"tests/test_proxy.py::test_song_intro_template_omits_handle_when_absent",
|
| 39 |
"tests/test_proxy.py::test_speakable_normalizes_nightwave_for_tts",
|
|
|
|
| 40 |
"tests/test_proxy.py::test_templated_caller_intro_nonempty",
|
| 41 |
"tests/test_proxy.py::test_templated_rejoin_names_the_dj",
|
| 42 |
"tests/test_proxy.py::test_wmo_phrase_mapping",
|
|
|
|
| 25 |
"tests/test_content.py::test_song_recs_well_formed",
|
| 26 |
"tests/test_content.py::test_songs_have_required_fields_and_valid_enums",
|
| 27 |
"tests/test_proxy.py::test_broadcast_turn_mock_shape_and_sanitized_text",
|
| 28 |
+
"tests/test_proxy.py::test_call_speak_normalizes_nightwave_before_tts",
|
| 29 |
+
"tests/test_proxy.py::test_call_turn_answers_without_echoing_the_question",
|
| 30 |
"tests/test_proxy.py::test_call_turn_mock_returns_caller_text_and_meter_delta",
|
| 31 |
"tests/test_proxy.py::test_clean_dj_text_rejects_degenerate_text_as_none",
|
| 32 |
"tests/test_proxy.py::test_clean_dj_text_strips_label_leakage",
|
|
|
|
| 39 |
"tests/test_proxy.py::test_song_intro_template_includes_handle",
|
| 40 |
"tests/test_proxy.py::test_song_intro_template_omits_handle_when_absent",
|
| 41 |
"tests/test_proxy.py::test_speakable_normalizes_nightwave_for_tts",
|
| 42 |
+
"tests/test_proxy.py::test_speakable_titlecases_nightwave",
|
| 43 |
"tests/test_proxy.py::test_templated_caller_intro_nonempty",
|
| 44 |
"tests/test_proxy.py::test_templated_rejoin_names_the_dj",
|
| 45 |
"tests/test_proxy.py::test_wmo_phrase_mapping",
|
proxy.py
CHANGED
|
@@ -242,6 +242,15 @@ def call_asr(audio_b64: str) -> Dict[str, Any]:
|
|
| 242 |
return resp.json()
|
| 243 |
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
|
| 246 |
"""POST /speak -> {"audio_b64", "words", "wtimes", "wdurations"}."""
|
| 247 |
if is_mock():
|
|
@@ -249,7 +258,7 @@ def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
|
|
| 249 |
resp = _get_client().post(
|
| 250 |
_modal_url() + "/speak",
|
| 251 |
headers=_modal_headers(),
|
| 252 |
-
json={"text": text, "voice": voice},
|
| 253 |
)
|
| 254 |
resp.raise_for_status()
|
| 255 |
return resp.json()
|
|
@@ -354,13 +363,23 @@ def call_turn(stage: str, meter: int, audio_b64: str) -> Dict[str, Any]:
|
|
| 354 |
"""
|
| 355 |
asr = call_asr(audio_b64)
|
| 356 |
caller_text = asr.get("text", "")
|
| 357 |
-
system = arc.build_system_prompt(stage, "caller", caller_text=caller_text)
|
| 358 |
-
brain = call_brain(
|
| 359 |
-
system, [{"role": "user", "content": caller_text or "(crackle)"}]
|
| 360 |
-
)
|
| 361 |
-
text = brain.get("text", "")
|
| 362 |
-
speak = call_speak(text)
|
| 363 |
meter_delta = arc.detect_triggers(caller_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
return {
|
| 365 |
"caller_text": caller_text,
|
| 366 |
"text": text,
|
|
|
|
| 242 |
return resp.json()
|
| 243 |
|
| 244 |
|
| 245 |
+
# All-caps "NIGHTWAVE" makes the TTS spell it out letter-by-letter; normalize to
|
| 246 |
+
# title case for SPEECH only (captions/UI keep the branded all-caps original).
|
| 247 |
+
_SPEAK_FIX = re.compile(r"\bNIGHT\s*WAVE\b", re.IGNORECASE)
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def _speakable(text: Optional[str]) -> str:
|
| 251 |
+
return _SPEAK_FIX.sub("Nightwave", text or "")
|
| 252 |
+
|
| 253 |
+
|
| 254 |
def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
|
| 255 |
"""POST /speak -> {"audio_b64", "words", "wtimes", "wdurations"}."""
|
| 256 |
if is_mock():
|
|
|
|
| 258 |
resp = _get_client().post(
|
| 259 |
_modal_url() + "/speak",
|
| 260 |
headers=_modal_headers(),
|
| 261 |
+
json={"text": _speakable(text), "voice": voice},
|
| 262 |
)
|
| 263 |
resp.raise_for_status()
|
| 264 |
return resp.json()
|
|
|
|
| 363 |
"""
|
| 364 |
asr = call_asr(audio_b64)
|
| 365 |
caller_text = asr.get("text", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
meter_delta = arc.detect_triggers(caller_text)
|
| 367 |
+
|
| 368 |
+
# Straight station: the host always answers the caller in his own voice. The
|
| 369 |
+
# caller's words are delivered as an ANSWER-DIRECTIVE in the user turn (NOT
|
| 370 |
+
# embedded verbatim) because a 1B model under the JSON grammar tends to echo a
|
| 371 |
+
# question that appears verbatim in its context.
|
| 372 |
+
system = arc.build_host_prompt("caller", {"caller_text": caller_text})
|
| 373 |
+
if caller_text and caller_text.strip():
|
| 374 |
+
user_msg = (
|
| 375 |
+
'A caller just asked you on the air: "' + caller_text.strip()
|
| 376 |
+
+ '". Answer them directly, in your own words. Do NOT repeat the question.'
|
| 377 |
+
)
|
| 378 |
+
else:
|
| 379 |
+
user_msg = "(the caller's line is too crackly to make out)"
|
| 380 |
+
brain = call_brain(system, [{"role": "user", "content": user_msg}])
|
| 381 |
+
text = _clean_dj_text(brain.get("text", "")) or arc.sample_line(stage, meter)
|
| 382 |
+
speak = call_speak(text)
|
| 383 |
return {
|
| 384 |
"caller_text": caller_text,
|
| 385 |
"text": text,
|
radio.html
CHANGED
|
@@ -1058,18 +1058,25 @@ async function fetchSongs(){
|
|
| 1058 |
}
|
| 1059 |
// Ask the browser for the listener's location (permission prompt), resolve real
|
| 1060 |
// weather server-side, and stash it. Denial/error: stays null -> fictional weather.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1061 |
function requestLocale(){
|
| 1062 |
-
if (
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
if (r.ok){ const j = await r.json(); if (j && j.resolved) State.locale = j; }
|
| 1070 |
-
}catch(_){}
|
| 1071 |
-
}, ()=>{ /* denied/unavailable: keep the dreamy fictional weather */ },
|
| 1072 |
-
{ timeout:8000, maximumAge:600000 });
|
| 1073 |
}
|
| 1074 |
function pickSong(){
|
| 1075 |
const bank = (State.songs && State.songs.length) ? State.songs : CLIENT_SONGS;
|
|
@@ -1636,8 +1643,11 @@ function blobToB64(blob){
|
|
| 1636 |
K. POWER, WIRING, SEED, rAF LOOP
|
| 1637 |
=================================================================== */
|
| 1638 |
function setBroadcasting(on){
|
| 1639 |
-
|
| 1640 |
-
|
|
|
|
|
|
|
|
|
|
| 1641 |
const spin = on && !State.reducedMotion;
|
| 1642 |
els.reelL.classList.toggle("spin", spin);
|
| 1643 |
els.reelR.classList.toggle("spin", spin);
|
|
@@ -1653,7 +1663,8 @@ async function powerOn(){
|
|
| 1653 |
els.power.setAttribute("aria-pressed","true");
|
| 1654 |
els.pwrTxt.innerHTML = "ON AIR";
|
| 1655 |
els.handset.disabled = false;
|
| 1656 |
-
els.
|
|
|
|
| 1657 |
updateHissForTuning();
|
| 1658 |
applyMeter(); // gauge -> SIGNAL
|
| 1659 |
rampGain(Sound.crackleGain, 0.05, 0.6); // bring up the vinyl/room bed
|
|
|
|
| 1058 |
}
|
| 1059 |
// Ask the browser for the listener's location (permission prompt), resolve real
|
| 1060 |
// weather server-side, and stash it. Denial/unavailable: resolve San Francisco instead; if that lookup fails too, State.locale stays null -> fictional weather.
|
| 1061 |
+
// San Francisco — the default location when the listener gives us none.
|
| 1062 |
+
const DEFAULT_LAT = 37.7749, DEFAULT_LON = -122.4194;
|
| 1063 |
+
async function resolveLocaleAt(lat, lon){
  // POST the coordinates to /api/locale and, when the reply is marked resolved,
  // stash it in State.locale. Best-effort: any network/parse failure is swallowed
  // and State.locale is left untouched.
  try{
    const response = await fetch("/api/locale", {
      method:"POST",
      headers:{"Content-Type":"application/json"},
      body: JSON.stringify({ lat, lon })
    });
    if (!response.ok) return;
    const payload = await response.json();
    if (payload && payload.resolved) State.locale = payload;
  }catch(_){}
}
|
| 1072 |
function requestLocale(){
  // A locale is already stashed -> nothing to do.
  if (State.locale) return;
  // Shared fallback: resolve the San Francisco default coordinates.
  const useDefault = ()=>{ resolveLocaleAt(DEFAULT_LAT, DEFAULT_LON); };
  const geo = navigator.geolocation;
  // No geolocation API at all -> default to San Francisco.
  if (!geo){ useDefault(); return; }
  geo.getCurrentPosition(
    (pos)=>{ resolveLocaleAt(pos.coords.latitude, pos.coords.longitude); },
    ()=>{ useDefault(); },              // denied/unavailable -> San Francisco
    { timeout:8000, maximumAge:600000 });
}
|
| 1081 |
function pickSong(){
|
| 1082 |
const bank = (State.songs && State.songs.length) ? State.songs : CLIENT_SONGS;
|
|
|
|
| 1643 |
K. POWER, WIRING, SEED, rAF LOOP
|
| 1644 |
=================================================================== */
|
| 1645 |
function setBroadcasting(on){
|
| 1646 |
+
// The station is "ON AIR" (tube + label lit) the whole time it's powered -- a
|
| 1647 |
+
// continuous broadcast -- not only while a voice clip plays. The tape reels
|
| 1648 |
+
// still spin while a voice clip is actually sounding, as live feedback.
|
| 1649 |
+
els.body.classList.toggle("broadcasting", !!State.powered);
|
| 1650 |
+
els.onairLabel.textContent = State.powered ? "ON AIR" : "STANDBY";
|
| 1651 |
const spin = on && !State.reducedMotion;
|
| 1652 |
els.reelL.classList.toggle("spin", spin);
|
| 1653 |
els.reelR.classList.toggle("spin", spin);
|
|
|
|
| 1663 |
els.power.setAttribute("aria-pressed","true");
|
| 1664 |
els.pwrTxt.innerHTML = "ON AIR";
|
| 1665 |
els.handset.disabled = false;
|
| 1666 |
+
els.body.classList.add("broadcasting"); // light the ON AIR sign the moment we're powered
|
| 1667 |
+
els.onairLabel.textContent = "ON AIR";
|
| 1668 |
updateHissForTuning();
|
| 1669 |
applyMeter(); // gauge -> SIGNAL
|
| 1670 |
rampGain(Sound.crackleGain, 0.05, 0.6); // bring up the vinyl/room bed
|
tests/test_proxy.py
CHANGED
|
@@ -100,3 +100,68 @@ def test_segment_fallback_is_never_failing_and_well_shaped():
|
|
| 100 |
assert fb["audio_b64"] # a real (silent) WAV bed
|
| 101 |
for key in ("mood", "arc_cue", "words", "wtimes", "wdurations"):
|
| 102 |
assert key in fb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
assert fb["audio_b64"] # a real (silent) WAV bed
|
| 101 |
for key in ("mood", "arc_cue", "words", "wtimes", "wdurations"):
|
| 102 |
assert key in fb
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ---------------------------------------------------------------------------
|
| 106 |
+
# Regression (bug 2): the all-caps station name must be normalized before TTS so
|
| 107 |
+
# Kokoro speaks "Nightwave" instead of spelling N-I-G-H-T-W-A-V-E. _speakable was
|
| 108 |
+
# dropped during a proxy rebuild and call_speak sent raw text.
|
| 109 |
+
# ---------------------------------------------------------------------------
|
| 110 |
+
def test_speakable_titlecases_nightwave():
    """_speakable title-cases the station name and maps None to an empty string."""
    expectations = (
        ("You're on NIGHTWAVE 98.6", "You're on Nightwave 98.6"),
        ("night wave", "Nightwave"),
        (None, ""),
    )
    for raw, spoken in expectations:
        assert proxy._speakable(raw) == spoken
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def test_call_speak_normalizes_nightwave_before_tts(monkeypatch):
    """call_speak must send the title-cased station name in its POST payload."""
    posted = {}

    class _FakeResponse:
        def raise_for_status(self):
            pass

        def json(self):
            return {"audio_b64": "", "words": [], "wtimes": [], "wdurations": []}

    class _FakeClient:
        def post(self, url, headers=None, json=None):
            # Record the JSON body so the assertions can inspect what was sent.
            posted.update(json or {})
            return _FakeResponse()

    # Force the non-mock path and route the HTTP call to the fake client.
    stubs = {
        "is_mock": lambda: False,
        "_modal_url": lambda: "http://modal.test",
        "_modal_headers": lambda: {},
        "_get_client": lambda: _FakeClient(),
    }
    for attr, stub in stubs.items():
        monkeypatch.setattr(proxy, attr, stub)

    proxy.call_speak("This is NIGHTWAVE, the last station on the dial.")

    sent_text = posted["text"]
    assert "NIGHTWAVE" not in sent_text
    assert "Nightwave" in sent_text
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# ---------------------------------------------------------------------------
|
| 141 |
+
# Regression (bug 4): the caller turn must ANSWER the caller, not echo. The
|
| 142 |
+
# straight station uses build_host_prompt("caller") + an answer-directive user
|
| 143 |
+
# turn so a 1B model doesn't parrot a question that appears verbatim in context.
|
| 144 |
+
# call_turn had reverted to the arc builder + raw caller_text as the user message.
|
| 145 |
+
# ---------------------------------------------------------------------------
|
| 146 |
+
def test_call_turn_answers_without_echoing_the_question(monkeypatch):
    """call_turn must hand the brain an answer-directive, not the bare question."""
    seen = {}
    question = "are you real?"

    def _fake_brain(system, messages):
        # Capture exactly what call_turn passes to the brain.
        seen["system"] = system
        seen["messages"] = messages
        return {"text": "Real as the rain on the glass, friend.", "mood": "warm", "arc_cue": "none"}

    def _fake_speak(text, voice=arc.VOICE):
        return {"audio_b64": "", "words": [], "wtimes": [], "wdurations": []}

    monkeypatch.setattr(proxy, "call_asr", lambda a: {"text": question})
    monkeypatch.setattr(proxy, "call_brain", _fake_brain)
    monkeypatch.setattr(proxy, "call_speak", _fake_speak)

    result = proxy.call_turn("oblivious", 0, "<b64>")

    # straight-station host prompt (names the DJ), NOT the arc builder
    assert arc.DJ_NAME in seen["system"]
    # the user turn carries an answer-directive, not the bare echoed question
    last_user = seen["messages"][-1]["content"]
    assert "do not repeat" in last_user.lower()
    assert last_user.strip() != question
    assert result["caller_text"] == question
    assert result["meter_delta"] >= 14  # an identity/reality question pushes the meter
|