Spaces:

build-small-hackathon
/

nightwave

Sleeping

App Files Files Community

ratandeep committed 19 days ago

Commit

0cb7e78

verified ·

1 Parent(s): a13f875

Fix: NIGHTWAVE TTS spelling, caller echo, ON AIR sign, San Francisco default

Browse files

Files changed (4) hide show

.pytest_cache/v/cache/nodeids +3 -0
proxy.py +26 -7
radio.html +25 -14
tests/test_proxy.py +65 -0

.pytest_cache/v/cache/nodeids CHANGED Viewed

@@ -25,6 +25,8 @@
   "tests/test_content.py::test_song_recs_well_formed",
   "tests/test_content.py::test_songs_have_required_fields_and_valid_enums",
   "tests/test_proxy.py::test_broadcast_turn_mock_shape_and_sanitized_text",
   "tests/test_proxy.py::test_call_turn_mock_returns_caller_text_and_meter_delta",
   "tests/test_proxy.py::test_clean_dj_text_rejects_degenerate_text_as_none",
   "tests/test_proxy.py::test_clean_dj_text_strips_label_leakage",
@@ -37,6 +39,7 @@
   "tests/test_proxy.py::test_song_intro_template_includes_handle",
   "tests/test_proxy.py::test_song_intro_template_omits_handle_when_absent",
   "tests/test_proxy.py::test_speakable_normalizes_nightwave_for_tts",
   "tests/test_proxy.py::test_templated_caller_intro_nonempty",
   "tests/test_proxy.py::test_templated_rejoin_names_the_dj",
   "tests/test_proxy.py::test_wmo_phrase_mapping",

   "tests/test_content.py::test_song_recs_well_formed",
   "tests/test_content.py::test_songs_have_required_fields_and_valid_enums",
   "tests/test_proxy.py::test_broadcast_turn_mock_shape_and_sanitized_text",
+  "tests/test_proxy.py::test_call_speak_normalizes_nightwave_before_tts",
+  "tests/test_proxy.py::test_call_turn_answers_without_echoing_the_question",
   "tests/test_proxy.py::test_call_turn_mock_returns_caller_text_and_meter_delta",
   "tests/test_proxy.py::test_clean_dj_text_rejects_degenerate_text_as_none",
   "tests/test_proxy.py::test_clean_dj_text_strips_label_leakage",
   "tests/test_proxy.py::test_song_intro_template_includes_handle",
   "tests/test_proxy.py::test_song_intro_template_omits_handle_when_absent",
   "tests/test_proxy.py::test_speakable_normalizes_nightwave_for_tts",
+  "tests/test_proxy.py::test_speakable_titlecases_nightwave",
   "tests/test_proxy.py::test_templated_caller_intro_nonempty",
   "tests/test_proxy.py::test_templated_rejoin_names_the_dj",
   "tests/test_proxy.py::test_wmo_phrase_mapping",

proxy.py CHANGED Viewed

@@ -242,6 +242,15 @@ def call_asr(audio_b64: str) -> Dict[str, Any]:
     return resp.json()
 def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
     """POST /speak -> {"audio_b64", "words", "wtimes", "wdurations"}."""
     if is_mock():
@@ -249,7 +258,7 @@ def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
     resp = _get_client().post(
         _modal_url() + "/speak",
         headers=_modal_headers(),
-        json={"text": text, "voice": voice},
     )
     resp.raise_for_status()
     return resp.json()
@@ -354,13 +363,23 @@ def call_turn(stage: str, meter: int, audio_b64: str) -> Dict[str, Any]:
     """
     asr = call_asr(audio_b64)
     caller_text = asr.get("text", "")
-    system = arc.build_system_prompt(stage, "caller", caller_text=caller_text)
-    brain = call_brain(
-        system, [{"role": "user", "content": caller_text or "(crackle)"}]
-    )
-    text = brain.get("text", "")
-    speak = call_speak(text)
     meter_delta = arc.detect_triggers(caller_text)
     return {
         "caller_text": caller_text,
         "text": text,

     return resp.json()
+# All-caps "NIGHTWAVE" makes the TTS spell it out letter-by-letter; normalize to
+# title case for SPEECH only (captions/UI keep the branded all-caps original).
+_SPEAK_FIX = re.compile(r"\bNIGHT\s*WAVE\b", re.IGNORECASE)
+def _speakable(text: Optional[str]) -> str:
+    return _SPEAK_FIX.sub("Nightwave", text or "")
 def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
     """POST /speak -> {"audio_b64", "words", "wtimes", "wdurations"}."""
     if is_mock():
     resp = _get_client().post(
         _modal_url() + "/speak",
         headers=_modal_headers(),
+        json={"text": _speakable(text), "voice": voice},
     )
     resp.raise_for_status()
     return resp.json()
     """
     asr = call_asr(audio_b64)
     caller_text = asr.get("text", "")
     meter_delta = arc.detect_triggers(caller_text)
+    # Straight station: the host always answers the caller in his own voice. The
+    # caller's words are quoted inside an ANSWER-DIRECTIVE in the user turn (NOT
+    # sent as the bare user message) because a 1B model under the JSON grammar
+    # tends to echo a question that is handed to it on its own.
+    system = arc.build_host_prompt("caller", {"caller_text": caller_text})
+    if caller_text and caller_text.strip():
+        user_msg = (
+            'A caller just asked you on the air: "' + caller_text.strip()
+            + '". Answer them directly, in your own words. Do NOT repeat the question.'
+        )
+    else:
+        user_msg = "(the caller's line is too crackly to make out)"
+    brain = call_brain(system, [{"role": "user", "content": user_msg}])
+    text = _clean_dj_text(brain.get("text", "")) or arc.sample_line(stage, meter)
+    speak = call_speak(text)
     return {
         "caller_text": caller_text,
         "text": text,

radio.html CHANGED Viewed

@@ -1058,18 +1058,25 @@ async function fetchSongs(){
 }
 // Ask the browser for the listener's location (permission prompt), resolve real
 // weather server-side, and stash it. Denial/error: stays null -> fictional weather.
 function requestLocale(){
-  if (!navigator.geolocation || State.locale) return;
-  navigator.geolocation.getCurrentPosition(async (pos)=>{
-    try{
-      const r = await fetch("/api/locale", {
-        method:"POST", headers:{"Content-Type":"application/json"},
-        body: JSON.stringify({ lat: pos.coords.latitude, lon: pos.coords.longitude })
-      });
-      if (r.ok){ const j = await r.json(); if (j && j.resolved) State.locale = j; }
-    }catch(_){}
-  }, ()=>{ /* denied/unavailable: keep the dreamy fictional weather */ },
-  { timeout:8000, maximumAge:600000 });
 }
 function pickSong(){
   const bank = (State.songs && State.songs.length) ? State.songs : CLIENT_SONGS;
@@ -1636,8 +1643,11 @@ function blobToB64(blob){
    K. POWER, WIRING, SEED, rAF LOOP
    =================================================================== */
 function setBroadcasting(on){
-  els.body.classList.toggle("broadcasting", !!on);
-  els.onairLabel.textContent = on ? "ON AIR" : (State.powered ? "RECEIVING" : "STANDBY");
   const spin = on && !State.reducedMotion;
   els.reelL.classList.toggle("spin", spin);
   els.reelR.classList.toggle("spin", spin);
@@ -1653,7 +1663,8 @@ async function powerOn(){
   els.power.setAttribute("aria-pressed","true");
   els.pwrTxt.innerHTML = "ON AIR";
   els.handset.disabled = false;
-  els.onairLabel.textContent = "RECEIVING";
   updateHissForTuning();
   applyMeter();                             // gauge -> SIGNAL
   rampGain(Sound.crackleGain, 0.05, 0.6);   // bring up the vinyl/room bed

 }
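 // Ask the browser for the listener's location (permission prompt), resolve real
 // weather server-side, and stash it. Denial/unavailable: fall back to the San
 // Francisco default. State.locale stays null -> fictional weather only if the
 // /api/locale lookup itself fails or returns nothing resolved.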
+// San Francisco — the default location when the listener gives us none.
+const DEFAULT_LAT = 37.7749, DEFAULT_LON = -122.4194;
+async function resolveLocaleAt(lat, lon){
+  try{
+    const r = await fetch("/api/locale", {
+      method:"POST", headers:{"Content-Type":"application/json"},
+      body: JSON.stringify({ lat, lon })
+    });
+    if (r.ok){ const j = await r.json(); if (j && j.resolved) State.locale = j; }
+  }catch(_){}
+}
 function requestLocale(){
+  if (State.locale) return;
+  // No geolocation API at all -> default to San Francisco.
+  if (!navigator.geolocation){ resolveLocaleAt(DEFAULT_LAT, DEFAULT_LON); return; }
+  navigator.geolocation.getCurrentPosition(
+    (pos)=>{ resolveLocaleAt(pos.coords.latitude, pos.coords.longitude); },
+    ()=>{ resolveLocaleAt(DEFAULT_LAT, DEFAULT_LON); },   // denied/unavailable -> San Francisco
+    { timeout:8000, maximumAge:600000 });
 }
 function pickSong(){
   const bank = (State.songs && State.songs.length) ? State.songs : CLIENT_SONGS;
    K. POWER, WIRING, SEED, rAF LOOP
    =================================================================== */
 function setBroadcasting(on){
+  // The station is "ON AIR" (tube + label lit) the whole time it's powered -- a
+  // continuous broadcast -- not only while a voice clip plays. The tape reels
+  // still spin while a voice clip is actually sounding, as live feedback.
+  els.body.classList.toggle("broadcasting", !!State.powered);
+  els.onairLabel.textContent = State.powered ? "ON AIR" : "STANDBY";
   const spin = on && !State.reducedMotion;
   els.reelL.classList.toggle("spin", spin);
   els.reelR.classList.toggle("spin", spin);
   els.power.setAttribute("aria-pressed","true");
   els.pwrTxt.innerHTML = "ON AIR";
   els.handset.disabled = false;
+  els.body.classList.add("broadcasting");   // light the ON AIR sign the moment we're powered
+  els.onairLabel.textContent = "ON AIR";
   updateHissForTuning();
   applyMeter();                             // gauge -> SIGNAL
   rampGain(Sound.crackleGain, 0.05, 0.6);   // bring up the vinyl/room bed

tests/test_proxy.py CHANGED Viewed

@@ -100,3 +100,68 @@ def test_segment_fallback_is_never_failing_and_well_shaped():
     assert fb["audio_b64"]                        # a real (silent) WAV bed
     for key in ("mood", "arc_cue", "words", "wtimes", "wdurations"):
         assert key in fb

     assert fb["audio_b64"]                        # a real (silent) WAV bed
     for key in ("mood", "arc_cue", "words", "wtimes", "wdurations"):
         assert key in fb
+# ---------------------------------------------------------------------------
+# Regression (bug 2): the all-caps station name must be normalized before TTS so
+# Kokoro speaks "Nightwave" instead of spelling N-I-G-H-T-W-A-V-E. _speakable was
+# dropped during a proxy rebuild and call_speak sent raw text.
+# ---------------------------------------------------------------------------
+def test_speakable_titlecases_nightwave():
+    assert proxy._speakable("You're on NIGHTWAVE 98.6") == "You're on Nightwave 98.6"
+    assert proxy._speakable("night wave") == "Nightwave"
+    assert proxy._speakable(None) == ""
+def test_call_speak_normalizes_nightwave_before_tts(monkeypatch):
+    sent = {}
+    class _Resp:
+        def raise_for_status(self):
+            pass
+        def json(self):
+            return {"audio_b64": "", "words": [], "wtimes": [], "wdurations": []}
+    class _Client:
+        def post(self, url, headers=None, json=None):
+            sent.update(json or {})
+            return _Resp()
+    monkeypatch.setattr(proxy, "is_mock", lambda: False)
+    monkeypatch.setattr(proxy, "_modal_url", lambda: "http://modal.test")
+    monkeypatch.setattr(proxy, "_modal_headers", lambda: {})
+    monkeypatch.setattr(proxy, "_get_client", lambda: _Client())
+    proxy.call_speak("This is NIGHTWAVE, the last station on the dial.")
+    assert "NIGHTWAVE" not in sent["text"]
+    assert "Nightwave" in sent["text"]
+# ---------------------------------------------------------------------------
+# Regression (bug 4): the caller turn must ANSWER the caller, not echo. The
+# straight station uses build_host_prompt("caller") + an answer-directive user
+# turn so a 1B model doesn't parrot a question that appears verbatim in context.
+# call_turn had reverted to the arc builder + raw caller_text as the user message.
+# ---------------------------------------------------------------------------
+def test_call_turn_answers_without_echoing_the_question(monkeypatch):
+    captured = {}
+    monkeypatch.setattr(proxy, "call_asr", lambda a: {"text": "are you real?"})
+    def _brain(system, messages):
+        captured["system"] = system
+        captured["messages"] = messages
+        return {"text": "Real as the rain on the glass, friend.", "mood": "warm", "arc_cue": "none"}
+    monkeypatch.setattr(proxy, "call_brain", _brain)
+    monkeypatch.setattr(proxy, "call_speak",
+                        lambda text, voice=arc.VOICE: {"audio_b64": "", "words": [],
+                                                       "wtimes": [], "wdurations": []})
+    out = proxy.call_turn("oblivious", 0, "<b64>")
+    # straight-station host prompt (names the DJ), NOT the arc builder
+    assert arc.DJ_NAME in captured["system"]
+    # the user turn carries an answer-directive, not the bare echoed question
+    user = captured["messages"][-1]["content"]
+    assert "do not repeat" in user.lower()
+    assert user.strip() != "are you real?"
+    assert out["caller_text"] == "are you real?"
+    assert out["meter_delta"] >= 14   # an identity/reality question pushes the meter