ratandeep committed on
Commit
0cb7e78
·
verified ·
1 Parent(s): a13f875

Fix: NIGHTWAVE TTS spelling, caller echo, ON AIR sign, San Francisco default

Browse files
Files changed (4) hide show
  1. .pytest_cache/v/cache/nodeids +3 -0
  2. proxy.py +26 -7
  3. radio.html +25 -14
  4. tests/test_proxy.py +65 -0
.pytest_cache/v/cache/nodeids CHANGED
@@ -25,6 +25,8 @@
25
  "tests/test_content.py::test_song_recs_well_formed",
26
  "tests/test_content.py::test_songs_have_required_fields_and_valid_enums",
27
  "tests/test_proxy.py::test_broadcast_turn_mock_shape_and_sanitized_text",
 
 
28
  "tests/test_proxy.py::test_call_turn_mock_returns_caller_text_and_meter_delta",
29
  "tests/test_proxy.py::test_clean_dj_text_rejects_degenerate_text_as_none",
30
  "tests/test_proxy.py::test_clean_dj_text_strips_label_leakage",
@@ -37,6 +39,7 @@
37
  "tests/test_proxy.py::test_song_intro_template_includes_handle",
38
  "tests/test_proxy.py::test_song_intro_template_omits_handle_when_absent",
39
  "tests/test_proxy.py::test_speakable_normalizes_nightwave_for_tts",
 
40
  "tests/test_proxy.py::test_templated_caller_intro_nonempty",
41
  "tests/test_proxy.py::test_templated_rejoin_names_the_dj",
42
  "tests/test_proxy.py::test_wmo_phrase_mapping",
 
25
  "tests/test_content.py::test_song_recs_well_formed",
26
  "tests/test_content.py::test_songs_have_required_fields_and_valid_enums",
27
  "tests/test_proxy.py::test_broadcast_turn_mock_shape_and_sanitized_text",
28
+ "tests/test_proxy.py::test_call_speak_normalizes_nightwave_before_tts",
29
+ "tests/test_proxy.py::test_call_turn_answers_without_echoing_the_question",
30
  "tests/test_proxy.py::test_call_turn_mock_returns_caller_text_and_meter_delta",
31
  "tests/test_proxy.py::test_clean_dj_text_rejects_degenerate_text_as_none",
32
  "tests/test_proxy.py::test_clean_dj_text_strips_label_leakage",
 
39
  "tests/test_proxy.py::test_song_intro_template_includes_handle",
40
  "tests/test_proxy.py::test_song_intro_template_omits_handle_when_absent",
41
  "tests/test_proxy.py::test_speakable_normalizes_nightwave_for_tts",
42
+ "tests/test_proxy.py::test_speakable_titlecases_nightwave",
43
  "tests/test_proxy.py::test_templated_caller_intro_nonempty",
44
  "tests/test_proxy.py::test_templated_rejoin_names_the_dj",
45
  "tests/test_proxy.py::test_wmo_phrase_mapping",
proxy.py CHANGED
@@ -242,6 +242,15 @@ def call_asr(audio_b64: str) -> Dict[str, Any]:
242
  return resp.json()
243
 
244
 
 
 
 
 
 
 
 
 
 
245
  def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
246
  """POST /speak -> {"audio_b64", "words", "wtimes", "wdurations"}."""
247
  if is_mock():
@@ -249,7 +258,7 @@ def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
249
  resp = _get_client().post(
250
  _modal_url() + "/speak",
251
  headers=_modal_headers(),
252
- json={"text": text, "voice": voice},
253
  )
254
  resp.raise_for_status()
255
  return resp.json()
@@ -354,13 +363,23 @@ def call_turn(stage: str, meter: int, audio_b64: str) -> Dict[str, Any]:
354
  """
355
  asr = call_asr(audio_b64)
356
  caller_text = asr.get("text", "")
357
- system = arc.build_system_prompt(stage, "caller", caller_text=caller_text)
358
- brain = call_brain(
359
- system, [{"role": "user", "content": caller_text or "(crackle)"}]
360
- )
361
- text = brain.get("text", "")
362
- speak = call_speak(text)
363
  meter_delta = arc.detect_triggers(caller_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  return {
365
  "caller_text": caller_text,
366
  "text": text,
 
242
  return resp.json()
243
 
244
 
245
+ # All-caps "NIGHTWAVE" makes the TTS spell it out letter-by-letter; normalize to
246
+ # title case for SPEECH only (captions/UI keep the branded all-caps original).
247
+ _SPEAK_FIX = re.compile(r"\bNIGHT\s*WAVE\b", re.IGNORECASE)
248
+
249
+
250
+ def _speakable(text: Optional[str]) -> str:
251
+ return _SPEAK_FIX.sub("Nightwave", text or "")
252
+
253
+
254
  def call_speak(text: str, voice: str = arc.VOICE) -> Dict[str, Any]:
255
  """POST /speak -> {"audio_b64", "words", "wtimes", "wdurations"}."""
256
  if is_mock():
 
258
  resp = _get_client().post(
259
  _modal_url() + "/speak",
260
  headers=_modal_headers(),
261
+ json={"text": _speakable(text), "voice": voice},
262
  )
263
  resp.raise_for_status()
264
  return resp.json()
 
363
  """
364
  asr = call_asr(audio_b64)
365
  caller_text = asr.get("text", "")
 
 
 
 
 
 
366
  meter_delta = arc.detect_triggers(caller_text)
367
+
368
+ # Straight station: the host always answers the caller in his own voice. The
369
+ # caller's words are delivered as an ANSWER-DIRECTIVE in the user turn (NOT
370
+ # embedded verbatim) because a 1B model under the JSON grammar tends to echo a
371
+ # question that appears verbatim in its context.
372
+ system = arc.build_host_prompt("caller", {"caller_text": caller_text})
373
+ if caller_text and caller_text.strip():
374
+ user_msg = (
375
+ 'A caller just asked you on the air: "' + caller_text.strip()
376
+ + '". Answer them directly, in your own words. Do NOT repeat the question.'
377
+ )
378
+ else:
379
+ user_msg = "(the caller's line is too crackly to make out)"
380
+ brain = call_brain(system, [{"role": "user", "content": user_msg}])
381
+ text = _clean_dj_text(brain.get("text", "")) or arc.sample_line(stage, meter)
382
+ speak = call_speak(text)
383
  return {
384
  "caller_text": caller_text,
385
  "text": text,
radio.html CHANGED
@@ -1058,18 +1058,25 @@ async function fetchSongs(){
1058
  }
1059
  // Ask the browser for the listener's location (permission prompt), resolve real
1060
  // weather server-side, and stash it. Denial/error: stays null -> fictional weather.
 
 
 
 
 
 
 
 
 
 
 
1061
  function requestLocale(){
1062
- if (!navigator.geolocation || State.locale) return;
1063
- navigator.geolocation.getCurrentPosition(async (pos)=>{
1064
- try{
1065
- const r = await fetch("/api/locale", {
1066
- method:"POST", headers:{"Content-Type":"application/json"},
1067
- body: JSON.stringify({ lat: pos.coords.latitude, lon: pos.coords.longitude })
1068
- });
1069
- if (r.ok){ const j = await r.json(); if (j && j.resolved) State.locale = j; }
1070
- }catch(_){}
1071
- }, ()=>{ /* denied/unavailable: keep the dreamy fictional weather */ },
1072
- { timeout:8000, maximumAge:600000 });
1073
  }
1074
  function pickSong(){
1075
  const bank = (State.songs && State.songs.length) ? State.songs : CLIENT_SONGS;
@@ -1636,8 +1643,11 @@ function blobToB64(blob){
1636
  K. POWER, WIRING, SEED, rAF LOOP
1637
  =================================================================== */
1638
  function setBroadcasting(on){
1639
- els.body.classList.toggle("broadcasting", !!on);
1640
- els.onairLabel.textContent = on ? "ON AIR" : (State.powered ? "RECEIVING" : "STANDBY");
 
 
 
1641
  const spin = on && !State.reducedMotion;
1642
  els.reelL.classList.toggle("spin", spin);
1643
  els.reelR.classList.toggle("spin", spin);
@@ -1653,7 +1663,8 @@ async function powerOn(){
1653
  els.power.setAttribute("aria-pressed","true");
1654
  els.pwrTxt.innerHTML = "ON AIR";
1655
  els.handset.disabled = false;
1656
- els.onairLabel.textContent = "RECEIVING";
 
1657
  updateHissForTuning();
1658
  applyMeter(); // gauge -> SIGNAL
1659
  rampGain(Sound.crackleGain, 0.05, 0.6); // bring up the vinyl/room bed
 
1058
  }
1059
  // Ask the browser for the listener's location (permission prompt), resolve real
1060
  // weather server-side, and stash it. Denial/error: stays null -> fictional weather.
1061
+ // San Francisco — the default location when the listener gives us none.
1062
+ const DEFAULT_LAT = 37.7749, DEFAULT_LON = -122.4194;
1063
+ async function resolveLocaleAt(lat, lon){
1064
+ try{
1065
+ const r = await fetch("/api/locale", {
1066
+ method:"POST", headers:{"Content-Type":"application/json"},
1067
+ body: JSON.stringify({ lat, lon })
1068
+ });
1069
+ if (r.ok){ const j = await r.json(); if (j && j.resolved) State.locale = j; }
1070
+ }catch(_){}
1071
+ }
1072
  function requestLocale(){
1073
+ if (State.locale) return;
1074
+ // No geolocation API at all -> default to San Francisco.
1075
+ if (!navigator.geolocation){ resolveLocaleAt(DEFAULT_LAT, DEFAULT_LON); return; }
1076
+ navigator.geolocation.getCurrentPosition(
1077
+ (pos)=>{ resolveLocaleAt(pos.coords.latitude, pos.coords.longitude); },
1078
+ ()=>{ resolveLocaleAt(DEFAULT_LAT, DEFAULT_LON); }, // denied/unavailable -> San Francisco
1079
+ { timeout:8000, maximumAge:600000 });
 
 
 
 
1080
  }
1081
  function pickSong(){
1082
  const bank = (State.songs && State.songs.length) ? State.songs : CLIENT_SONGS;
 
1643
  K. POWER, WIRING, SEED, rAF LOOP
1644
  =================================================================== */
1645
  function setBroadcasting(on){
1646
+ // The station is "ON AIR" (tube + label lit) the whole time it's powered -- a
1647
+ // continuous broadcast -- not only while a voice clip plays. The tape reels
1648
+ // still spin while a voice clip is actually sounding, as live feedback.
1649
+ els.body.classList.toggle("broadcasting", !!State.powered);
1650
+ els.onairLabel.textContent = State.powered ? "ON AIR" : "STANDBY";
1651
  const spin = on && !State.reducedMotion;
1652
  els.reelL.classList.toggle("spin", spin);
1653
  els.reelR.classList.toggle("spin", spin);
 
1663
  els.power.setAttribute("aria-pressed","true");
1664
  els.pwrTxt.innerHTML = "ON AIR";
1665
  els.handset.disabled = false;
1666
+ els.body.classList.add("broadcasting"); // light the ON AIR sign the moment we're powered
1667
+ els.onairLabel.textContent = "ON AIR";
1668
  updateHissForTuning();
1669
  applyMeter(); // gauge -> SIGNAL
1670
  rampGain(Sound.crackleGain, 0.05, 0.6); // bring up the vinyl/room bed
tests/test_proxy.py CHANGED
@@ -100,3 +100,68 @@ def test_segment_fallback_is_never_failing_and_well_shaped():
100
  assert fb["audio_b64"] # a real (silent) WAV bed
101
  for key in ("mood", "arc_cue", "words", "wtimes", "wdurations"):
102
  assert key in fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  assert fb["audio_b64"] # a real (silent) WAV bed
101
  for key in ("mood", "arc_cue", "words", "wtimes", "wdurations"):
102
  assert key in fb
103
+
104
+
105
+ # ---------------------------------------------------------------------------
106
+ # Regression (bug 2): the all-caps station name must be normalized before TTS so
107
+ # Kokoro speaks "Nightwave" instead of spelling N-I-G-H-T-W-A-V-E. _speakable was
108
+ # dropped during a proxy rebuild and call_speak sent raw text.
109
+ # ---------------------------------------------------------------------------
110
+ def test_speakable_titlecases_nightwave():
111
+ assert proxy._speakable("You're on NIGHTWAVE 98.6") == "You're on Nightwave 98.6"
112
+ assert proxy._speakable("night wave") == "Nightwave"
113
+ assert proxy._speakable(None) == ""
114
+
115
+
116
+ def test_call_speak_normalizes_nightwave_before_tts(monkeypatch):
117
+ sent = {}
118
+
119
+ class _Resp:
120
+ def raise_for_status(self):
121
+ pass
122
+
123
+ def json(self):
124
+ return {"audio_b64": "", "words": [], "wtimes": [], "wdurations": []}
125
+
126
+ class _Client:
127
+ def post(self, url, headers=None, json=None):
128
+ sent.update(json or {})
129
+ return _Resp()
130
+
131
+ monkeypatch.setattr(proxy, "is_mock", lambda: False)
132
+ monkeypatch.setattr(proxy, "_modal_url", lambda: "http://modal.test")
133
+ monkeypatch.setattr(proxy, "_modal_headers", lambda: {})
134
+ monkeypatch.setattr(proxy, "_get_client", lambda: _Client())
135
+ proxy.call_speak("This is NIGHTWAVE, the last station on the dial.")
136
+ assert "NIGHTWAVE" not in sent["text"]
137
+ assert "Nightwave" in sent["text"]
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # Regression (bug 4): the caller turn must ANSWER the caller, not echo. The
142
+ # straight station uses build_host_prompt("caller") + an answer-directive user
143
+ # turn so a 1B model doesn't parrot a question that appears verbatim in context.
144
+ # call_turn had reverted to the arc builder + raw caller_text as the user message.
145
+ # ---------------------------------------------------------------------------
146
+ def test_call_turn_answers_without_echoing_the_question(monkeypatch):
147
+ captured = {}
148
+ monkeypatch.setattr(proxy, "call_asr", lambda a: {"text": "are you real?"})
149
+
150
+ def _brain(system, messages):
151
+ captured["system"] = system
152
+ captured["messages"] = messages
153
+ return {"text": "Real as the rain on the glass, friend.", "mood": "warm", "arc_cue": "none"}
154
+
155
+ monkeypatch.setattr(proxy, "call_brain", _brain)
156
+ monkeypatch.setattr(proxy, "call_speak",
157
+ lambda text, voice=arc.VOICE: {"audio_b64": "", "words": [],
158
+ "wtimes": [], "wdurations": []})
159
+ out = proxy.call_turn("oblivious", 0, "<b64>")
160
+ # straight-station host prompt (names the DJ), NOT the arc builder
161
+ assert arc.DJ_NAME in captured["system"]
162
+ # the user turn carries an answer-directive, not the bare echoed question
163
+ user = captured["messages"][-1]["content"]
164
+ assert "do not repeat" in user.lower()
165
+ assert user.strip() != "are you real?"
166
+ assert out["caller_text"] == "are you real?"
167
+ assert out["meter_delta"] >= 14 # an identity/reality question pushes the meter