Spaces:
Running
Running
NeonClary committed on
Commit ·
7e082a0
1
Parent(s): dc20b43
feat: TTS playback speed control (1x / 1.5x / 2x) in message controls and Settings
Browse files- frontend/src/App.css +44 -0
- frontend/src/App.jsx +59 -0
frontend/src/App.css
CHANGED
|
@@ -239,6 +239,39 @@ html.aj-hide-pointer * {
|
|
| 239 |
flex-shrink: 0;
|
| 240 |
}
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
.aj-chat-wrap {
|
| 243 |
flex: 1;
|
| 244 |
display: flex;
|
|
@@ -817,6 +850,17 @@ html.aj-hide-pointer * {
|
|
| 817 |
background: var(--lc-hover);
|
| 818 |
}
|
| 819 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 820 |
[data-tip] {
|
| 821 |
position: relative;
|
| 822 |
}
|
|
|
|
| 239 |
flex-shrink: 0;
|
| 240 |
}
|
| 241 |
|
| 242 |
+
.aj-speed-options {
|
| 243 |
+
display: flex;
|
| 244 |
+
gap: 6px;
|
| 245 |
+
margin-top: 6px;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
.aj-speed-option {
|
| 249 |
+
padding: 4px 12px;
|
| 250 |
+
border-radius: 6px;
|
| 251 |
+
border: 1px solid var(--lc-border);
|
| 252 |
+
background: var(--lc-input);
|
| 253 |
+
color: var(--lc-muted);
|
| 254 |
+
font-size: 13px;
|
| 255 |
+
font-weight: 600;
|
| 256 |
+
cursor: pointer;
|
| 257 |
+
transition: all 0.15s;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
.aj-speed-option:hover {
|
| 261 |
+
border-color: var(--lc-accent);
|
| 262 |
+
color: var(--lc-accent);
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
.aj-speed-option--active {
|
| 266 |
+
background: var(--lc-accent);
|
| 267 |
+
color: #fff;
|
| 268 |
+
border-color: var(--lc-accent);
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
.aj-speed-option--active:hover {
|
| 272 |
+
color: #fff;
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
.aj-chat-wrap {
|
| 276 |
flex: 1;
|
| 277 |
display: flex;
|
|
|
|
| 850 |
background: var(--lc-hover);
|
| 851 |
}
|
| 852 |
|
| 853 |
+
.aj-tts-speed-btn {
|
| 854 |
+
width: auto;
|
| 855 |
+
padding: 0 6px;
|
| 856 |
+
}
|
| 857 |
+
|
| 858 |
+
.aj-tts-speed-label {
|
| 859 |
+
font-size: 11px;
|
| 860 |
+
font-weight: 700;
|
| 861 |
+
line-height: 1;
|
| 862 |
+
}
|
| 863 |
+
|
| 864 |
[data-tip] {
|
| 865 |
position: relative;
|
| 866 |
}
|
frontend/src/App.jsx
CHANGED
|
@@ -24,6 +24,8 @@ import {
|
|
| 24 |
const STORAGE_PERSONA = 'askjerry_extra_persona'
|
| 25 |
const STORAGE_ALWAYS_SPEAK = 'askjerry_always_speak'
|
| 26 |
const STORAGE_TTS_PRIMED = 'askjerry_tts_primed'
|
|
|
|
|
|
|
| 27 |
const CONTEXT_WINDOW = 8192
|
| 28 |
const MAX_REPLY_TOKENS = 4096
|
| 29 |
const SUMMARIZE_THRESHOLD = 0.55
|
|
@@ -49,6 +51,14 @@ function initialAlwaysSpeak() {
|
|
| 49 |
}
|
| 50 |
}
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
function estimateTokens(text) {
|
| 53 |
return Math.max(1, Math.ceil((text || '').length / 4))
|
| 54 |
}
|
|
@@ -282,6 +292,17 @@ function AssistantSearchBar({ content, show, speak }) {
|
|
| 282 |
<SkipBack size={14} aria-hidden />
|
| 283 |
</button>
|
| 284 |
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
{(speak.playing || speak.paused || (speak.loading && speak.showStop)) && (
|
| 286 |
<button
|
| 287 |
type="button"
|
|
@@ -393,6 +414,7 @@ export default function App() {
|
|
| 393 |
const [ttsPlayingIndex, setTtsPlayingIndex] = useState(null)
|
| 394 |
const [ttsPaused, setTtsPaused] = useState(false)
|
| 395 |
const [alwaysSpeak, setAlwaysSpeak] = useState(initialAlwaysSpeak)
|
|
|
|
| 396 |
const [micListening, setMicListening] = useState(false)
|
| 397 |
const [micTranscribing, setMicTranscribing] = useState(false)
|
| 398 |
const [voiceError, setVoiceError] = useState(null)
|
|
@@ -400,6 +422,8 @@ export default function App() {
|
|
| 400 |
const audioRef = useRef(null)
|
| 401 |
const ttsBlobUrlRef = useRef(null)
|
| 402 |
const ttsSessionRef = useRef(0)
|
|
|
|
|
|
|
| 403 |
const messagesRef = useRef(messages)
|
| 404 |
messagesRef.current = messages
|
| 405 |
const streamingRef = useRef(streaming)
|
|
@@ -599,6 +623,7 @@ export default function App() {
|
|
| 599 |
const a = audioRef.current
|
| 600 |
if (!a) return
|
| 601 |
try {
|
|
|
|
| 602 |
await a.play()
|
| 603 |
setTtsPaused(false)
|
| 604 |
} catch (e) {
|
|
@@ -652,6 +677,7 @@ export default function App() {
|
|
| 652 |
audio.onerror = () => {
|
| 653 |
finish()
|
| 654 |
}
|
|
|
|
| 655 |
audio.play().catch(() => finish())
|
| 656 |
})
|
| 657 |
}, [])
|
|
@@ -843,6 +869,19 @@ export default function App() {
|
|
| 843 |
try { sessionStorage.setItem(STORAGE_ALWAYS_SPEAK, alwaysSpeak ? '1' : '0') } catch { /* */ }
|
| 844 |
}, [alwaysSpeak])
|
| 845 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 846 |
useEffect(() => {
|
| 847 |
if (ttsContentResolverRef.current) {
|
| 848 |
ttsContentResolverRef.current()
|
|
@@ -1385,6 +1424,24 @@ export default function App() {
|
|
| 1385 |
<p className="aj-options-hint">
|
| 1386 |
When enabled, each assistant reply is read aloud automatically after it finishes generating (uses text-to-speech).
|
| 1387 |
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1388 |
</div>
|
| 1389 |
)}
|
| 1390 |
</div>
|
|
@@ -1475,6 +1532,8 @@ export default function App() {
|
|
| 1475 |
onPause: pauseTts,
|
| 1476 |
onResume: resumeTts,
|
| 1477 |
onReplay: replayTts,
|
|
|
|
|
|
|
| 1478 |
onStopReading: stopTts,
|
| 1479 |
}}
|
| 1480 |
/>
|
|
|
|
| 24 |
const STORAGE_PERSONA = 'askjerry_extra_persona'
|
| 25 |
const STORAGE_ALWAYS_SPEAK = 'askjerry_always_speak'
|
| 26 |
const STORAGE_TTS_PRIMED = 'askjerry_tts_primed'
|
| 27 |
+
const STORAGE_TTS_SPEED = 'askjerry_tts_speed'
|
| 28 |
+
const TTS_SPEED_OPTIONS = [1, 1.5, 2]
|
| 29 |
const CONTEXT_WINDOW = 8192
|
| 30 |
const MAX_REPLY_TOKENS = 4096
|
| 31 |
const SUMMARIZE_THRESHOLD = 0.55
|
|
|
|
| 51 |
}
|
| 52 |
}
|
| 53 |
|
| 54 |
+
/**
 * Read the TTS playback speed persisted in sessionStorage.
 *
 * The stored string is parsed as a float and accepted only when it is one of
 * TTS_SPEED_OPTIONS; a missing, malformed, or unlisted value — or a failure
 * while reading storage — yields the default of 1.
 *
 * @returns {number} A value from TTS_SPEED_OPTIONS, or 1 as the fallback.
 */
function initialTtsSpeed() {
  const fallback = 1
  try {
    const raw = sessionStorage.getItem(STORAGE_TTS_SPEED)
    const parsed = parseFloat(raw)
    return TTS_SPEED_OPTIONS.includes(parsed) ? parsed : fallback
  } catch {
    // Storage could not be read; use the default speed.
    return fallback
  }
}
|
| 61 |
+
|
| 62 |
function estimateTokens(text) {
|
| 63 |
return Math.max(1, Math.ceil((text || '').length / 4))
|
| 64 |
}
|
|
|
|
| 292 |
<SkipBack size={14} aria-hidden />
|
| 293 |
</button>
|
| 294 |
)}
|
| 295 |
+
{(speak.playing || speak.paused) && (
|
| 296 |
+
<button
|
| 297 |
+
type="button"
|
| 298 |
+
className="aj-msg-search-btn aj-tts-speed-btn"
|
| 299 |
+
onClick={speak.onCycleSpeed}
|
| 300 |
+
data-tip="Playback speed"
|
| 301 |
+
aria-label={`Playback speed: ${speak.speed}x`}
|
| 302 |
+
>
|
| 303 |
+
<span className="aj-tts-speed-label">{speak.speed}x</span>
|
| 304 |
+
</button>
|
| 305 |
+
)}
|
| 306 |
{(speak.playing || speak.paused || (speak.loading && speak.showStop)) && (
|
| 307 |
<button
|
| 308 |
type="button"
|
|
|
|
| 414 |
const [ttsPlayingIndex, setTtsPlayingIndex] = useState(null)
|
| 415 |
const [ttsPaused, setTtsPaused] = useState(false)
|
| 416 |
const [alwaysSpeak, setAlwaysSpeak] = useState(initialAlwaysSpeak)
|
| 417 |
+
const [ttsSpeed, setTtsSpeed] = useState(initialTtsSpeed)
|
| 418 |
const [micListening, setMicListening] = useState(false)
|
| 419 |
const [micTranscribing, setMicTranscribing] = useState(false)
|
| 420 |
const [voiceError, setVoiceError] = useState(null)
|
|
|
|
| 422 |
const audioRef = useRef(null)
|
| 423 |
const ttsBlobUrlRef = useRef(null)
|
| 424 |
const ttsSessionRef = useRef(0)
|
| 425 |
+
const ttsSpeedRef = useRef(ttsSpeed)
|
| 426 |
+
ttsSpeedRef.current = ttsSpeed
|
| 427 |
const messagesRef = useRef(messages)
|
| 428 |
messagesRef.current = messages
|
| 429 |
const streamingRef = useRef(streaming)
|
|
|
|
| 623 |
const a = audioRef.current
|
| 624 |
if (!a) return
|
| 625 |
try {
|
| 626 |
+
a.playbackRate = ttsSpeedRef.current
|
| 627 |
await a.play()
|
| 628 |
setTtsPaused(false)
|
| 629 |
} catch (e) {
|
|
|
|
| 677 |
audio.onerror = () => {
|
| 678 |
finish()
|
| 679 |
}
|
| 680 |
+
audio.playbackRate = ttsSpeedRef.current
|
| 681 |
audio.play().catch(() => finish())
|
| 682 |
})
|
| 683 |
}, [])
|
|
|
|
| 869 |
try { sessionStorage.setItem(STORAGE_ALWAYS_SPEAK, alwaysSpeak ? '1' : '0') } catch { /* */ }
|
| 870 |
}, [alwaysSpeak])
|
| 871 |
|
| 872 |
+
// Whenever ttsSpeed changes: write it to sessionStorage (best effort) and
// apply it to the audio element currently held in audioRef, if there is one.
useEffect(() => {
  try {
    sessionStorage.setItem(STORAGE_TTS_SPEED, String(ttsSpeed))
  } catch {
    /* */
  }
  const current = audioRef.current
  if (!current) return
  current.playbackRate = ttsSpeed
}, [ttsSpeed])
|
| 877 |
+
|
| 878 |
+
// Step the TTS speed to the next entry of TTS_SPEED_OPTIONS, wrapping back to
// the first entry after the last. A current value that is not in the list
// (indexOf returns -1) also resolves to the first entry.
const cycleTtsSpeed = useCallback(() => {
  setTtsSpeed((current) => {
    const nextIndex = (TTS_SPEED_OPTIONS.indexOf(current) + 1) % TTS_SPEED_OPTIONS.length
    return TTS_SPEED_OPTIONS[nextIndex]
  })
}, [])
|
| 884 |
+
|
| 885 |
useEffect(() => {
|
| 886 |
if (ttsContentResolverRef.current) {
|
| 887 |
ttsContentResolverRef.current()
|
|
|
|
| 1424 |
<p className="aj-options-hint">
|
| 1425 |
When enabled, each assistant reply is read aloud automatically after it finishes generating (uses text-to-speech).
|
| 1426 |
</p>
|
| 1427 |
+
<label className="aj-field">
|
| 1428 |
+
<span>Playback speed</span>
|
| 1429 |
+
<div className="aj-speed-options">
|
| 1430 |
+
{TTS_SPEED_OPTIONS.map(s => (
|
| 1431 |
+
<button
|
| 1432 |
+
key={s}
|
| 1433 |
+
type="button"
|
| 1434 |
+
className={`aj-speed-option${ttsSpeed === s ? ' aj-speed-option--active' : ''}`}
|
| 1435 |
+
onClick={() => setTtsSpeed(s)}
|
| 1436 |
+
>
|
| 1437 |
+
{s}x
|
| 1438 |
+
</button>
|
| 1439 |
+
))}
|
| 1440 |
+
</div>
|
| 1441 |
+
</label>
|
| 1442 |
+
<p className="aj-options-hint">
|
| 1443 |
+
Controls how fast text-to-speech audio plays. Applies to all messages.
|
| 1444 |
+
</p>
|
| 1445 |
</div>
|
| 1446 |
)}
|
| 1447 |
</div>
|
|
|
|
| 1532 |
onPause: pauseTts,
|
| 1533 |
onResume: resumeTts,
|
| 1534 |
onReplay: replayTts,
|
| 1535 |
+
onCycleSpeed: cycleTtsSpeed,
|
| 1536 |
+
speed: ttsSpeed,
|
| 1537 |
onStopReading: stopTts,
|
| 1538 |
}}
|
| 1539 |
/>
|