Spaces:
Running
Running
SamiKoen committed on
Commit ·
152b885
1
Parent(s): bc81313
Sadelestirme: OpenAI native interrupt davranisi (anlik kesilir), client-side barge-in mantigi tamamen kaldirildi. Sistem promptu: kisa cevap kurali
Browse files
- app.py +7 -4
- static/index.html +7 -68
app.py
CHANGED
|
@@ -59,12 +59,15 @@ def build_session_instructions() -> str:
|
|
| 59 |
base = "Trek Bisiklet uzmani bir satis temsilcisisin."
|
| 60 |
|
| 61 |
voice_addon = (
|
| 62 |
-
"\n\nSESLI SOHBET KURALLARI:\n"
|
| 63 |
-
"- Cevaplarin
|
|
|
|
|
|
|
| 64 |
"- Markdown, * veya emoji KULLANMA.\n"
|
| 65 |
"- HER ZAMAN 'siz' ile hitap et, soru ile bitirme.\n"
|
| 66 |
"- Stok/fiyat sorulari geldiginde get_warehouse_stock fonksiyonunu cagir.\n"
|
| 67 |
-
"-
|
|
|
|
| 68 |
)
|
| 69 |
return apply_pronunciation_fixes(base + voice_addon)
|
| 70 |
|
|
@@ -145,7 +148,7 @@ async def realtime_relay(client_ws: WebSocket):
|
|
| 145 |
"threshold": 0.5,
|
| 146 |
"prefix_padding_ms": 300,
|
| 147 |
"silence_duration_ms": 700,
|
| 148 |
-
"interrupt_response":
|
| 149 |
"create_response": True,
|
| 150 |
},
|
| 151 |
"tools": TOOLS,
|
|
|
|
| 59 |
base = "Trek Bisiklet uzmani bir satis temsilcisisin."
|
| 60 |
|
| 61 |
voice_addon = (
|
| 62 |
+
"\n\nSESLI SOHBET KURALLARI (cok onemli):\n"
|
| 63 |
+
"- Cevaplarin TELEFON GORUSMESI gibi olsun: KISA, NET, ozet.\n"
|
| 64 |
+
"- En fazla 1-2 cumle. Liste yapma, detayli aciklama yapma.\n"
|
| 65 |
+
"- Musteri detay isterse o zaman uzat.\n"
|
| 66 |
"- Markdown, * veya emoji KULLANMA.\n"
|
| 67 |
"- HER ZAMAN 'siz' ile hitap et, soru ile bitirme.\n"
|
| 68 |
"- Stok/fiyat sorulari geldiginde get_warehouse_stock fonksiyonunu cagir.\n"
|
| 69 |
+
"- Fonksiyon sonucunu DOGAL ve KISA bir cumleyle ozetle.\n"
|
| 70 |
+
"- Ornek: 'Caddebostan magazasinda mevcut, 250 bin lira.' (uzun aciklama yok).\n"
|
| 71 |
)
|
| 72 |
return apply_pronunciation_fixes(base + voice_addon)
|
| 73 |
|
|
|
|
| 148 |
"threshold": 0.5,
|
| 149 |
"prefix_padding_ms": 300,
|
| 150 |
"silence_duration_ms": 700,
|
| 151 |
+
"interrupt_response": True, # Kullanici konusunca asistan ANINDA kesilir
|
| 152 |
"create_response": True,
|
| 153 |
},
|
| 154 |
"tools": TOOLS,
|
static/index.html
CHANGED
|
@@ -167,21 +167,7 @@ let analyserData = null;
|
|
| 167 |
let freqData = null;
|
| 168 |
let assistantSpeaking = false; // Asistan ses ciktiyor mu?
|
| 169 |
|
| 170 |
-
//
|
| 171 |
-
// "Son SPEAKING_WINDOW icinde toplam SPEAKING_REQUIRED konustuysan tetikle"
|
| 172 |
-
// Boylece dogal duraklamalar timer'i sifirlamaz — toplam aktif konusma sayilir.
|
| 173 |
-
const BARGE_IN_THRESHOLD = 0.012;
|
| 174 |
-
const SPEAKING_WINDOW_MS = 4000; // 4sn pencere
|
| 175 |
-
const SPEAKING_REQUIRED_MS = 2000; // toplam 2sn konusma
|
| 176 |
-
const FRAME_DURATION_MS = 100; // worklet frame ~100ms
|
| 177 |
-
const LEVEL_SMOOTH_WINDOW = 5;
|
| 178 |
-
let bargeInTriggered = false;
|
| 179 |
-
let openAiVadActive = false;
|
| 180 |
-
let levelHistory = [];
|
| 181 |
-
let speakingFrames = []; // konusma frame'lerinin timestamp listesi
|
| 182 |
-
let lastBargeDebugLog = 0;
|
| 183 |
-
|
| 184 |
-
// Aktif audio playback source'lari (cancel icin)
|
| 185 |
let activeAudioSources = [];
|
| 186 |
|
| 187 |
const $ = (id) => document.getElementById(id);
|
|
@@ -628,35 +614,6 @@ async function connect() {
|
|
| 628 |
if (ws?.readyState !== WebSocket.OPEN) return;
|
| 629 |
const b64 = arrayBufferToBase64(e.data.pcm);
|
| 630 |
ws.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: b64 }));
|
| 631 |
-
|
| 632 |
-
// Barge-in: cumulative — son 4sn icinde toplam 2sn konusma
|
| 633 |
-
if (assistantSpeaking && !bargeInTriggered) {
|
| 634 |
-
const now = Date.now();
|
| 635 |
-
levelHistory.push(e.data.level);
|
| 636 |
-
if (levelHistory.length > LEVEL_SMOOTH_WINDOW) levelHistory.shift();
|
| 637 |
-
const avgLevel = levelHistory.reduce((a, b) => a + b, 0) / levelHistory.length;
|
| 638 |
-
|
| 639 |
-
const speaking = avgLevel > BARGE_IN_THRESHOLD || openAiVadActive;
|
| 640 |
-
if (speaking) speakingFrames.push(now);
|
| 641 |
-
// Penceren disindaki frame'leri sil
|
| 642 |
-
speakingFrames = speakingFrames.filter(t => now - t < SPEAKING_WINDOW_MS);
|
| 643 |
-
const totalSpeakingMs = speakingFrames.length * FRAME_DURATION_MS;
|
| 644 |
-
|
| 645 |
-
// Saniyelik debug
|
| 646 |
-
if (now - lastBargeDebugLog > 1000) {
|
| 647 |
-
lastBargeDebugLog = now;
|
| 648 |
-
if (totalSpeakingMs > 100) {
|
| 649 |
-
console.log(`[barge-in] toplam=${totalSpeakingMs}ms avgLevel=${avgLevel.toFixed(3)} vad=${openAiVadActive}`);
|
| 650 |
-
}
|
| 651 |
-
}
|
| 652 |
-
|
| 653 |
-
if (totalSpeakingMs >= SPEAKING_REQUIRED_MS) {
|
| 654 |
-
triggerBargeIn();
|
| 655 |
-
}
|
| 656 |
-
} else if (!assistantSpeaking) {
|
| 657 |
-
levelHistory = [];
|
| 658 |
-
speakingFrames = [];
|
| 659 |
-
}
|
| 660 |
};
|
| 661 |
src.connect(workletNode);
|
| 662 |
|
|
@@ -682,26 +639,22 @@ function handleEvent(evt) {
|
|
| 682 |
if (evt.delta) playPCM16(base64ToInt16(evt.delta));
|
| 683 |
break;
|
| 684 |
case 'input_audio_buffer.speech_started':
|
|
|
|
| 685 |
if (assistantSpeaking) {
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
} else {
|
| 689 |
-
setStatus('Sizi dinliyorum...', 'connected');
|
| 690 |
}
|
|
|
|
| 691 |
break;
|
| 692 |
case 'input_audio_buffer.speech_stopped':
|
| 693 |
-
openAiVadActive = false;
|
| 694 |
if (!assistantSpeaking) setStatus('Dusunuyor...', 'connecting');
|
| 695 |
break;
|
| 696 |
case 'response.created':
|
| 697 |
setStatus('Yanitliyor', 'connected');
|
| 698 |
assistantSpeaking = true;
|
| 699 |
-
bargeInTriggered = false;
|
| 700 |
-
speakingFrames = [];
|
| 701 |
break;
|
| 702 |
case 'response.done':
|
| 703 |
assistantSpeaking = false;
|
| 704 |
-
speakingFrames = [];
|
| 705 |
setStatus('Bagli — konusabilirsiniz', 'connected');
|
| 706 |
if (evt.response?.status === 'failed')
|
| 707 |
console.error('[error]', evt.response?.status_details);
|
|
@@ -715,8 +668,8 @@ function handleEvent(evt) {
|
|
| 715 |
|
| 716 |
function playPCM16(i16) {
|
| 717 |
if (!playbackCtx || !analyser) return;
|
| 718 |
-
//
|
| 719 |
-
if (
|
| 720 |
|
| 721 |
const f32 = new Float32Array(i16.length);
|
| 722 |
for (let i = 0; i < i16.length; i++) f32[i] = i16[i] / 0x8000;
|
|
@@ -745,26 +698,12 @@ function stopAllAudio() {
|
|
| 745 |
if (playbackCtx) playbackTime = playbackCtx.currentTime;
|
| 746 |
}
|
| 747 |
|
| 748 |
-
function triggerBargeIn() {
|
| 749 |
-
console.log('[barge-in] toplam 2sn+ konusuldu — asistan susturuluyor');
|
| 750 |
-
if (ws?.readyState === WebSocket.OPEN) {
|
| 751 |
-
ws.send(JSON.stringify({ type: 'response.cancel' }));
|
| 752 |
-
}
|
| 753 |
-
stopAllAudio();
|
| 754 |
-
bargeInTriggered = true;
|
| 755 |
-
speakingFrames = [];
|
| 756 |
-
assistantSpeaking = false;
|
| 757 |
-
openAiVadActive = false;
|
| 758 |
-
setStatus('Sizi dinliyorum...', 'connected');
|
| 759 |
-
}
|
| 760 |
|
| 761 |
function disconnect() {
|
| 762 |
setStatus('Baglanti kesildi', 'disconnected');
|
| 763 |
$('btnConnect').disabled = false;
|
| 764 |
$('btnDisconnect').disabled = true;
|
| 765 |
assistantSpeaking = false;
|
| 766 |
-
bargeInTriggered = false;
|
| 767 |
-
speakingFrames = [];
|
| 768 |
if (workletNode) { try { workletNode.disconnect(); } catch {} }
|
| 769 |
if (audioCtx) { try { audioCtx.close(); } catch {} }
|
| 770 |
if (playbackCtx) { try { playbackCtx.close(); } catch {} }
|
|
|
|
| 167 |
let freqData = null;
|
| 168 |
let assistantSpeaking = false; // Asistan ses ciktiyor mu?
|
| 169 |
|
| 170 |
+
// Aktif audio playback source'lari (kullanici konusmaya baslayinca temizlenir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
let activeAudioSources = [];
|
| 172 |
|
| 173 |
const $ = (id) => document.getElementById(id);
|
|
|
|
| 614 |
if (ws?.readyState !== WebSocket.OPEN) return;
|
| 615 |
const b64 = arrayBufferToBase64(e.data.pcm);
|
| 616 |
ws.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: b64 }));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
};
|
| 618 |
src.connect(workletNode);
|
| 619 |
|
|
|
|
| 639 |
if (evt.delta) playPCM16(base64ToInt16(evt.delta));
|
| 640 |
break;
|
| 641 |
case 'input_audio_buffer.speech_started':
|
| 642 |
+
// Kullanici konusmaya basladi — eger asistan konusuyorsa pending PCM'leri at
|
| 643 |
if (assistantSpeaking) {
|
| 644 |
+
stopAllAudio();
|
| 645 |
+
assistantSpeaking = false;
|
|
|
|
|
|
|
| 646 |
}
|
| 647 |
+
setStatus('Sizi dinliyorum...', 'connected');
|
| 648 |
break;
|
| 649 |
case 'input_audio_buffer.speech_stopped':
|
|
|
|
| 650 |
if (!assistantSpeaking) setStatus('Dusunuyor...', 'connecting');
|
| 651 |
break;
|
| 652 |
case 'response.created':
|
| 653 |
setStatus('Yanitliyor', 'connected');
|
| 654 |
assistantSpeaking = true;
|
|
|
|
|
|
|
| 655 |
break;
|
| 656 |
case 'response.done':
|
| 657 |
assistantSpeaking = false;
|
|
|
|
| 658 |
setStatus('Bagli — konusabilirsiniz', 'connected');
|
| 659 |
if (evt.response?.status === 'failed')
|
| 660 |
console.error('[error]', evt.response?.status_details);
|
|
|
|
| 668 |
|
| 669 |
function playPCM16(i16) {
|
| 670 |
if (!playbackCtx || !analyser) return;
|
| 671 |
+
// Asistan susturulduysa yeni chunk'lari oynatma
|
| 672 |
+
if (!assistantSpeaking) return;
|
| 673 |
|
| 674 |
const f32 = new Float32Array(i16.length);
|
| 675 |
for (let i = 0; i < i16.length; i++) f32[i] = i16[i] / 0x8000;
|
|
|
|
| 698 |
if (playbackCtx) playbackTime = playbackCtx.currentTime;
|
| 699 |
}
|
| 700 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 701 |
|
| 702 |
function disconnect() {
|
| 703 |
setStatus('Baglanti kesildi', 'disconnected');
|
| 704 |
$('btnConnect').disabled = false;
|
| 705 |
$('btnDisconnect').disabled = true;
|
| 706 |
assistantSpeaking = false;
|
|
|
|
|
|
|
| 707 |
if (workletNode) { try { workletNode.disconnect(); } catch {} }
|
| 708 |
if (audioCtx) { try { audioCtx.close(); } catch {} }
|
| 709 |
if (playbackCtx) { try { playbackCtx.close(); } catch {} }
|