ruslanmv committed on
Commit
0e040d4
·
1 Parent(s): fd95484

First version stream chat

Browse files
Files changed (2) hide show
  1. app/routers/chat.py +56 -4
  2. app/templates/chat.html +169 -86
app/routers/chat.py CHANGED
@@ -1,6 +1,9 @@
1
  from fastapi import APIRouter, Depends, HTTPException, Query
 
2
  from pydantic import BaseModel
3
- from typing import List, Optional
 
 
4
  from ..deps import get_settings
5
  from ..core.config import Settings
6
  from ..services.chat_service import ChatService
@@ -25,8 +28,7 @@ class ChatRequest(BaseModel):
25
  for m in reversed(self.messages):
26
  if m.role.lower() == "user":
27
  return m.content
28
- if self.messages:
29
- return self.messages[-1].content
30
  raise ValueError("Body must include 'query'/'question'/'prompt' or 'messages'")
31
 
32
  class ChatResponse(BaseModel):
@@ -43,7 +45,6 @@ async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
43
  answer = await svc.answer(text)
44
  return ChatResponse(answer=answer)
45
  except PermissionError as e:
46
- # Gated model / no license accepted for token
47
  raise HTTPException(status_code=403, detail=str(e))
48
  except Exception as e:
49
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
@@ -58,3 +59,54 @@ async def chat_get(query: str = Query(...), settings: Settings = Depends(get_set
58
  raise HTTPException(status_code=403, detail=str(e))
59
  except Exception as e:
60
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import APIRouter, Depends, HTTPException, Query
2
+ from starlette.responses import StreamingResponse
3
  from pydantic import BaseModel
4
+ from typing import List, Optional, Any, Iterator
5
+ import json, time
6
+
7
  from ..deps import get_settings
8
  from ..core.config import Settings
9
  from ..services.chat_service import ChatService
 
28
  for m in reversed(self.messages):
29
  if m.role.lower() == "user":
30
  return m.content
31
+ return self.messages[-1].content
 
32
  raise ValueError("Body must include 'query'/'question'/'prompt' or 'messages'")
33
 
34
  class ChatResponse(BaseModel):
 
45
  answer = await svc.answer(text)
46
  return ChatResponse(answer=answer)
47
  except PermissionError as e:
 
48
  raise HTTPException(status_code=403, detail=str(e))
49
  except Exception as e:
50
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
 
59
  raise HTTPException(status_code=403, detail=str(e))
60
  except Exception as e:
61
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
62
+
63
+ # ---------- Streaming (SSE) ----------
64
+ def _sse_line(obj: Any) -> str:
65
+ payload = obj if isinstance(obj, str) else json.dumps(obj, ensure_ascii=False)
66
+ return f"data: {payload}\n\n"
67
+
68
@router.get("/chat/stream")
async def chat_stream(query: str = Query(...), settings: Settings = Depends(get_settings)):
    """
    SSE stream of token deltas: emits {"delta": "..."} chunks, then final [DONE].
    """
    service = ChatService(settings)

    def event_stream() -> Iterator[str]:
        # Preamble: an SSE comment line padded with 2048 spaces, then a
        # "ping" event. Both are sent before any model output; the intent is
        # to push intermediaries past their buffering threshold so the
        # client sees the stream start immediately.
        padding = " " * 2048
        yield f":{padding}\n\n"
        yield "event: ping\ndata: 0\n\n"

        emitted = False
        try:
            for token in service.stream_answer(query):
                if not token:
                    # Skip empty/falsy tokens; they carry no text.
                    continue
                emitted = True
                yield _sse_line({"delta": token})
            if not emitted:
                # Always send at least one delta so the client receives a
                # message event even when the model produced nothing.
                yield _sse_line({"delta": ""})
            yield _sse_line("[DONE]")
        except GeneratorExit:
            # The consumer closed the generator; stop quietly.
            return
        except Exception as exc:
            # Report the failure in-band as an {"error": ...} message. If
            # even that cannot be produced, end the stream silently.
            try:
                yield _sse_line({"error": str(exc)})
            except Exception:
                return

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream; charset=utf-8",
        headers={
            # Keep proxies and browsers from caching or transforming the stream.
            "Cache-Control": "no-cache, no-transform",
            # Ask Nginx not to buffer the response.
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
            # Intended to stop Starlette's gzip middleware from buffering SSE.
            "Content-Encoding": "identity",
        },
    )
104
+
105
@router.post("/chat/stream")
async def chat_stream_post(req: ChatRequest, settings: Settings = Depends(get_settings)):
    # POST variant of the SSE endpoint: the prompt is taken from the request
    # body via ChatRequest.as_text() instead of from the query string.
    try:
        prompt = req.as_text()
    except ValueError as exc:
        # as_text() signals a body without usable text with ValueError;
        # surface that to the caller as a 422.
        raise HTTPException(status_code=422, detail=str(exc))
    else:
        # Delegate to the GET handler so both verbs share one streaming path.
        return await chat_stream(query=prompt, settings=settings)
app/templates/chat.html CHANGED
@@ -2,13 +2,7 @@
2
  {% block body %}
3
  <div class="card">
4
  <h3>Chat — Matrix System 1.0</h3>
5
-
6
- <!-- Messages -->
7
- <div id="messages" style="margin-top:14px; display:flex; flex-direction:column; gap:10px; max-height:60vh; overflow:auto;">
8
- <!-- Filled by JS from localStorage -->
9
- </div>
10
-
11
- <!-- Composer -->
12
  <form id="chatForm" style="display:grid; gap:12px; margin-top:14px;">
13
  <textarea id="question" rows="4" placeholder="Ask anything about Matrix EcoSystem, Guardian, or Hub..."></textarea>
14
  <div style="display:flex; gap:10px; align-items:center;">
@@ -30,100 +24,189 @@
30
  box-shadow: 0 4px 16px rgba(0,0,0,0.25), 0 0 0 1px rgba(0,255,156,0.05);
31
  font-family: "Share Tech Mono", monospace;
32
  }
33
- .user { align-self: flex-end; background: #062013; color: var(--text); border-color: #0e2e1a; }
34
- .bot { align-self: flex-start; background: #05140c; color: var(--text); border-color: #0c2416; }
35
- .meta { font-size: 11px; opacity: .6; margin-top: 2px; }
 
 
 
 
 
 
 
 
 
36
  </style>
37
 
38
  <script>
39
- (function () {
40
- const KEY = 'matrix_ai_chat_history';
41
- const messagesEl = document.getElementById('messages');
42
- const form = document.getElementById('chatForm');
43
- const input = document.getElementById('question');
44
- const sendBtn = document.getElementById('sendBtn');
45
- const clearBtn = document.getElementById('clearBtn');
46
-
47
- function loadHistory() {
48
- try {
49
- return JSON.parse(localStorage.getItem(KEY) || '[]');
50
- } catch { return []; }
51
- }
52
- function saveHistory(hist) {
53
- localStorage.setItem(KEY, JSON.stringify(hist.slice(-100))); // cap
54
- }
55
- function msgEl(role, text, ts) {
56
- const wrap = document.createElement('div');
57
- wrap.style.display = 'flex';
58
- wrap.style.flexDirection = 'column';
59
- wrap.style.gap = '2px';
60
- const b = document.createElement('div');
61
- b.className = 'bubble ' + (role === 'user' ? 'user' : 'bot');
62
- b.textContent = text;
63
- const meta = document.createElement('div');
64
- meta.className = 'meta';
65
- meta.textContent = new Date(ts).toLocaleString();
66
- wrap.appendChild(b);
67
- wrap.appendChild(meta);
68
- return wrap;
69
  }
70
- function render(hist) {
71
- messagesEl.innerHTML = '';
72
- hist.forEach(m => messagesEl.appendChild(msgEl(m.role, m.text, m.ts)));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  messagesEl.scrollTop = messagesEl.scrollHeight;
74
  }
75
-
76
- let history = loadHistory();
77
- if (history.length === 0) {
78
- // seed a friendly welcome
79
- history.push({ role: 'bot', text: 'Welcome to MATRIX-AI. Ask me about Matrix System 1.0, Guardian, or the Hub.', ts: Date.now() });
80
- saveHistory(history);
81
  }
82
- render(history);
83
 
84
- clearBtn.addEventListener('click', () => {
85
- history = [];
86
- saveHistory(history);
87
- render(history);
88
- });
89
 
90
- form.addEventListener('submit', async (e) => {
91
- e.preventDefault();
92
- const q = (input.value || '').trim();
93
- if (!q) return;
94
- input.value = '';
95
- sendBtn.disabled = true;
96
 
97
- // append user message
98
- const userMsg = { role: 'user', text: q, ts: Date.now() };
99
- history.push(userMsg);
100
- saveHistory(history);
101
- render(history);
 
 
 
 
 
 
 
 
102
 
 
103
  try {
104
- const port = (window.location.port || (window.location.href.includes('/+/') ? '' : ''));
105
- const base = window.location.origin; // same origin (HF proxies handle it)
106
- const r = await fetch(base.replace(/\/+$/, '') + '/v1/chat', {
107
  method: 'POST',
108
- headers: { 'content-type': 'application/json' },
109
- body: JSON.stringify({ query: q })
110
  });
111
- let answer = '(no answer)';
112
- if (r.ok) {
113
- const data = await r.json();
114
- answer = (data && (data.answer || data.response || JSON.stringify(data))) || answer;
115
- } else {
116
- answer = `HTTP ${r.status}`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
118
- history.push({ role: 'bot', text: answer, ts: Date.now() });
119
  } catch (err) {
120
- history.push({ role: 'bot', text: 'Error: ' + (err && err.message ? err.message : String(err)), ts: Date.now() });
121
- } finally {
122
- saveHistory(history);
123
- render(history);
124
- sendBtn.disabled = false;
125
  }
126
- });
127
- })();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  </script>
129
- {% endblock %}
 
2
  {% block body %}
3
  <div class="card">
4
  <h3>Chat — Matrix System 1.0</h3>
5
+ <div id="messages" style="margin-top:14px; display:flex; flex-direction:column; gap:10px; max-height:60vh; overflow:auto;"></div>
 
 
 
 
 
 
6
  <form id="chatForm" style="display:grid; gap:12px; margin-top:14px;">
7
  <textarea id="question" rows="4" placeholder="Ask anything about Matrix EcoSystem, Guardian, or Hub..."></textarea>
8
  <div style="display:flex; gap:10px; align-items:center;">
 
24
  box-shadow: 0 4px 16px rgba(0,0,0,0.25), 0 0 0 1px rgba(0,255,156,0.05);
25
  font-family: "Share Tech Mono", monospace;
26
  }
27
+ .user { align-self: flex-end; background: #062013; color: var(--text); border-color: #0e2e1a; }
28
+ .bot { align-self: flex-start; background: #05140c; color: var(--text); border-color: #0c2416; }
29
+ .meta { font-size: 11px; opacity: .6; margin-top: 2px; }
30
+ .caret {
31
+ display: inline-block;
32
+ width: 8px; height: 1em; vertical-align: bottom;
33
+ background: var(--matrix);
34
+ margin-left: 2px;
35
+ box-shadow: 0 0 6px rgba(0,255,156,0.5);
36
+ animation: blink 1s steps(1) infinite;
37
+ }
38
+ @keyframes blink { 0%, 49% {opacity: 1;} 50%, 100% {opacity: 0;} }
39
  </style>
40
 
41
  <script>
42
(function () {
  // Chat page controller.
  //
  // Keeps the conversation in localStorage (last 100 entries), renders it into
  // #messages, and on submit streams the bot reply into a live bubble. Three
  // transports are tried in order:
  //   A) EventSource on GET  /v1/chat/stream
  //   B) fetch() + manual SSE parsing on POST /v1/chat/stream
  //   C) plain JSON on POST /v1/chat
  const KEY = 'matrix_ai_chat_history';
  const messagesEl = document.getElementById('messages');
  const form = document.getElementById('chatForm');
  const input = document.getElementById('question');
  const sendBtn = document.getElementById('sendBtn');
  const clearBtn = document.getElementById('clearBtn');

  // Enter to send, Shift+Enter for newline.
  input.addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      // click() is a no-op while the button is disabled, which also prevents
      // a second submit while a reply is still streaming.
      sendBtn.click();
    }
  });

  // Read the stored history; any parse failure yields an empty history.
  function loadHistory() {
    try {
      return JSON.parse(localStorage.getItem(KEY) || '[]');
    } catch {
      return [];
    }
  }

  // Persist the history, capped to the most recent 100 entries.
  function saveHistory(h) {
    localStorage.setItem(KEY, JSON.stringify(h.slice(-100)));
  }

  // Build the DOM for one message: a bubble plus a timestamp line.
  // Returns the wrapper and its parts so callers can update the bubble later.
  function block(role, text, ts) {
    const wrap = document.createElement('div');
    wrap.style.display = 'flex';
    wrap.style.flexDirection = 'column';
    wrap.style.gap = '2px';

    const b = document.createElement('div');
    b.className = 'bubble ' + (role === 'user' ? 'user' : 'bot');
    b.textContent = text; // textContent, not innerHTML: model output is untrusted

    const meta = document.createElement('div');
    meta.className = 'meta';
    meta.textContent = new Date(ts).toLocaleString();

    wrap.appendChild(b);
    wrap.appendChild(meta);
    return { wrap, bubble: b, meta };
  }

  // Re-render the whole history and scroll to the bottom.
  function render(hist) {
    messagesEl.innerHTML = '';
    hist.forEach((m) => messagesEl.appendChild(block(m.role, m.text, m.ts).wrap));
    messagesEl.scrollTop = messagesEl.scrollHeight;
  }

  // Append a single (not yet persisted) message and return its DOM parts.
  function append(role, text) {
    const el = block(role, text, Date.now());
    messagesEl.appendChild(el.wrap);
    messagesEl.scrollTop = messagesEl.scrollHeight;
    return el;
  }

  let history = loadHistory();
  if (history.length === 0) {
    history.push({
      role: 'bot',
      text: 'Welcome to MATRIX-AI. Ask me about Matrix System 1.0, Guardian, or the Hub.',
      ts: Date.now()
    });
    saveHistory(history);
  }
  render(history);

  clearBtn.addEventListener('click', () => {
    history = [];
    saveHistory(history);
    render(history);
  });

  form.addEventListener('submit', async (e) => {
    e.preventDefault();
    const q = (input.value || '').trim();
    if (!q) return;
    input.value = '';
    sendBtn.disabled = true;

    // 1) user message
    history.push({ role: 'user', text: q, ts: Date.now() });
    saveHistory(history);
    render(history);

    // 2) live bot bubble + caret
    const live = append('bot', '');
    const caret = document.createElement('span');
    caret.className = 'caret';
    live.bubble.appendChild(caret);

    let gotChunk = false;   // at least one delta has been displayed
    let finished = false;   // finalize() has run for this request
    let streamed = '';      // text accumulated so far
    let es = null;          // active EventSource, if any
    let fallbackTimer = null;

    function closeES() {
      try { es && es.close(); } catch {}
      es = null;
    }

    // Tear down the EventSource strategy completely (timer + connection).
    function stopSSE() {
      clearTimeout(fallbackTimer);
      closeES();
    }

    function onChunk(delta) {
      if (finished) return; // ignore late data after the reply was finalized
      gotChunk = true;
      streamed += delta || '';
      // Setting textContent drops the caret child, so re-attach it afterwards.
      live.bubble.textContent = streamed;
      live.bubble.appendChild(caret);
      messagesEl.scrollTop = messagesEl.scrollHeight;
    }

    // Commit the final reply exactly once. Several code paths can reach this
    // (DONE marker, error payload, stream end, fallbacks); without the guard
    // the same reply could be pushed to history more than once.
    function finalize(text) {
      if (finished) return;
      finished = true;
      stopSSE();
      caret.remove();
      live.bubble.textContent = text;
      history.push({ role: 'bot', text, ts: Date.now() });
      saveHistory(history);
      render(history);
      sendBtn.disabled = false;
    }

    // Interpret one SSE data payload. Returns true when the payload ended the
    // reply (DONE marker or error), false when streaming should continue.
    function handlePayload(data) {
      if (!data) return false;
      if (data === '[DONE]') { finalize(streamed); return true; }
      let obj;
      try {
        obj = JSON.parse(data);
      } catch {
        onChunk(data); // not JSON: show it as raw text
        return false;
      }
      if (obj === null || typeof obj !== 'object') {
        // JSON primitives have no delta/error fields; show them as raw text.
        onChunk(data);
        return false;
      }
      if (obj.error) { finalize('Error: ' + obj.error); return true; }
      if ('delta' in obj) onChunk(obj.delta || '');
      return false;
    }

    // -------- Strategy C: non-streaming JSON --------
    async function doNonStream() {
      let answer = '(no answer)';
      try {
        const r = await fetch('/v1/chat', {
          method: 'POST',
          headers: { 'content-type': 'application/json' },
          body: JSON.stringify({ query: q })
        });
        if (r.ok) {
          const data = await r.json();
          answer = (data && (data.answer || data.response || JSON.stringify(data))) || answer;
        } else {
          answer = `HTTP ${r.status}`;
        }
      } catch (err) {
        answer = 'Error: ' + (err && err.message ? err.message : String(err));
      }
      finalize(answer);
    }

    // -------- Strategy B: fetch() + manual SSE parsing --------
    async function doFetchStream() {
      try {
        const resp = await fetch('/v1/chat/stream', {
          method: 'POST',
          headers: { 'content-type': 'application/json' },
          body: JSON.stringify({ query: q })
        });
        if (!resp.ok || !resp.body) throw new Error('stream HTTP ' + resp.status);
        const reader = resp.body.getReader();
        const decoder = new TextDecoder('utf-8');
        let buf = '';
        let eventName = ''; // event type of the SSE message being parsed
        for (;;) {
          const { value, done } = await reader.read();
          if (done) break;
          buf += decoder.decode(value, { stream: true });
          const lines = buf.split(/\r?\n/);
          buf = lines.pop() || ''; // keep the trailing partial line for the next read
          for (const line of lines) {
            if (line === '') { eventName = ''; continue; } // blank line ends a message
            if (line.startsWith('event:')) { eventName = line.slice(6).trim(); continue; }
            if (!line.startsWith('data:')) continue;
            // Only default ("message") events carry reply text. Named events
            // such as the server's "ping" (data: 0) must be skipped, otherwise
            // their data would be shown as part of the answer.
            if (eventName && eventName !== 'message') continue;
            if (handlePayload(line.slice(5).trim())) return;
          }
        }
        finalize(streamed);
      } catch (err) {
        // last fallback: non-stream (unless the reply was already committed)
        if (!finished) await doNonStream();
      }
    }

    // -------- Strategy A: SSE via EventSource --------
    const sseUrl = window.location.origin.replace(/\/+$/, '') +
      '/v1/chat/stream?ts=' + Date.now() + '&query=' + encodeURIComponent(q);

    function startSSE() {
      if (!window.EventSource) { doFetchStream(); return; }
      es = new EventSource(sseUrl);

      // If nothing at all arrives quickly, assume the stream is being buffered
      // somewhere and switch to the fetch-based strategy.
      fallbackTimer = setTimeout(() => {
        if (!gotChunk && !finished) {
          stopSSE();
          doFetchStream();
        }
      }, 1500);

      // The server sends a "ping" event before any tokens. Receiving it shows
      // the stream is flowing, so a slow first token must not trigger the
      // fallback (which would issue a second inference request).
      es.addEventListener('ping', () => { clearTimeout(fallbackTimer); });

      es.onmessage = (ev) => { handlePayload(ev.data); };

      es.onerror = () => {
        if (finished) return;
        // Always close: an EventSource left open reconnects automatically,
        // which would re-run the query and append a duplicate answer.
        stopSSE();
        if (gotChunk) {
          finalize(streamed); // keep the partial reply and re-enable the form
        } else {
          doFetchStream();
        }
      };
    }

    startSSE();
  });
})();
211
  </script>
212
+ {% endblock %}