GamerC0der committed on
Commit
93e926b
·
verified ·
1 Parent(s): a6c6e8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -178
app.py CHANGED
@@ -4,6 +4,9 @@ import json
4
  from curl_cffi import requests
5
  from openai import OpenAI
6
  import re
 
 
 
7
 
8
  PORT = 7860
9
  STT_URL = "https://multi-modal.ai.cloudflare.com/api/inference?model=@cf/deepgram/nova-3&field=audio"
@@ -21,7 +24,7 @@ def simple_md(text):
21
  text = re.sub(r'\n', r'<br>', text)
22
  return text
23
 
24
- HTML = """
25
  <!DOCTYPE html>
26
  <html>
27
  <head>
@@ -29,188 +32,73 @@ HTML = """
29
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
30
  </head>
31
  <body>
32
- <h1>Multi-Modal Playground</h1>
33
 
34
  <h2>STT (Nova-3)</h2>
35
- <p>Upload audio:</p>
36
- <input type="file" id="audioFile" accept="audio/*">
37
- <button onclick="transcribe()">Transcribe</button>
38
- <p>Status: <span id="status">Idle</span></p>
39
- <pre id="result" style="background:#eee;padding:10px"></pre>
40
 
41
  <h2>TTS (AURA-1)</h2>
42
- <p>Enter text:</p>
43
- <input type="text" id="textInput" placeholder="Enter text to speak" style="width:100%;">
44
- <button onclick="generateAudio()">Generate Audio</button>
45
- <p>Status: <span id="statusTTS">Idle</span></p>
46
- <audio id="audioPlayer" controls style="width:100%;"></audio>
47
 
48
- <h2>Chat (Llama)</h2>
49
- <div id="messages" style="height:300px;overflow-y:scroll;border:1px solid #ccc;padding:10px;margin-bottom:10px;background:#eee;"></div>
50
- <input type="text" id="chatInput" placeholder="Type message..." style="width:70%;">
51
- <button onclick="sendMessage()">Send</button>
52
- <p>Status: <span id="statusChat">Idle</span></p>
 
53
 
54
- <h2>Voice Chat</h2>
55
- <button id="micBtn" onclick="toggleRecord()" style="font-size:48px;"><i class="fas fa-microphone"></i></button>
56
- <p>Status: <span id="statusVoice">Click to start recording</span></p>
57
- <audio id="voicePlayer" style="display:none;"></audio>
58
-
59
- <script>
60
- let chatMessages = [];
61
- let mediaRecorder;
62
- let audioChunks = [];
63
- let voiceStream;
64
-
65
- function renderMD(text) {
66
- return text.replace(/\*\*(.*?)\*\*/g, '<b>$1</b>')
67
- .replace(/\*(.*?)\*/g, '<i>$1</i>')
68
- .replace(/`(.*?)`/g, '<code>$1</code>')
69
- .replace(/\n/g, '<br>');
70
- }
71
-
72
- function addMessage(role, content) {
73
- const div = document.getElementById('messages');
74
- const msg = document.createElement('div');
75
- msg.innerHTML = `<strong>${role}:</strong> ${renderMD(content)}`;
76
- div.appendChild(msg);
77
- div.scrollTop = div.scrollHeight;
78
- }
79
-
80
- async function sendMessage() {
81
- const input = document.getElementById('chatInput');
82
- const text = input.value.trim();
83
- if (!text) return;
84
- addMessage('user', text);
85
- input.value = '';
86
- document.getElementById('statusChat').innerText = 'Thinking...';
87
- chatMessages.push({role: 'user', content: text});
88
- try {
89
- const res = await fetch('/api/chat', {
90
- method: 'POST',
91
- headers: {'Content-Type': 'application/json'},
92
- body: JSON.stringify({messages: chatMessages})
93
- });
94
- const data = await res.json();
95
- const response = data.response;
96
- addMessage('assistant', response);
97
- chatMessages.push({role: 'assistant', content: response});
98
- document.getElementById('statusChat').innerText = 'Done';
99
- } catch (e) {
100
- console.error('Chat error:', e);
101
- document.getElementById('statusChat').innerText = 'Error';
102
- }
103
- }
104
-
105
- async function toggleRecord() {
106
- const btn = document.getElementById('micBtn');
107
- if (!mediaRecorder || mediaRecorder.state === 'inactive') {
108
- try {
109
- voiceStream = await navigator.mediaDevices.getUserMedia({audio: true});
110
- mediaRecorder = new MediaRecorder(voiceStream);
111
- audioChunks = [];
112
- mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
113
- mediaRecorder.onstop = processVoice;
114
- mediaRecorder.start();
115
- btn.style.color = 'red';
116
- document.getElementById('statusVoice').innerText = 'Recording... Click to stop';
117
- } catch (e) {
118
- console.error('Mic error:', e);
119
- document.getElementById('statusVoice').innerText = 'Error accessing mic';
120
- }
121
- } else {
122
- mediaRecorder.stop();
123
- btn.style.color = 'black';
124
- document.getElementById('statusVoice').innerText = 'Processing...';
125
- }
126
- }
127
-
128
- async function processVoice() {
129
- const audioBlob = new Blob(audioChunks, {type: 'audio/webm'});
130
- if (voiceStream) {
131
- voiceStream.getTracks().forEach(track => track.stop());
132
- }
133
- document.getElementById('statusVoice').innerText = 'Transcribing...';
134
- try {
135
- const sttRes = await fetch('/api/stt', {method: 'POST', body: audioBlob});
136
- const sttData = await sttRes.json();
137
- let userText = '';
138
- if (sttData.results && sttData.results.channels && sttData.results.channels[0] &&
139
- sttData.results.channels[0].alternatives && sttData.results.channels[0].alternatives[0]) {
140
- userText = sttData.results.channels[0].alternatives[0].transcript;
141
- }
142
- if (!userText) {
143
- document.getElementById('statusVoice').innerText = 'No speech detected';
144
- return;
145
- }
146
- document.getElementById('statusVoice').innerText = 'Thinking...';
147
- const chatRes = await fetch('/api/chat', {
148
- method: 'POST',
149
- headers: {'Content-Type': 'application/json'},
150
- body: JSON.stringify({messages: [{role: 'user', content: userText}]})
151
- });
152
- const chatData = await chatRes.json();
153
- const response = chatData.response;
154
- document.getElementById('statusVoice').innerText = 'Generating speech...';
155
- const ttsRes = await fetch('/api/tts', {
156
- method: 'POST',
157
- headers: {'Content-Type': 'application/json'},
158
- body: JSON.stringify({text: response})
159
- });
160
- const ttsData = await ttsRes.json();
161
- const audioPlayer = document.getElementById('voicePlayer');
162
- audioPlayer.src = 'data:audio/webm;base64,' + ttsData.audio;
163
- audioPlayer.play();
164
- document.getElementById('statusVoice').innerText = 'Done';
165
- } catch (e) {
166
- console.error('Voice process error:', e);
167
- document.getElementById('statusVoice').innerText = 'Error';
168
- }
169
- }
170
-
171
- async function transcribe() {
172
- const file = document.getElementById('audioFile').files[0];
173
- if (!file) return;
174
- document.getElementById('status').innerText = 'Processing...';
175
- try {
176
- const res = await fetch('/api/stt', {method: 'POST', body: file});
177
- const data = await res.json();
178
- document.getElementById('result').innerText = JSON.stringify(data, null, 2);
179
- document.getElementById('status').innerText = 'Done';
180
- } catch (e) {
181
- console.error('STT error:', e);
182
- document.getElementById('status').innerText = 'Error';
183
- }
184
- }
185
-
186
- async function generateAudio() {
187
- const text = document.getElementById('textInput').value;
188
- if (!text) return;
189
- document.getElementById('statusTTS').innerText = 'Generating...';
190
- try {
191
- const res = await fetch('/api/tts', {
192
- method: 'POST',
193
- headers: {'Content-Type': 'application/json'},
194
- body: JSON.stringify({text: text})
195
- });
196
- const data = await res.json();
197
- document.getElementById('audioPlayer').src = 'data:audio/webm;base64,' + data.audio;
198
- document.getElementById('statusTTS').innerText = 'Done';
199
- } catch (e) {
200
- console.error('TTS error:', e);
201
- document.getElementById('statusTTS').innerText = 'Error';
202
- }
203
- }
204
-
205
- // Enter key for chat
206
- document.addEventListener('DOMContentLoaded', function() {
207
- document.getElementById('chatInput').addEventListener('keypress', function(e) {
208
- if (e.key === 'Enter') {
209
- sendMessage();
210
- }
211
- });
212
- });
213
- </script>
214
  </body>
215
  </html>
216
  """
@@ -221,12 +109,13 @@ class Handler(http.server.BaseHTTPRequestHandler):
221
  self.send_response(200)
222
  self.send_header("Content-type", "text/html")
223
  self.end_headers()
224
- self.wfile.write(HTML.encode())
225
  else:
226
  self.send_error(404)
227
 
228
  def do_POST(self):
229
  if self.path == '/api/stt':
 
230
  content_length = int(self.headers['Content-Length'])
231
  body = self.rfile.read(content_length)
232
  r = requests.post(STT_URL, data=body, impersonate="chrome")
@@ -234,6 +123,84 @@ class Handler(http.server.BaseHTTPRequestHandler):
234
  self.send_header("Content-type", "application/json")
235
  self.end_headers()
236
  self.wfile.write(r.content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  elif self.path == '/api/tts':
238
  content_length = int(self.headers['Content-Length'])
239
  body_str = self.rfile.read(content_length).decode('utf-8')
@@ -248,6 +215,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
248
  self.send_header("Content-type", "application/json")
249
  self.end_headers()
250
  self.wfile.write(json.dumps(response).encode())
 
251
  elif self.path == '/api/chat':
252
  content_length = int(self.headers['Content-Length'])
253
  body_str = self.rfile.read(content_length).decode('utf-8')
@@ -267,6 +235,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
267
  self.send_header("Content-type", "application/json")
268
  self.end_headers()
269
  self.wfile.write(json.dumps(response).encode())
 
270
  else:
271
  self.send_error(404)
272
 
 
4
  from curl_cffi import requests
5
  from openai import OpenAI
6
  import re
7
+ import cgi
8
+ import urllib.parse
9
+ from html import escape
10
 
11
  PORT = 7860
12
  STT_URL = "https://multi-modal.ai.cloudflare.com/api/inference?model=@cf/deepgram/nova-3&field=audio"
 
24
  text = re.sub(r'\n', r'<br>', text)
25
  return text
26
 
27
+ MAIN_HTML = """
28
  <!DOCTYPE html>
29
  <html>
30
  <head>
 
32
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
33
  </head>
34
  <body>
35
+ <h1>Multi-Modal Playground (No JS)</h1>
36
 
37
  <h2>STT (Nova-3)</h2>
38
+ <form action="/stt" method="post" enctype="multipart/form-data">
39
+ <p>Upload audio:</p>
40
+ <input type="file" name="audio" accept="audio/*" required>
41
+ <button type="submit">Transcribe</button>
42
+ </form>
43
 
44
  <h2>TTS (AURA-1)</h2>
45
+ <form action="/tts" method="post">
46
+ <p>Enter text:</p>
47
+ <input type="text" name="text" placeholder="Enter text to speak" style="width:100%;" required>
48
+ <button type="submit">Generate Audio</button>
49
+ </form>
50
 
51
+ <h2>Chat (Llama) - Single Turn</h2>
52
+ <form action="/chat" method="post">
53
+ <p>Enter message:</p>
54
+ <textarea name="message" placeholder="Type your message..." style="width:100%; height:60px;" required></textarea>
55
+ <button type="submit">Send</button>
56
+ </form>
57
 
58
+ <hr>
59
+ <a href="/">Refresh Playground</a>
60
+ </body>
61
+ </html>
62
+ """
63
+
64
+ STT_RESULT_HTML = """
65
+ <!DOCTYPE html>
66
+ <html>
67
+ <head><title>STT Result</title></head>
68
+ <body>
69
+ <h1>STT Transcription Result</h1>
70
+ <pre style="background:#eee;padding:10px;white-space:pre-wrap;">{result}</pre>
71
+ <a href="/">Back to Playground</a>
72
+ </body>
73
+ </html>
74
+ """
75
+
76
+ TTS_RESULT_HTML = """
77
+ <!DOCTYPE html>
78
+ <html>
79
+ <head><title>TTS Result</title></head>
80
+ <body>
81
+ <h1>TTS Generated Audio</h1>
82
+ <audio controls style="width:100%;">
83
+ <source src="data:audio/webm;base64,{audio_b64}" type="audio/webm">
84
+ Your browser does not support the audio element.
85
+ </audio>
86
+ <p><a href="/">Back to Playground</a></p>
87
+ </body>
88
+ </html>
89
+ """
90
+
91
+ CHAT_RESULT_HTML = """
92
+ <!DOCTYPE html>
93
+ <html>
94
+ <head><title>Chat Result</title></head>
95
+ <body>
96
+ <h1>Chat Response</h1>
97
+ <div style="border:1px solid #ccc;padding:10px;margin-bottom:10px;background:#eee;">
98
+ <strong>You:</strong> {user_message}<br><br>
99
+ <strong>Assistant:</strong> {response}
100
+ </div>
101
+ <p><a href="/">Back to Playground</a></p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  </body>
103
  </html>
104
  """
 
109
  self.send_response(200)
110
  self.send_header("Content-type", "text/html")
111
  self.end_headers()
112
+ self.wfile.write(MAIN_HTML.encode())
113
  else:
114
  self.send_error(404)
115
 
116
  def do_POST(self):
117
  if self.path == '/api/stt':
118
+ # Keep the old /api/stt endpoint for backward compatibility (the JS-less UI posts to /stt instead)
119
  content_length = int(self.headers['Content-Length'])
120
  body = self.rfile.read(content_length)
121
  r = requests.post(STT_URL, data=body, impersonate="chrome")
 
123
  self.send_header("Content-type", "application/json")
124
  self.end_headers()
125
  self.wfile.write(r.content)
126
+ return
127
+
128
+ elif self.path == '/stt':
129
+ form = cgi.FieldStorage(
130
+ fp=self.rfile,
131
+ headers=self.headers,
132
+ environ={'REQUEST_METHOD': 'POST'}
133
+ )
134
+ if 'audio' in form:
135
+ fileitem = form['audio']
136
+ if fileitem.file:
137
+ body = fileitem.file.read()
138
+ r = requests.post(STT_URL, data=body, impersonate="chrome")
139
+ try:
140
+ result_json = json.dumps(r.json(), indent=2)
141
+ except:
142
+ result_json = str(r.text)
143
+ result_html = STT_RESULT_HTML.format(result=escape(result_json))
144
+ self.send_response(200)
145
+ self.send_header("Content-type", "text/html")
146
+ self.end_headers()
147
+ self.wfile.write(result_html.encode())
148
+ return
149
+ self.send_error(400, "No audio file")
150
+
151
+ elif self.path == '/tts':
152
+ form = cgi.FieldStorage(
153
+ fp=self.rfile,
154
+ headers=self.headers,
155
+ environ={'REQUEST_METHOD': 'POST'}
156
+ )
157
+ if 'text' in form:
158
+ text = form['text'].value.strip()
159
+ if text:
160
+ tts_payload = {"model": "@cf/myshell-ai/melotts", "params": {"prompt": text}}
161
+ r = requests.post(TTS_URL, json=tts_payload, impersonate="chrome")
162
+ resp_data = r.json()
163
+ audio_b64 = resp_data["response"]["audio"]
164
+ result_html = TTS_RESULT_HTML.format(audio_b64=escape(audio_b64))
165
+ self.send_response(200)
166
+ self.send_header("Content-type", "text/html")
167
+ self.end_headers()
168
+ self.wfile.write(result_html.encode())
169
+ return
170
+ self.send_error(400, "No text provided")
171
+
172
+ elif self.path == '/chat':
173
+ form = cgi.FieldStorage(
174
+ fp=self.rfile,
175
+ headers=self.headers,
176
+ environ={'REQUEST_METHOD': 'POST'}
177
+ )
178
+ if 'message' in form:
179
+ user_message = form['message'].value.strip()
180
+ if user_message:
181
+ messages = [{"role": "user", "content": user_message}]
182
+ completion = client.chat.completions.create(
183
+ model="meta/llama-3.2-1b-instruct",
184
+ messages=messages,
185
+ temperature=0.2,
186
+ top_p=0.7,
187
+ max_tokens=1024,
188
+ stream=False
189
+ )
190
+ response_text = completion.choices[0].message.content
191
+ response_html = simple_md(escape(response_text))
192
+ result_html = CHAT_RESULT_HTML.format(
193
+ user_message=escape(user_message),
194
+ response=response_html
195
+ )
196
+ self.send_response(200)
197
+ self.send_header("Content-type", "text/html")
198
+ self.end_headers()
199
+ self.wfile.write(result_html.encode())
200
+ return
201
+ self.send_error(400, "No message provided")
202
+
203
+ # Keep the old /api/tts and /api/chat endpoints for backward compatibility (the form-based UI does not use them)
204
  elif self.path == '/api/tts':
205
  content_length = int(self.headers['Content-Length'])
206
  body_str = self.rfile.read(content_length).decode('utf-8')
 
215
  self.send_header("Content-type", "application/json")
216
  self.end_headers()
217
  self.wfile.write(json.dumps(response).encode())
218
+
219
  elif self.path == '/api/chat':
220
  content_length = int(self.headers['Content-Length'])
221
  body_str = self.rfile.read(content_length).decode('utf-8')
 
235
  self.send_header("Content-type", "application/json")
236
  self.end_headers()
237
  self.wfile.write(json.dumps(response).encode())
238
+
239
  else:
240
  self.send_error(404)
241