"""Multi-Modal Playground.

A small single-file HTTP server exposing speech-to-text, text-to-speech and
LLM chat, each in two flavors:

* HTML form endpoints (``/stt``, ``/tts``, ``/chat``) that render result pages;
* JSON API endpoints (``/api/stt``, ``/api/tts``, ``/api/chat``) used by the
  browser-side voice-chat page.

STT/TTS are proxied to a Cloudflare multi-modal inference endpoint; chat goes
to an NVIDIA-hosted Llama model via the OpenAI-compatible client.
"""

import http.server
import json
import os
import re
import socketserver
import urllib.parse  # kept from original; may be used by code outside this view
from html import escape

# NOTE(review): cgi is deprecated (PEP 594) and removed in Python 3.13.
# FieldStorage usage below should be migrated to a multipart parser.
import cgi

from curl_cffi import requests
from openai import OpenAI

PORT = 7860
STT_URL = (
    "https://multi-modal.ai.cloudflare.com/api/inference"
    "?model=@cf/deepgram/nova-3&field=audio"
)
TTS_URL = "https://multi-modal.ai.cloudflare.com/api/inference"

# SECURITY: the API key was hardcoded in source. It is now read from the
# environment when available; the original literal is kept only as a fallback
# so existing deployments keep working. This key has been committed to source
# control and should be rotated.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get(
        "NVIDIA_API_KEY",
        "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",
    ),
)


def simple_md(text):
    """Render a tiny Markdown subset (**bold**, *italic*, `code`, newlines)
    to inline HTML.

    The input is expected to be HTML-escaped already (see the /chat handler,
    which calls ``simple_md(escape(...))``).
    """
    # NOTE(review): the replacement markup was garbled in the original source
    # (all HTML tags were stripped). <b>/<i>/<code>/<br> below are a
    # reconstruction consistent with the escape-then-render call site --
    # confirm against the intended UI.
    text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
    text = re.sub(r'`(.*?)`', r'<code>\1</code>', text)
    text = re.sub(r'\n', r'<br>\n', text)
    return text


# NOTE(review): every HTML template below had its markup stripped in the
# recovered source; only the visible page text survived. The markup here is a
# minimal functional reconstruction that preserves the surviving text, the
# form field names the handlers read ('audio', 'text', 'message') and the
# .format() placeholders -- verify against the original pages.

MAIN_HTML = """<!doctype html>
<html>
<head><title>Multi-Modal Playground</title></head>
<body>
<h1>Multi-Modal Playground</h1>

<h2>STT (Nova-3)</h2>
<form action="/stt" method="post" enctype="multipart/form-data">
  <label>Upload audio: <input type="file" name="audio" accept="audio/*"></label>
  <button type="submit">Transcribe</button>
</form>

<h2>TTS (AURA-1)</h2>
<form action="/tts" method="post">
  <label>Enter text: <input type="text" name="text"></label>
  <button type="submit">Speak</button>
</form>

<h2>Chat (Llama) - Single Turn</h2>
<form action="/chat" method="post">
  <label>Enter message: <input type="text" name="message"></label>
  <button type="submit">Send</button>
</form>

<h2>Voice Chat</h2>
<p><a href="/voicechat">Go to Voice Chat</a></p>

<p><a href="/">Refresh Playground</a></p>
</body>
</html>"""

VOICECHAT_HTML = """<!doctype html>
<html>
<head><title>Voice Chat</title></head>
<body>
<h1>Voice Chat</h1>
<p id="status">Status: Click to start recording</p>
<button id="record">Record</button>
<p><a href="/">Back to Playground</a></p>
<script>
// NOTE(review): the original inline script was lost with the stripped markup.
// This reconstruction wires the documented pipeline: record audio, POST it to
// /api/stt, send the transcript to /api/chat, then speak the reply via /api/tts.
let recorder = null;
let chunks = [];
const status = document.getElementById('status');
document.getElementById('record').onclick = async () => {
  if (recorder && recorder.state === 'recording') { recorder.stop(); return; }
  const stream = await navigator.mediaDevices.getUserMedia({audio: true});
  recorder = new MediaRecorder(stream);
  chunks = [];
  recorder.ondataavailable = e => chunks.push(e.data);
  recorder.onstop = async () => {
    status.textContent = 'Status: transcribing...';
    const blob = new Blob(chunks);
    const sttResp = await fetch('/api/stt', {method: 'POST', body: blob});
    const stt = await sttResp.json();
    const transcript = JSON.stringify(stt);
    status.textContent = 'Status: thinking...';
    const chatResp = await fetch('/api/chat', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({messages: [{role: 'user', content: transcript}]})
    });
    const chat = await chatResp.json();
    status.textContent = 'Status: speaking...';
    const ttsResp = await fetch('/api/tts', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({text: chat.response})
    });
    const tts = await ttsResp.json();
    new Audio('data:audio/mp3;base64,' + tts.audio).play();
    status.textContent = 'Status: Click to start recording';
  };
  recorder.start();
  status.textContent = 'Status: recording... click again to stop';
};
</script>
</body>
</html>"""

STT_RESULT_HTML = """<!doctype html>
<html>
<head><title>STT Result</title></head>
<body>
<h1>STT Transcription Result</h1>
<pre>{result}</pre>
<p><a href="/">Back to Playground</a></p>
</body>
</html>"""

TTS_RESULT_HTML = """<!doctype html>
<html>
<head><title>TTS Result</title></head>
<body>
<h1>TTS Generated Audio</h1>
<audio controls src="data:audio/mp3;base64,{audio_b64}"></audio>
<p><a href="/">Back to Playground</a></p>
</body>
</html>"""

CHAT_RESULT_HTML = """<!doctype html>
<html>
<head><title>Chat Result</title></head>
<body>
<h1>Chat Response</h1>
<p><b>You:</b> {user_message}</p>
<p><b>Assistant:</b> {response}</p>
<p><a href="/">Back to Playground</a></p>
</body>
</html>"""


class Handler(http.server.BaseHTTPRequestHandler):
    """Routes GET page requests and POST form/JSON endpoints."""

    # ---- response / request helpers ---------------------------------------

    def _send_html(self, html):
        """Send a 200 text/html response."""
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(html.encode())

    def _send_json(self, payload_bytes):
        """Send a 200 application/json response from raw bytes."""
        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(payload_bytes)

    def _read_body(self):
        """Read the raw request body (empty bytes if no Content-Length)."""
        # .get() instead of [] so a missing header yields b"" rather than
        # an unhandled KeyError -> 500.
        length = int(self.headers.get('Content-Length', 0))
        return self.rfile.read(length)

    def _parse_form(self):
        """Parse a POSTed form (urlencoded or multipart) via cgi.FieldStorage."""
        return cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD': 'POST'},
        )

    def _transcribe(self, audio_bytes):
        """Proxy raw audio bytes to the Cloudflare STT endpoint; return the response."""
        return requests.post(STT_URL, data=audio_bytes, impersonate="chrome")

    def _synthesize(self, text):
        """Call the Cloudflare TTS endpoint; return base64-encoded audio.

        NOTE(review): the UI advertises AURA-1 but this payload requests
        melotts -- confirm which model is intended.
        """
        payload = {"model": "@cf/myshell-ai/melotts", "params": {"prompt": text}}
        r = requests.post(TTS_URL, json=payload, impersonate="chrome")
        return r.json()["response"]["audio"]

    def _complete(self, messages):
        """Run a non-streaming Llama completion and return the reply text."""
        completion = client.chat.completions.create(
            model="meta/llama-3.2-1b-instruct",
            messages=messages,
            temperature=0.2,
            top_p=0.7,
            max_tokens=1024,
            stream=False,
        )
        return completion.choices[0].message.content

    # ---- GET routing -------------------------------------------------------

    def do_GET(self):
        # Strip the query string so '/?x=1' still matches '/'.
        route = self.path.split('?')[0]
        if route == '/':
            self._send_html(MAIN_HTML)
        elif route == '/voicechat':
            self._send_html(VOICECHAT_HTML)
        else:
            self.send_error(404)

    # ---- POST routing ------------------------------------------------------

    def do_POST(self):
        if self.path == '/api/stt':
            self._handle_api_stt()
        elif self.path == '/stt':
            self._handle_form_stt()
        elif self.path == '/tts':
            self._handle_form_tts()
        elif self.path == '/chat':
            self._handle_form_chat()
        elif self.path == '/api/tts':
            self._handle_api_tts()
        elif self.path == '/api/chat':
            self._handle_api_chat()
        else:
            self.send_error(404)

    # ---- endpoint handlers -------------------------------------------------

    def _handle_api_stt(self):
        """JSON API: forward the raw request body as audio; relay the upstream JSON."""
        r = self._transcribe(self._read_body())
        self._send_json(r.content)

    def _handle_form_stt(self):
        """HTML form: transcribe an uploaded 'audio' file and render the result page."""
        form = self._parse_form()
        if 'audio' in form:
            fileitem = form['audio']
            if fileitem.file:
                r = self._transcribe(fileitem.file.read())
                try:
                    result_json = json.dumps(r.json(), indent=2)
                except ValueError:
                    # Upstream did not return JSON; show the raw text instead.
                    result_json = str(r.text)
                self._send_html(STT_RESULT_HTML.format(result=escape(result_json)))
                return
        self.send_error(400, "No audio file")

    def _handle_form_tts(self):
        """HTML form: synthesize the 'text' field and render an audio player page."""
        form = self._parse_form()
        if 'text' in form:
            text = form['text'].value.strip()
            if text:
                audio_b64 = self._synthesize(text)
                self._send_html(TTS_RESULT_HTML.format(audio_b64=escape(audio_b64)))
                return
        self.send_error(400, "No text provided")

    def _handle_form_chat(self):
        """HTML form: run a single-turn chat and render the exchange."""
        form = self._parse_form()
        if 'message' in form:
            user_message = form['message'].value.strip()
            if user_message:
                reply = self._complete([{"role": "user", "content": user_message}])
                # Escape first, then render the markdown subset, so model
                # output cannot inject raw HTML.
                self._send_html(CHAT_RESULT_HTML.format(
                    user_message=escape(user_message),
                    response=simple_md(escape(reply)),
                ))
                return
        self.send_error(400, "No message provided")

    def _handle_api_tts(self):
        """JSON API: {"text": ...} -> {"audio": <base64>}."""
        req_data = json.loads(self._read_body().decode('utf-8'))
        audio_b64 = self._synthesize(req_data['text'])
        self._send_json(json.dumps({"audio": audio_b64}).encode())

    def _handle_api_chat(self):
        """JSON API: {"messages": [...]} -> {"response": <text>}."""
        req_data = json.loads(self._read_body().decode('utf-8'))
        reply = self._complete(req_data['messages'])
        self._send_json(json.dumps({"response": reply}).encode())


def main():
    """Serve forever on all interfaces."""
    with socketserver.TCPServer(("0.0.0.0", PORT), Handler) as server:
        print(f"Server: {PORT}")
        server.serve_forever()


# Guard added: the original started the server at import time, which blocks
# any attempt to import this module (tests, tooling).
if __name__ == "__main__":
    main()