import cgi
import http.server
import json
import os
import re
import socketserver
import urllib.parse
from html import escape

from curl_cffi import requests
from openai import OpenAI
PORT = 7860

# Cloudflare multi-modal endpoints. STT pins the nova-3 model via the query
# string; the TTS model is chosen per-request in the JSON payload instead.
STT_URL = "https://multi-modal.ai.cloudflare.com/api/inference?model=@cf/deepgram/nova-3&field=audio"
TTS_URL = "https://multi-modal.ai.cloudflare.com/api/inference"

# SECURITY: an NVIDIA API key was hard-coded here. Prefer the NVIDIA_API_KEY
# environment variable; the literal is kept only as a fallback so existing
# deployments keep working. This key is committed to source — rotate it.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get(
        "NVIDIA_API_KEY",
        "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",
    ),
)
def simple_md(text):
    """Render a tiny Markdown subset as HTML-ready text.

    Strips ``**bold**``, ``*italic*`` and ```code``` markers (keeping their
    contents) and converts newlines to ``<br>`` so the result keeps its line
    breaks when embedded in an HTML page.

    Args:
        text: already-HTML-escaped markdown-ish text.

    Returns:
        The text with markers removed and newlines replaced by ``<br>``.
    """
    # Drop emphasis/code markers but keep the wrapped text. ** must be
    # handled before * so bold markers are not half-consumed as italics.
    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
    text = re.sub(r'\*(.*?)\*', r'\1', text)
    text = re.sub(r'`(.*?)`', r'\1', text)
    # BUG FIX: the replacement string was a broken literal spanning two
    # source lines (a syntax error; as intended it was also a no-op \n -> \n
    # substitution). Newlines must become <br> for HTML display.
    text = text.replace('\n', '<br>')
    return text
# NOTE(review): the templates below look like they were stripped of their
# HTML markup by a text-extraction step — e.g. TTS_RESULT_HTML is formatted
# with an audio_b64 argument that no longer appears as an {audio_b64}
# placeholder in the template, and none of the form/button elements the
# POST handlers rely on are present. Restore the original markup before use.

# Landing page: forms for STT/TTS/single-turn chat plus a voice-chat link.
MAIN_HTML = """
Multi-Modal Playground
Multi-Modal Playground
STT (Nova-3)
TTS (AURA-1)
Chat (Llama) - Single Turn
Voice Chat
Go to Voice Chat
Refresh Playground
"""
# Voice-chat page: records audio in the browser and talks to the /api/* JSON
# endpoints (its script was lost in the same stripping).
VOICECHAT_HTML = """
Voice Chat
Voice Chat
Status: Click to start recording
Back to Playground
"""
# Rendered by the /stt form handler; {result} receives the escaped
# transcription JSON (or raw upstream text on a JSON parse failure).
STT_RESULT_HTML = """
STT Result
STT Transcription Result
{result}
Back to Playground
"""
# Rendered by the /tts form handler; formatted with audio_b64 even though
# the placeholder is missing from this stripped copy.
TTS_RESULT_HTML = """
TTS Result
TTS Generated Audio
Back to Playground
"""
# Rendered by the /chat form handler; {user_message} is escaped input,
# {response} is the model reply after escaping + simple_md().
CHAT_RESULT_HTML = """
Chat Result
Chat Response
You: {user_message}
Assistant: {response}
Back to Playground
"""
class Handler(http.server.BaseHTTPRequestHandler):
    """Request handler for the multi-modal playground.

    GET serves two static pages ('/' and '/voicechat'). POST exposes HTML
    form endpoints (/stt, /tts, /chat) and JSON API endpoints (/api/stt,
    /api/tts, /api/chat) that proxy to the Cloudflare multi-modal service
    and to the NVIDIA-hosted Llama model via the module-level `client`.
    """

    # ---- small response/request helpers ------------------------------

    def _send_html(self, html):
        """Send a 200 text/html response with the given body string."""
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(html.encode())

    def _send_json_bytes(self, payload):
        """Send a 200 application/json response with pre-encoded bytes."""
        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(payload)

    def _read_body(self):
        """Read the raw request body.

        ROBUSTNESS: a missing Content-Length header now yields an empty
        body instead of raising (int(None) previously crashed the handler).
        """
        content_length = int(self.headers.get('Content-Length', 0))
        return self.rfile.read(content_length)

    def _parse_form(self):
        """Parse a form POST body into a cgi.FieldStorage.

        NOTE(review): the cgi module is deprecated (PEP 594, removed in
        Python 3.13) — migrate to email.parser/urllib.parse eventually.
        """
        return cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD': 'POST'}
        )

    # ---- GET: static pages -------------------------------------------

    def do_GET(self):
        # Route on the path only; any query string is ignored.
        path = self.path.split('?')[0]
        if path == '/':
            self._send_html(MAIN_HTML)
        elif path == '/voicechat':
            self._send_html(VOICECHAT_HTML)
        else:
            self.send_error(404)

    # ---- POST: form + JSON endpoints ---------------------------------

    def do_POST(self):
        if self.path == '/api/stt':
            # Forward the raw audio bytes to the STT service and relay its
            # JSON response untouched.
            body = self._read_body()
            r = requests.post(STT_URL, data=body, impersonate="chrome")
            self._send_json_bytes(r.content)
            return
        elif self.path == '/stt':
            form = self._parse_form()
            if 'audio' in form:
                fileitem = form['audio']
                if fileitem.file:
                    body = fileitem.file.read()
                    r = requests.post(STT_URL, data=body, impersonate="chrome")
                    # FIX: was a bare `except:` — catch Exception so
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    try:
                        result_json = json.dumps(r.json(), indent=2)
                    except Exception:
                        # Non-JSON upstream reply: show the raw text.
                        result_json = str(r.text)
                    self._send_html(
                        STT_RESULT_HTML.format(result=escape(result_json)))
                    return
            self.send_error(400, "No audio file")
        elif self.path == '/tts':
            form = self._parse_form()
            if 'text' in form:
                text = form['text'].value.strip()
                if text:
                    tts_payload = {"model": "@cf/myshell-ai/melotts",
                                   "params": {"prompt": text}}
                    r = requests.post(TTS_URL, json=tts_payload,
                                      impersonate="chrome")
                    audio_b64 = r.json()["response"]["audio"]
                    self._send_html(
                        TTS_RESULT_HTML.format(audio_b64=escape(audio_b64)))
                    return
            self.send_error(400, "No text provided")
        elif self.path == '/chat':
            form = self._parse_form()
            if 'message' in form:
                user_message = form['message'].value.strip()
                if user_message:
                    messages = [{"role": "user", "content": user_message}]
                    completion = client.chat.completions.create(
                        model="meta/llama-3.2-1b-instruct",
                        messages=messages,
                        temperature=0.2,
                        top_p=0.7,
                        max_tokens=1024,
                        stream=False
                    )
                    response_text = completion.choices[0].message.content
                    # Escape first, THEN apply the markdown pass so the
                    # <br> tags simple_md emits are not escaped away.
                    response_html = simple_md(escape(response_text))
                    result_html = CHAT_RESULT_HTML.format(
                        user_message=escape(user_message),
                        response=response_html
                    )
                    self._send_html(result_html)
                    return
            self.send_error(400, "No message provided")
        elif self.path == '/api/tts':
            req_data = json.loads(self._read_body().decode('utf-8'))
            text = req_data['text']
            tts_payload = {"model": "@cf/myshell-ai/melotts",
                           "params": {"prompt": text}}
            r = requests.post(TTS_URL, json=tts_payload, impersonate="chrome")
            audio_b64 = r.json()["response"]["audio"]
            self._send_json_bytes(json.dumps({"audio": audio_b64}).encode())
        elif self.path == '/api/chat':
            req_data = json.loads(self._read_body().decode('utf-8'))
            completion = client.chat.completions.create(
                model="meta/llama-3.2-1b-instruct",
                messages=req_data['messages'],
                temperature=0.2,
                top_p=0.7,
                max_tokens=1024,
                stream=False
            )
            response_text = completion.choices[0].message.content
            self._send_json_bytes(
                json.dumps({"response": response_text}).encode())
        else:
            self.send_error(404)
if __name__ == "__main__":
    # FIX: guard server startup so importing this module (e.g. for tests)
    # does not bind the port; running it as a script behaves as before.
    # Bind on all interfaces and serve until interrupted.
    with socketserver.TCPServer(("0.0.0.0", PORT), Handler) as d:
        print(f"Server: {PORT}")
        d.serve_forever()