Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,11 +3,14 @@ import requests
|
|
| 3 |
import json
|
| 4 |
import re
|
| 5 |
import tempfile
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
from datetime import datetime, timedelta, timezone
|
| 8 |
from bs4 import BeautifulSoup
|
| 9 |
from flask import Flask, request, Response, stream_with_context, render_template_string, send_file
|
| 10 |
from supertonic import TTS
|
|
|
|
|
|
|
| 11 |
|
| 12 |
app = Flask(__name__)
|
| 13 |
|
|
@@ -36,6 +39,9 @@ LANGUAGES = {
|
|
| 36 |
"Turkish": "tr", "Ukrainian": "uk", "Vietnamese": "vi"
|
| 37 |
}
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
# ----------------------------------------------------
|
| 40 |
# GPS REVERSE GEOCODING
|
| 41 |
# ----------------------------------------------------
|
|
@@ -182,8 +188,8 @@ def chat():
|
|
| 182 |
thinking_instruction = f"""
|
| 183 |
[CRITICAL INSTRUCTION: THINKING MODE ENABLED]
|
| 184 |
Effort Level: {thinking_effort.upper()} - {effort_text}
|
| 185 |
-
You MUST format your reasoning exactly inside
|
| 186 |
-
Do NOT use special system tokens like <|channel|>thought or <|im_start|>. Use standard
|
| 187 |
"""
|
| 188 |
|
| 189 |
location_instruction = ""
|
|
@@ -288,11 +294,11 @@ STRICT DIRECTIVES:
|
|
| 288 |
if "content" in delta and delta["content"]:
|
| 289 |
content = delta["content"]
|
| 290 |
|
| 291 |
-
# Translate Qwen/Other models' internal thinking tokens to our standard HTML
|
| 292 |
-
content = content.replace("<|channel|>thought <|channel|>", "
|
| 293 |
-
content = content.replace("<|channel|>answer <|channel|>", "\n
|
| 294 |
-
content = content.replace("
|
| 295 |
-
content = content.replace("
|
| 296 |
|
| 297 |
delta["content"] = content
|
| 298 |
|
|
@@ -331,22 +337,39 @@ def generate_tts():
|
|
| 331 |
|
| 332 |
try:
|
| 333 |
lang_code = LANGUAGES.get(language_name, "en")
|
| 334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
|
| 336 |
# Synthesize audio
|
| 337 |
wav, duration = tts.synthesize(clean_text, voice_style=style, lang=lang_code)
|
| 338 |
|
| 339 |
-
#
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
-
#
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
| 347 |
|
| 348 |
# Return the actual audio file
|
| 349 |
-
return Response(
|
| 350 |
|
| 351 |
except Exception as e:
|
| 352 |
return Response(json.dumps({"error": str(e)}), status=500, mimetype='application/json')
|
|
|
|
| 3 |
import json
|
| 4 |
import re
|
| 5 |
import tempfile
|
| 6 |
+
import io
|
| 7 |
import numpy as np
|
| 8 |
from datetime import datetime, timedelta, timezone
|
| 9 |
from bs4 import BeautifulSoup
|
| 10 |
from flask import Flask, request, Response, stream_with_context, render_template_string, send_file
|
| 11 |
from supertonic import TTS
|
| 12 |
+
from pydub import AudioSegment
|
| 13 |
+
from scipy.io import wavfile
|
| 14 |
|
| 15 |
app = Flask(__name__)
|
| 16 |
|
|
|
|
| 39 |
"Turkish": "tr", "Ukrainian": "uk", "Vietnamese": "vi"
|
| 40 |
}
|
| 41 |
|
| 42 |
+
# Voice Styles Cache for faster processing
|
| 43 |
+
VOICE_STYLES_CACHE = {}
|
| 44 |
+
|
| 45 |
# ----------------------------------------------------
|
| 46 |
# GPS REVERSE GEOCODING
|
| 47 |
# ----------------------------------------------------
|
|
|
|
| 188 |
thinking_instruction = f"""
|
| 189 |
[CRITICAL INSTRUCTION: THINKING MODE ENABLED]
|
| 190 |
Effort Level: {thinking_effort.upper()} - {effort_text}
|
| 191 |
+
You MUST format your reasoning exactly inside and HTML tags.
|
| 192 |
+
Do NOT use special system tokens like <|channel|>thought or <|im_start|>. Use standard tags.
|
| 193 |
"""
|
| 194 |
|
| 195 |
location_instruction = ""
|
|
|
|
| 294 |
if "content" in delta and delta["content"]:
|
| 295 |
content = delta["content"]
|
| 296 |
|
| 297 |
+
# Translate Qwen/Other models' internal thinking tokens to our standard HTML tags in real-time!
|
| 298 |
+
content = content.replace("<|channel|>thought <|channel|>", "\n")
|
| 299 |
+
content = content.replace("<|channel|>answer <|channel|>", "\n\n")
|
| 300 |
+
content = content.replace("thought", "\n")
|
| 301 |
+
content = content.replace("", "\n\n")
|
| 302 |
|
| 303 |
delta["content"] = content
|
| 304 |
|
|
|
|
| 337 |
|
| 338 |
try:
|
| 339 |
lang_code = LANGUAGES.get(language_name, "en")
|
| 340 |
+
|
| 341 |
+
# OPTIMIZATION: Voice Style Caching
|
| 342 |
+
if voice not in VOICE_STYLES_CACHE:
|
| 343 |
+
VOICE_STYLES_CACHE[voice] = tts.get_voice_style(voice_name=voice)
|
| 344 |
+
style = VOICE_STYLES_CACHE[voice]
|
| 345 |
|
| 346 |
# Synthesize audio
|
| 347 |
wav, duration = tts.synthesize(clean_text, voice_style=style, lang=lang_code)
|
| 348 |
|
| 349 |
+
# OPTIMIZATION: In-Memory Processing (No Disk I/O)
|
| 350 |
+
buffer = io.BytesIO()
|
| 351 |
+
sample_rate = 22050
|
| 352 |
+
|
| 353 |
+
if wav.dtype != np.int16:
|
| 354 |
+
max_val = np.max(np.abs(wav))
|
| 355 |
+
if max_val > 0:
|
| 356 |
+
wav_int16 = np.int16(wav / max_val * 32767)
|
| 357 |
+
else:
|
| 358 |
+
wav_int16 = wav.astype(np.int16)
|
| 359 |
+
wavfile.write(buffer, sample_rate, wav_int16)
|
| 360 |
+
else:
|
| 361 |
+
wavfile.write(buffer, sample_rate, wav)
|
| 362 |
+
|
| 363 |
+
buffer.seek(0)
|
| 364 |
|
| 365 |
+
# OPTIMIZATION: WAV to MP3 Conversion
|
| 366 |
+
audio_segment = AudioSegment.from_wav(buffer)
|
| 367 |
+
mp3_buffer = io.BytesIO()
|
| 368 |
+
audio_segment.export(mp3_buffer, format="mp3", bitrate="128k", parameters=["-ar", "22050"])
|
| 369 |
+
mp3_buffer.seek(0)
|
| 370 |
|
| 371 |
# Return the actual audio file
|
| 372 |
+
return Response(mp3_buffer.read(), mimetype="audio/mpeg")
|
| 373 |
|
| 374 |
except Exception as e:
|
| 375 |
return Response(json.dumps({"error": str(e)}), status=500, mimetype='application/json')
|