CodeVed

Running

App Files Files Community

Vedika66 committed about 18 hours ago

Commit

592a394

verified ·

1 Parent(s): 4319036

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -16

app.py CHANGED Viewed

@@ -3,11 +3,14 @@ import requests
 import json
 import re
 import tempfile
 import numpy as np
 from datetime import datetime, timedelta, timezone
 from bs4 import BeautifulSoup
 from flask import Flask, request, Response, stream_with_context, render_template_string, send_file
 from supertonic import TTS
 app = Flask(__name__)
@@ -36,6 +39,9 @@ LANGUAGES = {
     "Turkish": "tr", "Ukrainian": "uk", "Vietnamese": "vi"
 }
 # ----------------------------------------------------
 # 📍 GPS REVERSE GEOCODING
 # ----------------------------------------------------
@@ -182,8 +188,8 @@ def chat():
         thinking_instruction = f"""
 [CRITICAL INSTRUCTION: THINKING MODE ENABLED]
 Effort Level: {thinking_effort.upper()} - {effort_text}
-You MUST format your reasoning exactly inside <think> and </think> HTML tags.
-Do NOT use special system tokens like <|channel|>thought or <|im_start|>. Use standard <think> tags.
 """
     location_instruction = ""
@@ -288,11 +294,11 @@ STRICT DIRECTIVES:
                                 if "content" in delta and delta["content"]:
                                     content = delta["content"]
-                                    # Translate Qwen/Other models' internal thinking tokens to our standard HTML <think> tags in real-time!
-                                    content = content.replace("<|channel|>thought <|channel|>", "<think>\n")
-                                    content = content.replace("<|channel|>answer <|channel|>", "\n</think>\n")
-                                    content = content.replace("<|im_start|>thought", "<think>\n")
-                                    content = content.replace("<|im_end|>", "\n</think>\n")
                                     delta["content"] = content
@@ -331,22 +337,39 @@ def generate_tts():
     try:
         lang_code = LANGUAGES.get(language_name, "en")
-        style = tts.get_voice_style(voice_name=voice)
         # Synthesize audio
         wav, duration = tts.synthesize(clean_text, voice_style=style, lang=lang_code)
-        # Save to temporary file temporarily
-        temp_path = tempfile.mktemp(suffix=".wav")
-        tts.save_audio(wav, temp_path)
-        # Read file into memory and delete the temp file immediately
-        with open(temp_path, "rb") as f:
-            audio_data = f.read()
-        os.remove(temp_path)
         # Return the actual audio file
-        return Response(audio_data, mimetype="audio/wav")
     except Exception as e:
         return Response(json.dumps({"error": str(e)}), status=500, mimetype='application/json')

 import json
 import re
 import tempfile
+import io
 import numpy as np
 from datetime import datetime, timedelta, timezone
 from bs4 import BeautifulSoup
 from flask import Flask, request, Response, stream_with_context, render_template_string, send_file
 from supertonic import TTS
+from pydub import AudioSegment
+from scipy.io import wavfile
 app = Flask(__name__)
     "Turkish": "tr", "Ukrainian": "uk", "Vietnamese": "vi"
 }
+# Voice Styles Cache for faster processing
+VOICE_STYLES_CACHE = {}
 # ----------------------------------------------------
 # 📍 GPS REVERSE GEOCODING
 # ----------------------------------------------------
         thinking_instruction = f"""
 [CRITICAL INSTRUCTION: THINKING MODE ENABLED]
 Effort Level: {thinking_effort.upper()} - {effort_text}
+You MUST format your reasoning exactly inside  and  HTML tags.
+Do NOT use special system tokens like <|channel|>thought or <|im_start|>. Use standard  tags.
 """
     location_instruction = ""
                                 if "content" in delta and delta["content"]:
                                     content = delta["content"]
+                                    # Translate Qwen/Other models' internal thinking tokens to our standard HTML <think> tags in real-time!
+                                    content = content.replace("<|channel|>thought <|channel|>", "\n")
+                                    content = content.replace("<|channel|>answer <|channel|>", "\n\n")
+                                    content = content.replace("thought", "\n")
+                                    content = content.replace("", "\n\n")
                                     delta["content"] = content
     try:
         lang_code = LANGUAGES.get(language_name, "en")
+        # 🚀 OPTIMIZATION: Voice Style Caching
+        if voice not in VOICE_STYLES_CACHE:
+            VOICE_STYLES_CACHE[voice] = tts.get_voice_style(voice_name=voice)
+        style = VOICE_STYLES_CACHE[voice]
         # Synthesize audio
         wav, duration = tts.synthesize(clean_text, voice_style=style, lang=lang_code)
+        # 🚀 OPTIMIZATION: In-Memory Processing (No Disk I/O)
+        buffer = io.BytesIO()
+        sample_rate = 22050
+        if wav.dtype != np.int16:
+            max_val = np.max(np.abs(wav))
+            if max_val > 0:
+                wav_int16 = np.int16(wav / max_val * 32767)
+            else:
+                wav_int16 = wav.astype(np.int16)
+            wavfile.write(buffer, sample_rate, wav_int16)
+        else:
+            wavfile.write(buffer, sample_rate, wav)
+        buffer.seek(0)
+        # 🚀 OPTIMIZATION: WAV to MP3 Conversion
+        audio_segment = AudioSegment.from_wav(buffer)
+        mp3_buffer = io.BytesIO()
+        audio_segment.export(mp3_buffer, format="mp3", bitrate="128k", parameters=["-ar", "22050"])
+        mp3_buffer.seek(0)
         # Return the actual audio file
+        return Response(mp3_buffer.read(), mimetype="audio/mpeg")
     except Exception as e:
         return Response(json.dumps({"error": str(e)}), status=500, mimetype='application/json')