Abdalkaderdev committed on
Commit
2ce54a8
·
1 Parent(s): 56fcb88

Integrate Supertonic 2 for professional TTS

Browse files
Files changed (1) hide show
  1. app/ora_server.py +57 -1
app/ora_server.py CHANGED
@@ -4,10 +4,13 @@ from peft import PeftModel
4
  from fastapi import FastAPI, HTTPException
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from fastapi.staticfiles import StaticFiles
7
- from fastapi.responses import FileResponse
8
  from pydantic import BaseModel
9
  import uvicorn
10
  import os
 
 
 
11
 
12
  # Settings
13
  BASE_MODEL = "unsloth/Llama-3.2-1B-Instruct"
@@ -107,6 +110,59 @@ async def chat_endpoint(req: ChatRequest):
107
 
108
  return {"response": response_text}
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  # Mount Static Frontend (Must be last)
111
  # Expects 'frontend/out' to exist (built via 'next build')
112
  if os.path.exists("frontend/out"):
 
4
  from fastapi import FastAPI, HTTPException
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from fastapi.staticfiles import StaticFiles
7
+ from fastapi.responses import FileResponse, Response
8
  from pydantic import BaseModel
9
  import uvicorn
10
  import os
11
+ import io
12
+ import numpy as np
13
+ from scipy.io import wavfile
14
 
15
  # Settings
16
  BASE_MODEL = "unsloth/Llama-3.2-1B-Instruct"
 
110
 
111
  return {"response": response_text}
112
 
113
+ # TTS endpoint using Supertonic 2
114
+ tts_model = None
115
+ tts_processor = None
116
+
117
+ @app.on_event("startup")
118
+ async def load_tts():
119
+ global tts_model, tts_processor
120
+ try:
121
+ print("Loading Supertonic 2 TTS...")
122
+ from transformers import AutoProcessor, AutoModel
123
+ tts_processor = AutoProcessor.from_pretrained("Supertone/supertonic-2")
124
+ tts_model = AutoModel.from_pretrained("Supertone/supertonic-2")
125
+ if device == "cuda":
126
+ tts_model = tts_model.to("cuda")
127
+ print("TTS Model loaded successfully!")
128
+ except Exception as e:
129
+ print(f"Could not load TTS model: {e}")
130
+ print("Voice will not be available.")
131
+
132
+ class TTSRequest(BaseModel):
133
+ text: str
134
+
135
@app.post("/api/tts")
async def text_to_speech(req: TTSRequest):
    """Synthesize speech for ``req.text`` with Supertonic 2.

    Returns:
        An ``audio/wav`` Response containing 16-bit PCM at 22050 Hz.
        # assumes the model emits 22050 Hz audio — TODO confirm against model card

    Raises:
        HTTPException: 503 if the TTS model never loaded, 500 on any
            failure during generation or encoding.
    """
    global tts_model, tts_processor

    if tts_model is None or tts_processor is None:
        raise HTTPException(status_code=503, detail="TTS model not loaded")

    try:
        inputs = tts_processor(text=req.text, return_tensors="pt")
        if device == "cuda":
            inputs = {k: v.to("cuda") for k, v in inputs.items()}

        # Inference only — no gradients needed.
        with torch.no_grad():
            audio = tts_model.generate(**inputs)

        wav_bytes = _pcm16_wav_bytes(audio.cpu().numpy().squeeze())
        return Response(content=wav_bytes, media_type="audio/wav")
    except HTTPException:
        # Never wrap an intentional HTTP error into a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")


def _pcm16_wav_bytes(audio_np, sample_rate=22050):
    """Peak-normalize a float waveform to 16-bit PCM and return WAV bytes.

    Guards against division by zero on silent (all-zero or empty) audio,
    which previously produced NaNs and a corrupt WAV file.
    """
    peak = float(np.max(np.abs(audio_np))) if audio_np.size else 0.0
    if peak > 0.0:
        audio_np = audio_np / peak
    pcm = np.int16(audio_np * 32767)
    wav_io = io.BytesIO()
    wavfile.write(wav_io, sample_rate, pcm)
    return wav_io.getvalue()
165
+
166
  # Mount Static Frontend (Must be last)
167
  # Expects 'frontend/out' to exist (built via 'next build')
168
  if os.path.exists("frontend/out"):