Spaces:

akshatOP
/

nuera

Runtime error

App Files Community

akshatOP committed on Feb 28, 2025

Commit

81875a2

1 Parent(s): fa48fc0

Switch to facebook/tts_transformer-en-ljspeech for TTS

Browse files

Files changed (1) hide show

app.py +11 -10

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
-from fastapi import FastAPI, File, UploadFile, Response
-from transformers import ParlerTTSForConditionalGeneration, AutoTokenizer
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 from llama_cpp import Llama
 import torch
@@ -7,31 +6,32 @@ import soundfile as sf
 import io
 import os
 from pydantic import BaseModel
 app = FastAPI()
-# Load models
 if os.path.exists("./models/tts_model"):
-    tts_model = ParlerTTSForConditionalGeneration.from_pretrained("./models/tts_model")
     tts_tokenizer = AutoTokenizer.from_pretrained("./models/tts_model")
 else:
-    tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1")
-    tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")
-# SST and LLM loading remains unchanged
 if os.path.exists("./models/sst_model"):
     sst_model = Wav2Vec2ForCTC.from_pretrained("./models/sst_model")
     sst_processor = Wav2Vec2Processor.from_pretrained("./models/sst_model")
 else:
     sst_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
-    sst_processor = Wav2Vec2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
 if os.path.exists("./models/llama.gguf"):
     llm = Llama("./models/llama.gguf")
 else:
     raise FileNotFoundError("Please upload llama.gguf to models/ directory")
-# Request models and endpoints remain unchanged
 class TTSRequest(BaseModel):
     text: str
@@ -50,6 +50,7 @@ async def tts_endpoint(request: TTSRequest):
     buffer.seek(0)
     return Response(content=buffer.getvalue(), media_type="audio/wav")
 @app.post("/sst")
 async def sst_endpoint(file: UploadFile = File(...)):
     audio_bytes = await file.read()

+from transformers import AutoModelForSpeechSeq2Seq, AutoTokenizer
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 from llama_cpp import Llama
 import torch
 import io
 import os
 from pydantic import BaseModel
+from fastapi import FastAPI, File, UploadFile, Response
 app = FastAPI()
+# Load TTS model
 if os.path.exists("./models/tts_model"):
+    tts_model = AutoModelForSpeechSeq2Seq.from_pretrained("./models/tts_model")
     tts_tokenizer = AutoTokenizer.from_pretrained("./models/tts_model")
 else:
+    tts_model = AutoModelForSpeechSeq2Seq.from_pretrained("facebook/tts_transformer-en-ljspeech")
+    tts_tokenizer = AutoTokenizer.from_pretrained("facebook/tts_transformer-en-ljspeech")
+# Load SST model
 if os.path.exists("./models/sst_model"):
     sst_model = Wav2Vec2ForCTC.from_pretrained("./models/sst_model")
     sst_processor = Wav2Vec2Processor.from_pretrained("./models/sst_model")
 else:
     sst_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+    sst_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+# Load LLM model
 if os.path.exists("./models/llama.gguf"):
     llm = Llama("./models/llama.gguf")
 else:
     raise FileNotFoundError("Please upload llama.gguf to models/ directory")
+# Request models (unchanged)
 class TTSRequest(BaseModel):
     text: str
     buffer.seek(0)
     return Response(content=buffer.getvalue(), media_type="audio/wav")
+# SST and LLM endpoints remain unchanged
 @app.post("/sst")
 async def sst_endpoint(file: UploadFile = File(...)):
     audio_bytes = await file.read()