Spaces:

RSHVR
/

Command_RTC

Sleeping

App Files Community

RSHVR committed on Mar 30, 2025

Commit

f3c69f5

verified ·

1 Parent(s): 9311026

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -73

app.py CHANGED Viewed

@@ -4,13 +4,10 @@ import gradio as gr
 import torch
 import torchaudio
 import spaces
-from fastapi import FastAPI, File, UploadFile, Form
-from fastapi.responses import FileResponse
 from tortoise.api import TextToSpeech
 from tortoise.utils.audio import load_audio
 import numpy as np
-import uvicorn
-from typing import Optional
 import uuid
 from pydub import AudioSegment
@@ -27,10 +24,7 @@ if torch.cuda.is_available():
     zero = zero.cuda()
     print(f"Zero tensor device: {zero.device}")
-# Initialize FastAPI
-app = FastAPI(title="Tortoise TTS API")
-# Initialize TTS (will be loaded on demand with Zero-GPU)
 tts = None
 # Available preset voice options
@@ -138,54 +132,6 @@ def tts_interface(text, audio_file, preset_voice, record_audio):
     else:
         return None, message
-# FastAPI endpoints
-@app.post("/api/tts_with_voice_file/")
-@spaces.GPU
-async def tts_with_voice_file(
-    text: str = Form(...),
-    voice_file: Optional[UploadFile] = File(None),
-    preset_voice: Optional[str] = Form("random")
-):
-    """API endpoint for TTS with an uploaded voice file"""
-    try:
-        print(f"Processing with device: {zero.device}")
-        voice_sample_path = None
-        if voice_file:
-            # Save uploaded file temporarily
-            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(voice_file.filename)[1])
-            temp_file.write(await voice_file.read())
-            temp_file.close()
-            voice_sample_path = temp_file.name
-        output_path, message = generate_tts_with_voice(text, voice_sample_path, preset_voice)
-        if output_path:
-            return FileResponse(output_path, media_type="audio/wav", filename="tts_output.wav")
-        else:
-            return {"status": "error", "message": message}
-    except Exception as e:
-        return {"status": "error", "message": f"Failed to process: {str(e)}"}
-@app.post("/api/tts_with_preset/")
-@spaces.GPU
-async def tts_with_preset(
-    text: str = Form(...),
-    preset_voice: str = Form("random")
-):
-    """API endpoint for TTS with a preset voice"""
-    try:
-        print(f"Processing with device: {zero.device}")
-        output_path, message = generate_tts_with_voice(text, preset_voice=preset_voice)
-        if output_path:
-            return FileResponse(output_path, media_type="audio/wav", filename="tts_output.wav")
-        else:
-            return {"status": "error", "message": message}
-    except Exception as e:
-        return {"status": "error", "message": f"Failed to process: {str(e)}"}
 # Create Gradio interface
 with gr.Blocks(title="Tortoise TTS with Voice Cloning") as demo:
     gr.Markdown("# Tortoise Text-to-Speech with Voice Cloning")
@@ -229,26 +175,17 @@ with gr.Blocks(title="Tortoise TTS with Voice Cloning") as demo:
         outputs=[output_audio, output_message]
     )
-    gr.Markdown("### API Endpoints")
     gr.Markdown("""
-    This app also provides API endpoints:
-    1. **Voice File TTS** - `/api/tts_with_voice_file/`
-       - POST request with:
-         - `text`: Text to convert to speech (required)
-         - `voice_file`: Audio file for voice cloning (optional)
-         - `preset_voice`: Name of preset voice (optional, defaults to "random")
-    2. **Preset Voice TTS** - `/api/tts_with_preset/`
-       - POST request with:
-         - `text`: Text to convert to speech (required)
-         - `preset_voice`: Name of preset voice (required)
-    Both endpoints return a WAV file with the generated speech.
     """)
-# Mount the Gradio app to FastAPI
-app = gr.mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import torch
 import torchaudio
 import spaces
+from huggingface_hub import snapshot_download
 from tortoise.api import TextToSpeech
 from tortoise.utils.audio import load_audio
 import numpy as np
 import uuid
 from pydub import AudioSegment
     zero = zero.cuda()
     print(f"Zero tensor device: {zero.device}")
+# Initialize Tortoise TTS (will be loaded on demand with Zero-GPU)
 tts = None
 # Available preset voice options
     else:
         return None, message
 # Create Gradio interface
 with gr.Blocks(title="Tortoise TTS with Voice Cloning") as demo:
     gr.Markdown("# Tortoise Text-to-Speech with Voice Cloning")
         outputs=[output_audio, output_message]
     )
+    gr.Markdown("### About This App")
     gr.Markdown("""
+    This app uses Tortoise-TTS to generate high-quality speech from text.
+    You can:
+    - Enter any text you want to be spoken
+    - Upload or record a voice sample for voice cloning
+    - Or select from pre-defined voice presets
+    The app runs on Hugging Face Spaces with Zero-GPU optimization.
     """)
 if __name__ == "__main__":
+    demo.launch()