Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Form
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import transformers
5
+ import torch
6
+ import json
7
+ import logging
8
+ import os
9
+
10
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="TTS API", version="1.0.0")

# Add CORS middleware
# NOTE(review): allow_origins=["*"] accepts cross-origin requests from any
# site — confirm this wide-open policy is intended (public demo deployment).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables: mutable module state describing the loaded TTS model.
# They are (re)assigned by load_model() during application startup.
model = None          # the loaded TTS model, or None while unloaded
processor = None      # the matching processor/tokenizer, or None
model_loaded = False  # True once either model loads successfully
model_type = "none"   # "parler-tts", "bark", or "none"
29
+
30
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of lifespan handlers — consider migrating when upgrading FastAPI.
@app.on_event("startup")
async def startup_event():
    """Log environment info and load the TTS model when the app starts.

    All model-state mutation is delegated to load_model(); the original
    `global model, processor, model_loaded, model_type` declaration here
    was dead code (this function assigns none of them) and is removed.
    """
    logger.info("=== TTS API Starting ===")
    logger.info(f"Transformers version: {transformers.__version__}")
    logger.info(f"Torch version: {torch.__version__}")

    await load_model()
40
+
41
async def load_model():
    """Load a TTS model, trying Parler-TTS first and Bark as a fallback.

    Mutates the module-level globals ``model``, ``processor``,
    ``model_loaded`` and ``model_type``. Never raises: if both loads fail
    the service is left in a degraded state (model_loaded=False,
    model_type="none") rather than crashing startup.
    """
    global model, processor, model_loaded, model_type

    try:
        logger.info("Step 1: Importing transformers...")
        from transformers import AutoProcessor, AutoModel

        logger.info("Step 2: Loading Parler-TTS processor...")
        processor = AutoProcessor.from_pretrained(
            "parler-tts/parler-tts-mini-v1",
            trust_remote_code=True,
        )

        logger.info("Step 3: Loading Parler-TTS model...")
        model = AutoModel.from_pretrained(
            "parler-tts/parler-tts-mini-v1",
            trust_remote_code=True,
        )

        model_loaded = True
        model_type = "parler-tts"
        logger.info("✅ SUCCESS: Parler-TTS model loaded successfully!")

    except Exception as e:
        logger.error(f"❌ FAILED: Parler-TTS loading error: {e}")
        logger.info("Trying fallback to Bark model...")

        try:
            # Bug fix: re-import here. In the original, if the
            # `from transformers import ...` above was what raised, the
            # names AutoProcessor/AutoModel were unbound in this fallback
            # path and the except below reported a NameError instead of
            # the real loading problem.
            from transformers import AutoProcessor, AutoModel

            processor = AutoProcessor.from_pretrained("suno/bark-small")
            model = AutoModel.from_pretrained("suno/bark-small")
            model_loaded = True
            model_type = "bark"
            logger.info("✅ SUCCESS: Bark model loaded as fallback!")

        except Exception as fallback_error:
            logger.error(f"❌ FAILED: All models failed: {fallback_error}")
            model_loaded = False
            model_type = "none"
80
+
81
@app.get("/")
async def root():
    """Service banner: overall status plus model and library versions."""
    is_ready = model_loaded
    return dict(
        message="TTS API Service",
        status="operational" if is_ready else "degraded",
        model_loaded=is_ready,
        model_type=model_type,
        transformers_version=transformers.__version__,
        torch_version=torch.__version__,
    )
91
+
92
@app.get("/health")
async def health():
    """Lightweight health probe reporting model-load state."""
    return dict(
        status="healthy" if model_loaded else "degraded",
        model_loaded=model_loaded,
        model_type=model_type,
    )
99
+
100
@app.get("/debug")
async def debug():
    """Debug endpoint to check environment (versions, CUDA, model state)."""
    # Local import: only this diagnostic route needs it.
    import platform

    return {
        # Bug fix: report the actual interpreter version instead of the
        # hard-coded "3.9" guess, which silently goes stale when the
        # runtime image changes.
        "python_version": platform.python_version(),
        "transformers_version": transformers.__version__,
        "torch_version": torch.__version__,
        "model_loaded": model_loaded,
        "model_type": model_type,
        "cuda_available": torch.cuda.is_available(),
        "space_ready": True
    }
112
+
113
@app.post("/api/generate-voiceovers")
async def generate_voiceovers(
    project_id: str = Form(...),
    voiceover_scenes: str = Form(...),
    upload_to_oci: bool = Form(False)
):
    """Generate voiceovers from text scenes.

    Args:
        project_id: Caller-supplied project identifier, echoed back.
        voiceover_scenes: JSON-encoded array of scene texts.
        upload_to_oci: Accepted but currently unused by this handler —
            TODO(review): confirm whether OCI upload is still planned.

    Raises:
        HTTPException: 503 when no model is loaded, 400 on malformed
            input, 500 on unexpected errors.
    """
    if not model_loaded:
        raise HTTPException(
            status_code=503,
            detail="TTS model not loaded. Service unavailable."
        )

    try:
        # Parse input scenes
        scenes = json.loads(voiceover_scenes)

        if not isinstance(scenes, list):
            raise HTTPException(
                status_code=400,
                detail="voiceover_scenes must be a JSON array"
            )

        logger.info(f"Processing {len(scenes)} scenes for project {project_id}")

        # Return success response
        return {
            "status": "success",
            "project_id": project_id,
            "scenes_processed": len(scenes),
            "model_type": model_type,
            "message": f"Ready to process {len(scenes)} voiceover scenes using {model_type}",
            "expected_files": [f"voiceover_{i:02d}.wav" for i in range(1, len(scenes) + 1)]
        }

    except json.JSONDecodeError:
        raise HTTPException(
            status_code=400,
            detail="Invalid JSON format for voiceover_scenes"
        )
    except HTTPException:
        # Bug fix: HTTPException subclasses Exception, so without this
        # pass-through the 400 raised above for a non-list payload was
        # swallowed by the generic handler below and re-reported as a 500.
        raise
    except Exception as e:
        logger.error(f"Error processing request: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error: {str(e)}"
        )
160
+
161
if __name__ == "__main__":
    # Local/dev entry point: run a uvicorn server directly.
    # Port 7860 is the port Hugging Face Spaces expects — presumably this
    # file targets a Space deployment; confirm against the hosting config.
    import uvicorn
    logger.info("Starting TTS API server on port 7860...")
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")