Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Files Community

MariaKaiser committed on Mar 24

Commit

6c66ab2

verified ·

1 Parent(s): 59f9520

Update app.py

Browse files

Files changed (1) hide show

app.py +444 -223

app.py CHANGED Viewed

@@ -1,139 +1,22 @@
-from fastapi import FastAPI, BackgroundTasks
 from pydantic import BaseModel
-from typing import List
 from pathlib import Path
-import os
-import uuid
-import asyncio
-import time
-import httpx
-from supabase import create_client, Client
-import torchaudio
-import torch
-from TTS.tts.models.xtts import Xtts
-from TTS.tts.configs.xtts_config import XttsConfig
-from huggingface_hub import hf_hub_download
-from pydub import AudioSegment
-import subprocess
-# -----------------------------
-# Paths & Device
-# -----------------------------
 OUTPUT_DIR = "outputs"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
-CACHE_DIR = "disk cache"
-os.makedirs(CACHE_DIR, exist_ok=True)
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# -----------------------------
-# Supabase client
-# -----------------------------
-SUPABASE_URL = "https://kvlxvhdgacktsgykyckm.supabase.co/"
-SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Imt2bHh2aGRnYWNrdHNneWt5Y2ttIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTc3MTk2MTQ5MSwiZXhwIjoyMDg3NTM3NDkxfQ.tzfHcbzwzctHDDDp3vk4JGz30ajN2szncAV-1wK7_pM"
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
-# -----------------------------
-# Download cache (memory)
-# -----------------------------
-download_cache = {}  # URL -> local path
-# -----------------------------
-# Helper to get cached file (downloads if missing)
-# -----------------------------
-async def get_cached_file(url: str, subfolder: str, skip_on_startup: bool = False) -> str | None:
-    """
-    Returns local cached path for URL.
-    Downloads and stores in subfolder if missing.
-    skip_on_startup: if True, don't raise errors even for prosody/bg_music (used during startup).
-    """
-    if url in download_cache:
-        return download_cache[url]
-    folder_path = os.path.join(CACHE_DIR, subfolder)
-    os.makedirs(folder_path, exist_ok=True)
-    local_path = os.path.join(folder_path, os.path.basename(url))
-    if os.path.exists(local_path):
-        download_cache[url] = local_path
-        print(f"Found on disk, added to cache: {local_path}")
-        return local_path
-    try:
-        async with httpx.AsyncClient(timeout=60) as client:
-            resp = await client.get(url)
-            resp.raise_for_status()
-            with open(local_path, "wb") as f:
-                f.write(resp.content)
-        download_cache[url] = local_path
-        print(f"Downloaded and cached: {url} → {local_path}")
-        return local_path
-    except Exception as e:
-        if subfolder == "sfx" or skip_on_startup:
-            print(f"Warning: Failed to download {subfolder} file {url}, skipping: {e}")
-            return None
-        else:
-            # Raise error if essential and not in startup
-            raise RuntimeError(f"Failed to download {subfolder} file {url}: {e}") from e
-async def download_to_cache(url: str, subfolder: str):
-    await get_cached_file(url, subfolder)
-# -----------------------------
-# Preload all assets from Supabase at startup
-# -----------------------------
-def list_all_files_recursive(bucket_name: str, path=""):
-    files_list = []
-    try:
-        response = supabase.storage.from_(bucket_name).list(path=path)
-    except Exception as e:
-        print(f"Warning listing {bucket_name}/{path}: {e}")
-        return files_list
-    for f in response:
-        name = f["name"]
-        full_path = f"{path}/{name}" if path else name
-        # If metadata is None → it's a folder
-        if f.get("metadata") is None:
-            files_list.extend(list_all_files_recursive(bucket_name, full_path))
-        else:
-            url = supabase.storage.from_(bucket_name).get_public_url(full_path)
-            files_list.append((full_path, url))
-    return files_list
-async def preload_all_assets():
-    print("Starting Supabase asset preloading...")
-    tasks = []
-    buckets = {
-        "voice-actor-files": "prosody",
-        "bg-music": "bg_music",
-        "location-audio-files": "sfx"
-    }
-    for bucket_name, subfolder in buckets.items():
-        files = list_all_files_recursive(bucket_name)
-        for _, url in files:
-            # Schedule download; failures for sfx are ignored, prosody/bg_music raise on actual fetch
-            tasks.append(download_to_cache(url, subfolder))
-    if tasks:
-        await asyncio.gather(*tasks)
-    print(f"Preloading completed. {len(download_cache)} files cached on disk.")
-import asyncio
-asyncio.run(preload_all_assets())
-# -----------------------------
-# TTS Model (XTTS)
-# -----------------------------
 MODEL_DIR = "my_model"
 config_path = hf_hub_download(
@@ -154,45 +37,30 @@ model_path = hf_hub_download(
     cache_dir=MODEL_DIR
 )
 config = XttsConfig()
 config.load_json(config_path)
 model = Xtts.init_from_config(config)
 model.load_checkpoint(
     config,
-    checkpoint_dir=os.path.dirname(model_path),
     use_deepspeed=False,
-    vocab_path=vocab_path
 )
 model.to(device)
-# -----------------------------
-# Enums mapping for TTS tags
-# -----------------------------
-intensity_map = {"LOW": "low", "MEDIUM": "mid", "HIGH": "high"}
-emotion_map = {
-    "HAPPINESS": "happiness",
-    "SADNESS": "sadness",
-    "FEAR": "fear",
-    "ANGER": "anger",
-    "SURPRISE": "surprise",
-    "WHISPER": "whisper",
-    "NARRATION": "narration"
-}
-def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: str) -> str:
-    emo_tag = f"<emo_{emotion_map[emotion_enum]}>"
-    int_tag = f"<int_{intensity_map[intensity_enum]}>"
-    return f"{emo_tag} {int_tag} {text}"
-# -----------------------------
-# DTO Models
-# -----------------------------
 class BGMusicDto(BaseModel):
     musicPath: str
     emotion: str
     volume: float
 class SentenceDto(BaseModel):
     speaker: str
     sentenceId: str
@@ -204,7 +72,7 @@ class SentenceDto(BaseModel):
 class LocationDto(BaseModel):
     locationName: str
     path: str
 class SceneDto(BaseModel):
     sceneId: str
     location: LocationDto
@@ -216,26 +84,114 @@ class ChapterDto(BaseModel):
     title: SentenceDto
     scenes: List[SceneDto]
 class CastDto(BaseModel):
     name: str
     gender: str
     isAdult: bool
     voiceReference: str
 class StoryCreationDTO(BaseModel):
     storyId: str
     chapters: List[ChapterDto]
     cast: List[CastDto]
-class TTSResponse(BaseModel):
-    fileName: str
-    duration: float
-    audioPath: str
-# -----------------------------
-# TTS Inference
-# -----------------------------
-def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
     gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
     out = model.inference(
         text=text,
@@ -248,14 +204,17 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
         repetition_penalty=model.config.repetition_penalty,
         top_p=model.config.top_p,
     )
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     torchaudio.save(save_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
     return save_path
-# -----------------------------
-# Generate story audios
-# -----------------------------
 async def generate_story_audios(story: StoryCreationDTO, base_output: str):
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
@@ -263,124 +222,337 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str):
         chapter_dir = story_dir / chapter.chapterId
         chapter_dir.mkdir(exist_ok=True)
-        prosody_file_title = await get_cached_file(chapter.title.prosodyReference, "prosody")
-        if prosody_file_title is None:
-            raise RuntimeError(f"Missing prosody file for chapter title: {chapter.title.prosodyReference}")
         title_save_path = chapter_dir / "title.wav"
         tagged_text_title = generate_tagged_text(
-            chapter.title.sentence,
-            chapter.title.emotion,
-            chapter.title.intensity
         )
-        inference_by_model(tagged_text_title, prosody_file_title, str(title_save_path))
         for scene in chapter.scenes:
             scene_dir = chapter_dir / scene.sceneId
             scene_dir.mkdir(exist_ok=True)
             for sentence in scene.sentences:
-                prosody_file = await get_cached_file(sentence.prosodyReference, "prosody")
-                if prosody_file is None:
-                    raise RuntimeError(f"Missing prosody file for sentence: {sentence.sentenceId}")
                 sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
                 tagged_text = generate_tagged_text(
-                    sentence.sentence,
-                    sentence.emotion,
-                    sentence.intensity
                 )
-                inference_by_model(tagged_text, prosody_file, str(sentence_save_path))
-# -----------------------------
-# Concatenate audio
-# -----------------------------
 def ensure_wav(file_path: str) -> str:
     ext = os.path.splitext(file_path)[1].lower()
     if ext == ".wav":
-        return file_path
     wav_path = os.path.splitext(file_path)[0] + ".wav"
     subprocess.run(["ffmpeg", "-y", "-i", file_path, wav_path], check=True)
     return wav_path
-async def concat_story_audio(story: StoryCreationDTO, base_output: str, final_path: str = None):
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
     if final_path is None:
         final_path = story_dir / f"{story.storyId}_full.wav"
     else:
         final_path = Path(final_path)
-        final_path.parent.mkdir(parents=True, exist_ok=True)
-    chapters_audio = AudioSegment.silent(duration=0)
     for chapter in story.chapters:
         chapter_dir = story_dir / chapter.chapterId
-        chapter_audio = AudioSegment.from_wav(chapter_dir / "title.wav")
         for scene in chapter.scenes:
             scene_dir = chapter_dir / scene.sceneId
             scene_audio = AudioSegment.silent(duration=0)
             for sentence in scene.sentences:
                 sentence_path = scene_dir / f"{sentence.sentenceId}.wav"
-                scene_audio += AudioSegment.from_wav(sentence_path)
             if scene.location.path:
-                sfx_file = await get_cached_file(scene.location.path, "sfx")
                 if sfx_file:
                     sfx_file_wav = ensure_wav(sfx_file)
-                    scene_audio = scene_audio.overlay(AudioSegment.from_wav(sfx_file_wav))
             if scene.bgMusic and scene.bgMusic.musicPath:
-                bg_file = await get_cached_file(scene.bgMusic.musicPath, "bg_music")
-                if bg_file is None:
-                    raise RuntimeError(f"Missing background music file: {scene.bgMusic.musicPath}")
                 bg_file_wav = ensure_wav(bg_file)
                 bg_audio = AudioSegment.from_file(bg_file_wav)
-                bg_audio = bg_audio - (1 - scene.bgMusic.volume) * 30
                 if len(bg_audio) < len(scene_audio):
-                    bg_audio = bg_audio * ((len(scene_audio) // len(bg_audio)) + 1)
-                bg_audio = bg_audio[:len(scene_audio)]
                 scene_audio = scene_audio.overlay(bg_audio)
             scene_audio += AudioSegment.silent(duration=2000)
             chapter_audio += scene_audio
         chapter_audio += AudioSegment.silent(duration=3000)
         chapters_audio += chapter_audio
     chapters_audio.export(final_path, format="wav")
     return final_path
-# -----------------------------
-# FastAPI app & tasks
-# -----------------------------
 app = FastAPI(title="EGTTS Arabic TTS API")
 tasks = {}
 async def run_tts_pipeline(task_id: str, story: StoryCreationDTO):
-    start_time = time.time()
     try:
-        print(f"Starting story: {story.storyId}")
-        await generate_story_audios(story, OUTPUT_DIR)
-        final_wav_path = Path(OUTPUT_DIR) / story.storyId / f"{story.storyId}_full.wav"
-        final_generated_story_path = await concat_story_audio(story, OUTPUT_DIR, final_path=str(final_wav_path))
-        # Convert to mp3
         wav = AudioSegment.from_wav(final_generated_story_path)
         mp3_path = final_generated_story_path.with_suffix(".mp3")
         wav.export(mp3_path, format="mp3", bitrate="192k")
-        audio_segment = AudioSegment.from_file(mp3_path)
-        duration_seconds = len(audio_segment) / 1000
-        # Upload final story
         file_name = f"{uuid.uuid4()}_{os.path.basename(mp3_path)}"
         storage_path = f"{story.storyId}/final/{file_name}"
-        supabase.storage.from_("story-audio-files").upload(storage_path, mp3_path)
         audio_url = supabase.storage.from_("story-audio-files").get_public_url(storage_path)
         tasks[task_id] = {
             "status": "completed",
             "result": {
@@ -390,26 +562,58 @@ async def run_tts_pipeline(task_id: str, story: StoryCreationDTO):
             }
         }
-        elapsed = time.time() - start_time
         print(f"Story {story.storyId} processed in {elapsed:.2f} seconds")
     except Exception as e:
-        tasks[task_id] = {"status": "failed", "error": str(e)}
-        print(f"Exception for story {story.storyId}: {e}")
-# -----------------------------
-# FastAPI endpoints
-# -----------------------------
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO, background_tasks: BackgroundTasks):
     task_id = str(uuid.uuid4())
-    tasks[task_id] = {"status": "processing", "result": None}
     background_tasks.add_task(run_tts_pipeline, task_id, story)
     return {"task_id": task_id}
 @app.get("/tts/results/{task_id}")
 async def get_results(task_id: str):
-    print("GET called for task_id:", task_id)
     if task_id not in tasks:
         return {"status": "not_found"}
@@ -440,13 +644,30 @@ async def get_results(task_id: str):
         # If result is missing fields, mark as still processing
         return {"status": "processing"}
-@app.get("/")
-def root():
-    return {"message": "Welcome! Visit /docs for Swagger UI."}
-# -----------------------------
-# Run app
-# -----------------------------
 import uvicorn
-uvicorn.run(app, host="0.0.0.0", port=7860)

+from fastapi import FastAPI, UploadFile, File, Form
+from fastapi.responses import FileResponse
+import torch
+import torchaudio
+import os
 from pydantic import BaseModel
+from typing import List, Optional
 from pathlib import Path
 OUTPUT_DIR = "outputs"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 device = "cuda" if torch.cuda.is_available() else "cpu"
+from huggingface_hub import hf_hub_download
+# ------------------------
+# Download model files from Hugging Face if not present
+# ------------------------
 MODEL_DIR = "my_model"
 config_path = hf_hub_download(
     cache_dir=MODEL_DIR
 )
+from TTS.tts.models.xtts import Xtts
+from TTS.tts.configs.xtts_config import XttsConfig
+# Load model
 config = XttsConfig()
 config.load_json(config_path)
 model = Xtts.init_from_config(config)
 model.load_checkpoint(
     config,
+    checkpoint_dir= os.path.dirname(model_path),
     use_deepspeed=False,
+    vocab_path= vocab_path
 )
 model.to(device)
+# --------- Define your models ----------
 class BGMusicDto(BaseModel):
     musicPath: str
     emotion: str
     volume: float
 class SentenceDto(BaseModel):
     speaker: str
     sentenceId: str
 class LocationDto(BaseModel):
     locationName: str
     path: str
 class SceneDto(BaseModel):
     sceneId: str
     location: LocationDto
     title: SentenceDto
     scenes: List[SceneDto]
 class CastDto(BaseModel):
     name: str
     gender: str
     isAdult: bool
     voiceReference: str
 class StoryCreationDTO(BaseModel):
     storyId: str
     chapters: List[ChapterDto]
     cast: List[CastDto]
+#-----------------------------------------------------------
+#__________ func to get file from supabase__________________
+import httpx
+import tempfile
+import asyncio
+# async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
+#     """
+#     Downloads a file from a URL and returns the path to a temporary file.
+#     Retries on failure up to `retries` times, waiting `delay` seconds between attempts.
+#     Returns None if all attempts fail.
+#     """
+#     for attempt in range(1, retries + 1):
+#         try:
+#             async with httpx.AsyncClient(timeout=60.0) as client:  # increased timeout
+#                 response = await client.get(url)
+#                 response.raise_for_status()  # raises for non-200 status codes
+#             # Save to a temporary file
+#             temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+#             temp_file.write(response.content)
+#             temp_file.close()
+#             print(f"Downloaded {url} successfully on attempt {attempt}")
+#             return temp_file.name
+#         except Exception as e:
+#             print(f"Attempt {attempt} failed for {url}: {e}")
+#             if attempt < retries:
+#                 await asyncio.sleep(delay)  # wait before retrying
+#     print(f"All {retries} attempts failed for {url}")
+#     return None
+download_cache = {}
+async def download_scene_files(scene: SceneDto):
+    """
+    Pre-fetch every remote asset a scene references so later steps hit the cache.
+
+    Collects the prosody reference of each sentence, the location SFX (when
+    `scene.location.path` is set) and the background music (when
+    `scene.bgMusic.musicPath` is set), then downloads them concurrently
+    through `download_file_from_url`.
+
+    Returns a list with one entry per requested asset, in request order
+    (sentences, then SFX, then music); each entry is whatever
+    `download_file_from_url` returned for that URL.
+    """
+    urls = [sentence.prosodyReference for sentence in scene.sentences]
+    # Location SFX
+    if scene.location.path:
+        urls.append(scene.location.path)
+    # Background music
+    if scene.bgMusic and scene.bgMusic.musicPath:
+        urls.append(scene.bgMusic.musicPath)
+    # Fetch each distinct URL only once: several sentences may share a prosody
+    # reference, and concurrent requests for the same URL would all miss the
+    # cache and each leave its own temp file behind.
+    unique_urls = list(dict.fromkeys(urls))
+    # Run all downloads concurrently
+    fetched = await asyncio.gather(
+        *(download_file_from_url(url) for url in unique_urls)
+    )
+    path_by_url = dict(zip(unique_urls, fetched))
+    # Keep the original contract: one result per requested asset.
+    return [path_by_url[url] for url in urls]
+async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
+    """
+    Downloads a file from a URL and returns the path to a temporary file.
+    If download fails after `retries` attempts, returns None instead of raising an error.
+    Caches successful downloads to avoid repeated requests.
+
+    The temporary file keeps the extension found in the URL path (falling
+    back to ".wav" when there is none) so that `ensure_wav` can tell when a
+    conversion is actually required.
+    """
+    from urllib.parse import urlparse  # local import: only needed here
+
+    if url in download_cache:
+        return download_cache[url]
+    # Every download used to be saved as ".wav", so e.g. an MP3 asset was
+    # mislabelled and skipped by the WAV conversion step.
+    suffix = Path(urlparse(url).path).suffix or ".wav"
+    for attempt in range(1, retries + 1):
+        try:
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                response = await client.get(url)
+                response.raise_for_status()
+            # `with` closes the handle even if the write fails.
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+                temp_file.write(response.content)
+            download_cache[url] = temp_file.name
+            return temp_file.name
+        except Exception as e:
+            # Best-effort by design: report the failure and retry instead of raising.
+            print(f"Attempt {attempt} failed for {url}: {e}")
+            if attempt < retries:
+                await asyncio.sleep(delay)
+    print(f"All {retries} attempts failed for {url}, skipping...")
+    return None
+#-----------------------------------------------------------
+#takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
+# (save_path -> full path including the filename, not just a folder.)
+def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
     gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[audio_file])
     out = model.inference(
         text=text,
         repetition_penalty=model.config.repetition_penalty,
         top_p=model.config.top_p,
     )
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     torchaudio.save(save_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
     return save_path
+#_______________generate audios and folder structure_______________________
 async def generate_story_audios(story: StoryCreationDTO, base_output: str):
+    """
+    Generates audio files and folders for the entire story
+    """
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
         chapter_dir = story_dir / chapter.chapterId
         chapter_dir.mkdir(exist_ok=True)
+        # --- Chapter title audio ---
+        prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
         title_save_path = chapter_dir / "title.wav"
         tagged_text_title = generate_tagged_text(
+            chapter.title.sentence,
+            chapter.title.emotion,
+            chapter.title.intensity
         )
+        title_generated_audio_path = inference_by_model(
+            text=tagged_text_title,
+            audio_file=prosody_file_title,
+            save_path=title_save_path
+        )
+        # os.remove(prosody_file_title)
         for scene in chapter.scenes:
+            await download_scene_files(scene)
             scene_dir = chapter_dir / scene.sceneId
             scene_dir.mkdir(exist_ok=True)
+            # --- Sentences audio ---
             for sentence in scene.sentences:
+                # Download the prosody reference audio from Supabase
+                prosody_file = download_cache[sentence.prosodyReference]
                 sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
                 tagged_text = generate_tagged_text(
+                    sentence.sentence,
+                    sentence.emotion,
+                    sentence.intensity
                 )
+                sentence_generated_audio_path = inference_by_model(
+                    text=tagged_text,
+                    audio_file=prosody_file,
+                    save_path=sentence_save_path
+                )
+                # os.remove(prosody_file)
+#_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
+from pydub import AudioSegment
+import os
+import subprocess
 def ensure_wav(file_path: str) -> str:
+    """
+    Convert a single audio file to WAV using ffmpeg.
+
+    The decision is made on the file extension only: a path ending in ".wav"
+    (case-insensitive) is returned unchanged without inspecting its content.
+    Otherwise ffmpeg writes a sibling file with the same name and a ".wav"
+    extension (overwriting it if present, because of "-y") and that path is
+    returned.
+
+    Raises subprocess.CalledProcessError if ffmpeg exits with a non-zero status.
+    """
     ext = os.path.splitext(file_path)[1].lower()
     if ext == ".wav":
+        return file_path  # Already WAV
+    # Output path: same folder, same name, .wav extension
     wav_path = os.path.splitext(file_path)[0] + ".wav"
+    # Run ffmpeg conversion
     subprocess.run(["ffmpeg", "-y", "-i", file_path, wav_path], check=True)
+    print(f"Converted: {file_path} → {wav_path}")
     return wav_path
+from pydub import AudioSegment
+import asyncio
+async def concat_story_audio(story: StoryCreationDTO, base_output: str, final_path: str = None): # full path including filename
+    """
+    Assemble the per-sentence WAV files of a story into one WAV file.
+
+    For every chapter the audio starts with `title.wav`, followed by each
+    scene: its sentence files in order, overlaid with the location SFX (when
+    it could be downloaded) and with the background music (looped/trimmed to
+    the scene length). 2 s of silence follow each scene and 3 s each chapter.
+
+    `final_path` is the full output path including the file name; when None
+    it defaults to `<base_output>/<storyId>/<storyId>_full.wav`.
+
+    Returns the output path as a `Path`.
+    Raises RuntimeError when a scene's background music cannot be downloaded.
+    """
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
     if final_path is None:
         final_path = story_dir / f"{story.storyId}_full.wav"
     else:
         final_path = Path(final_path)
+        final_path.parent.mkdir(parents=True, exist_ok=True)  # ensure folder exists
+    chapters_audio = AudioSegment.silent(duration=0)  # start empty
     for chapter in story.chapters:
         chapter_dir = story_dir / chapter.chapterId
+        # --- Chapter title ---
+        title_path = chapter_dir / "title.wav"
+        chapter_audio = AudioSegment.from_wav(title_path)
         for scene in chapter.scenes:
             scene_dir = chapter_dir / scene.sceneId
             scene_audio = AudioSegment.silent(duration=0)
+            # --- Concatenate sentence audios ---
             for sentence in scene.sentences:
                 sentence_path = scene_dir / f"{sentence.sentenceId}.wav"
+                sentence_audio = AudioSegment.from_wav(sentence_path)
+                scene_audio += sentence_audio
+            # --- Add SFX for location if available ---
             if scene.location.path:
+                sfx_file = await download_file_from_url(scene.location.path)
                 if sfx_file:
                     sfx_file_wav = ensure_wav(sfx_file)
+                    sfx_audio = AudioSegment.from_wav(sfx_file_wav)
+                    scene_audio = scene_audio.overlay(sfx_audio)
+                    # os.remove(sfx_file)
+                #else:
+                    #print(f"SFX skipped for {scene.location.locationName}")
+            # --- Add background music if available ---
             if scene.bgMusic and scene.bgMusic.musicPath:
+                bg_url = scene.bgMusic.musicPath
+                bg_file = await download_file_from_url(bg_url)
+                # The download helper returns None after exhausting its retries;
+                # fail with a clear message instead of a TypeError inside ensure_wav.
+                if bg_file is None:
+                    raise RuntimeError(f"Missing background music file: {bg_url}")
                 bg_file_wav = ensure_wav(bg_file)
                 bg_audio = AudioSegment.from_file(bg_file_wav)
+                # Adjust volume
+                bg_audio = bg_audio - (1 - scene.bgMusic.volume) * 30  # approximate
+                # Loop if shorter than scene
                 if len(bg_audio) < len(scene_audio):
+                    loops = (len(scene_audio) // len(bg_audio)) + 1
+                    bg_audio = bg_audio * loops
+                bg_audio = bg_audio[:len(scene_audio)]  # trim to match scene
                 scene_audio = scene_audio.overlay(bg_audio)
+                # os.remove(bg_file)
+            # Add 2 seconds of silence between scenes
             scene_audio += AudioSegment.silent(duration=2000)
             chapter_audio += scene_audio
+        # Add 3 seconds of silence between chapters
         chapter_audio += AudioSegment.silent(duration=3000)
         chapters_audio += chapter_audio
+    # Export final story
     chapters_audio.export(final_path, format="wav")
     return final_path
+#-------------------------------------------------------------
 app = FastAPI(title="EGTTS Arabic TTS API")
 tasks = {}
+#___________________Test end point to test supabase fetch
+from fastapi import Query
+from fastapi.responses import Response
+@app.get("/test-download/")
+async def test_download(url: str = Query(...)):
+    """
+    Debug endpoint: download `url` through the cache and return the file.
+
+    `download_file_from_url` returns a local file path (or None once all
+    retries have failed), not the file's bytes, so the file is served from
+    disk rather than passing that return value as the response body.
+
+    NOTE(review): this fetches any URL the caller supplies; keep it out of
+    production deployments.
+    """
+    try:
+        file_path = await download_file_from_url(url)
+        if file_path is None:
+            return {"error": f"Failed to download {url}"}
+        return FileResponse(
+            file_path,
+            media_type="audio/wav"  # change if needed
+        )
+    except Exception as e:
+        return {"error": str(e)}
+#_________________________________________
+@app.get("/")
+def root():
+    """Landing route that points visitors at the interactive API docs."""
+    welcome = {"message": "Welcome! Visit /docs for Swagger UI."}
+    return welcome
+#-----------------------------------------------------------
+class TTSResponse(BaseModel):
+    """Response model describing one generated audio file."""
+    # Name of the generated audio file.
+    fileName: str
+    duration: float  # seconds
+    # Location of the audio — presumably a path or public URL; confirm against the producer.
+    audioPath: str
+#---------------------------concatenate text with tags ---------------------------
+# Map Intensity enum names to the tag fragment used inside <int_...>
+intensity_map = dict(LOW="low", MEDIUM="mid", HIGH="high")
+# Map Emotion enum names to the lowercase tag fragment used inside <emo_...>
+emotion_map = {
+    name: name.lower()
+    for name in (
+        "HAPPINESS",
+        "SADNESS",
+        "FEAR",
+        "ANGER",
+        "SURPRISE",
+        "WHISPER",
+        "NARRATION",
+    )
+}
+def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: str) -> str:
+    """
+    Prefix `text` with its emotion and intensity control tags.
+
+    `emotion_enum` must be a key of `emotion_map` and `intensity_enum` a key
+    of `intensity_map`; an unknown name raises KeyError.
+    The result has the form "<emo_x> <int_y> text".
+    """
+    emotion = emotion_map[emotion_enum]
+    level = intensity_map[intensity_enum]
+    return f"<emo_{emotion}> <int_{level}> {text}"
+#-----------------------------------------------------------
+#-----------------Post End Point_____________________________
+# @app.post("/tts/")
+# async def process_story(story: StoryCreationDTO):
+#   # Optional: print info for debugging
+#     print(story.storyId)
+#     for cast in story.cast:
+#         print(cast.name, cast.voiceReference)
+#     for chapter in story.chapters:
+#         for scene in chapter.scenes:
+#             for sentence in scene.sentences:
+#                 print(sentence.speaker, sentence.sentence)
+#     # 1️⃣ Generate all sentence audios and folder structure
+#     await generate_story_audios(story, base_output=OUTPUT_DIR)
+#      # 2️⃣ Concatenate all into final story audio
+#     final_story_path = os.path.join(OUTPUT_DIR, story.storyId, f"{story.storyId}_full.wav")
+#     final_generated_story_path = await concat_story_audio(story, base_output=OUTPUT_DIR, final_path=final_story_path)
+#     # Convert to base64 and get duration
+#     audio_b64, duration = audio_to_base64(final_generated_story_path)
+#     response = TTSResponse(
+#         file_name= os.path.basename(final_generated_story_path),
+#         duration=duration,
+#         audio_base64=audio_b64
+#     )
+#     return response
+# async def run_tts_pipeline(task_id: str, story: StoryCreationDTO):
+#     try:
+#         await generate_story_audios(story, base_output=OUTPUT_DIR)
+#         final_story_path = os.path.join(
+#             OUTPUT_DIR,
+#             story.storyId,
+#             f"{story.storyId}_full.wav"
+#         )
+#         final_generated_story_path = await concat_story_audio(
+#             story,
+#             base_output=OUTPUT_DIR,
+#             final_path=final_story_path
+#         )
+#         audio_b64, duration = audio_to_base64(final_generated_story_path)
+#         tasks[task_id] = {
+#             "status": "completed",
+#             "result": {
+#                 "fileName": os.path.basename(final_generated_story_path),
+#                 "duration": duration,
+#                 "audioPath": audio_b64
+#             }
+#         }
+#     except Exception as e:
+#         print(f"Exception caught at run tts pipeline {str(e)} and status is now failed")
+#         tasks[task_id] = {
+#             "status": "failed",
+#             "error": str(e)
+#         }
+import os
+import uuid
+from supabase import create_client, Client
+from pydub import AudioSegment  # For duration in seconds
+# Initialize Supabase client
+# The key used here is a service-role JWT, which must never be committed to
+# source control. It is read from the environment instead; the key that was
+# previously hard-coded in this file should be rotated.
+SUPABASE_URL = os.environ.get("SUPABASE_URL", "https://kvlxvhdgacktsgykyckm.supabase.co/")
+SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
+if not SUPABASE_KEY:
+    raise RuntimeError("SUPABASE_KEY environment variable is not set")
+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+import time
 async def run_tts_pipeline(task_id: str, story: StoryCreationDTO):
+    start_time = time.time()  # start timer
     try:
+        # 1️⃣ Generate story audios
+        await generate_story_audios(story, base_output=OUTPUT_DIR)
+        # 2️⃣ Concatenate final story audio
+        final_story_path = os.path.join(
+            OUTPUT_DIR,
+            story.storyId,
+            f"{story.storyId}_full.wav"
+        )
+        final_generated_story_path = await concat_story_audio(
+            story,
+            base_output=OUTPUT_DIR,
+            final_path=final_story_path
+        )
+        print(f" final_generated_story_path: {final_generated_story_path}")
         wav = AudioSegment.from_wav(final_generated_story_path)
         mp3_path = final_generated_story_path.with_suffix(".mp3")
         wav.export(mp3_path, format="mp3", bitrate="192k")
+        print(f" final_generated_story_path after conversion to mp3: {mp3_path}")
+        # 3️⃣ Calculate duration
+        audio_segment = AudioSegment.from_file(mp3_path)
+        duration_seconds = len(audio_segment) / 1000  # pydub gives length in milliseconds
+        # 4️⃣ Prepare the file for upload
         file_name = f"{uuid.uuid4()}_{os.path.basename(mp3_path)}"
         storage_path = f"{story.storyId}/final/{file_name}"
+        # with open(final_generated_story_path, "rb") as f:
+        #     file_bytes = f.read()
+        supabase.storage.from_("story-audio-files").upload(
+            storage_path,
+            mp3_path
+        )
+        # 6️⃣ Get public URL
         audio_url = supabase.storage.from_("story-audio-files").get_public_url(storage_path)
+        # 7️⃣ Update task status with audio URL and duration
         tasks[task_id] = {
             "status": "completed",
             "result": {
             }
         }
+        # --- Print processing time ---
+        end_time = time.time()
+        elapsed = end_time - start_time
         print(f"Story {story.storyId} processed in {elapsed:.2f} seconds")
     except Exception as e:
+        print(f"exception caught at run tts pipeline {str(e)}")
+        tasks[task_id] = {
+            "status": "failed",
+            "error": str(e)
+        }
+from fastapi import BackgroundTasks
+import uuid
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO, background_tasks: BackgroundTasks):
+    """
+    Queue a story for audio generation and return immediately.
+
+    Registers a new task id in the in-memory `tasks` dict with status
+    "processing", schedules `run_tts_pipeline` as a background task, and
+    returns `{"task_id": ...}` for the caller to poll.
+    """
     task_id = str(uuid.uuid4())
+    tasks[task_id] = {
+        "status": "processing",
+        "result": None
+    }
     background_tasks.add_task(run_tts_pipeline, task_id, story)
     return {"task_id": task_id}
+#-----------------------Results Get End Point ______________________________________
+# @app.get("/tts/results/{task_id}")
+# async def get_results(task_id: str):
+#     if task_id not in tasks:
+#         return {"status": "not_found"}
+#     task = tasks[task_id]
+#     if task["status"] == "processing":
+#         return {"status": "processing"}
+#     if task["status"] == "failed":
+#         return {
+#             "status": "failed",
+#             "error": task["error"]
+#         }
+#     return task["result"]
 @app.get("/tts/results/{task_id}")
 async def get_results(task_id: str):
     if task_id not in tasks:
         return {"status": "not_found"}
         # If result is missing fields, mark as still processing
         return {"status": "processing"}
+#----------------------------Test End Point to test tts inference------------------------------------
+@app.post("/tts_test/")
+async def tts_endpoint(
+    text: str = Form(...),
+    audio_file: UploadFile = File(...),
+    emotionName: str = Form(...),
+    intensity: str = Form(...)
+):
+    """
+    Test endpoint: synthesize `text` with the uploaded prosody reference.
+
+    `emotionName` must be a key of `emotion_map` and `intensity` a key of
+    `intensity_map` ("LOW", "MEDIUM", "HIGH"). `intensity` was declared as an
+    int, which could never match those string keys, so every request failed.
+
+    Returns the generated audio as a WAV file; responds with HTTP 400 when a
+    tag name is unknown.
+    """
+    from fastapi import HTTPException  # local import: only this endpoint needs it
+
+    # Reject bad tags before writing anything to disk.
+    if emotionName not in emotion_map or intensity not in intensity_map:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"emotionName must be one of {sorted(emotion_map)} and "
+                f"intensity one of {sorted(intensity_map)}"
+            ),
+        )
+    # The filename is client-controlled: keep only its last component so the
+    # upload cannot be written outside OUTPUT_DIR.
+    safe_name = os.path.basename(audio_file.filename or "") or "uploaded_reference.wav"
+    file_path = os.path.join(OUTPUT_DIR, safe_name)
+    with open(file_path, "wb") as f:
+        f.write(await audio_file.read())
+    tagged_text = generate_tagged_text(text, emotionName, intensity)
+    output_path = os.path.join(OUTPUT_DIR, "out_test.wav")
+    output_wav = inference_by_model(tagged_text, file_path, output_path)
+    return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
 import uvicorn
+# Start the server only when this file is executed as a script, so that
+# importing the module (e.g. `uvicorn app:app`) does not block on a nested
+# server started at import time.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)