File size: 24,239 Bytes
a826212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102b5e7
a826212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102b5e7
a826212
 
 
 
 
102b5e7
a826212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102b5e7
a826212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102b5e7
a826212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102b5e7
a826212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
import os
import io
import re
import ast
import json
import tempfile
from pathlib import Path
from typing import List, Dict, Counter

# --- Third-Party Libraries ---
import cv2
import torch
from fastapi import APIRouter, UploadFile, File, Query, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
from transformers import AutoModelForCausalLM, AutoTokenizer
from openai import OpenAI

# --- Internal Modules / Project Imports ---
from svision_client import (
    extract_scenes,
    add_ocr_and_faces,
    keyframes_every_second_extraction,
    extract_descripcion_escena
)

from asr_client import (
    extract_audio_from_video,
    diarize_audio,
    transcribe_long_audio,
    transcribe_short_audio,
    identificar_veu
)

from storage.common import validate_token
from storage.files.file_manager import FileManager
from storage.embeddings_routers import get_embeddings_json

from main_process.main_router import (
    get_initial_info_path,
    get_initial_srt_path
)

# Root directory for stored embeddings (not referenced in the visible part of this module).
EMBEDDINGS_ROOT = Path("/data/embeddings")
# Root directory for media; the endpoints below look up one sub-folder per SHA1 under it.
MEDIA_ROOT = Path("/data/media")
# Restrict the CUDA devices visible to this process to device index 1.
# NOTE(review): this assignment runs after `import torch`; presumably CUDA has not
# been initialised yet at import time, so the setting still takes effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Every route declared in this module is registered under the "/moe" prefix.
router = APIRouter(prefix="/moe", tags=["MoE Process"])
# Credentials read from the environment; os.getenv returns None when a variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")

class DataHub:
    """Container for the parsed video-analysis JSON document.

    The decoded JSON is exposed as ``self.video``.
    """

    def __init__(self, video_analysis_json: str):
        """Read and decode the analysis file.

        Args:
            video_analysis_json: Path to a UTF-8 encoded JSON file.
        """
        print("DataHub inicializando con JSON:", video_analysis_json)
        source = Path(video_analysis_json)
        raw_document = source.read_text(encoding='utf-8')
        self.video = json.loads(raw_document)

class NState(dict):
    """Plain ``dict`` subclass used as the state type annotation of the pipeline stages."""

# ---------------- LLM used for the free narration ----------------
class SalamandraClient:
    """Wrapper around a Hugging Face causal language model for text generation."""

    def __init__(self, model_id="BSC-LT/salamandra-7b-instruct"):
        """Load the tokenizer and the model weights for ``model_id``.

        The model is loaded in bfloat16 with ``device_map="auto"``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16
        )

    def chat(self, prompt) -> str:
        """Generate a continuation for ``prompt`` and return only the new text.

        Args:
            prompt: Text handed to the tokenizer as-is.

        Returns:
            The decoded tokens produced after the prompt, special tokens removed.
        """
        batch = self.tokenizer(prompt, return_tensors="pt", padding=True)

        target_device = self.model.device
        prompt_ids = batch["input_ids"].to(target_device)
        prompt_mask = batch["attention_mask"].to(target_device)

        # NOTE(review): no `do_sample` is passed, so whether temperature/top_k/top_p
        # take effect depends on the model's generation config — confirm.
        generation_args = {
            "input_ids": prompt_ids,
            "attention_mask": prompt_mask,
            "pad_token_id": self.tokenizer.pad_token_id,
            "max_new_tokens": 300,   # raise for longer outputs
            "temperature": 0.01,     # creativity control
            "top_k": 50,             # most likely tokens
            "top_p": 0.9,
        }
        sequences = self.model.generate(**generation_args)
        first_sequence = sequences[0]

        # Debug output: full decoded sequence (prompt + completion).
        print(self.tokenizer.decode(first_sequence, skip_special_tokens=True))
        print("Separación")

        # Drop the prompt tokens so only the completion is returned.
        prompt_length = prompt_ids.shape[1]
        completion_ids = first_sequence[prompt_length:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True)

# In this module this client is only used for the final evaluation step:
class GPT5Client:
    """Minimal chat-completions wrapper around the OpenAI SDK client.

    NOTE(review): despite the class name, ``chat`` defaults to the
    ``'gpt-4o-mini'`` model.
    """

    def __init__(self, api_key: str):
        """Create the underlying OpenAI client.

        Args:
            api_key: OpenAI API key.

        Raises:
            RuntimeError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise RuntimeError("Missing key in environment for GPT-5 client")
        self.cli = OpenAI(api_key=api_key)

    def chat(self, messages: list, model: str = 'gpt-4o-mini') -> str:
        """Send ``messages`` to the chat-completions API and return the reply text.

        Args:
            messages: List of ``{"role": ..., "content": ...}`` dictionaries.
            model: Model identifier passed to the API.

        Returns:
            The first choice's message content, stripped of surrounding
            whitespace. An empty string is returned when the API yields no
            content (``None``) instead of failing on ``None.strip()``.
        """
        print("GPT5Client.chat llamado con", len(messages), "mensajes")
        response = self.cli.chat.completions.create(
            model=model, messages=messages, temperature=0
        )
        content = response.choices[0].message.content
        return (content or "").strip()


def get_video_duration(video_path: str) -> float:
    """Return the total duration of a video in seconds.

    The duration is computed as frame count divided by frames per second.
    When the container reports no FPS, 25.0 is assumed; when it reports no
    frames, 0.0 is returned.

    Args:
        video_path: Path to the video file. ``str`` or any ``os.PathLike``
            is accepted; it is converted with ``os.fspath`` because some
            OpenCV builds only accept plain strings.

    Raises:
        RuntimeError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(os.fspath(video_path))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"No s'ha pogut obrir el vídeo: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture on every path, including the error one.
        cap.release()

    return total_frames / fps if total_frames > 0 else 0.0

def generate_srt_con_silencios(path_srt_original, path_srt_silences, video_path):
    """Write a copy of an SRT file with explicit ``[silenci]`` cues filling the gaps.

    Every gap between the end of the previous cue (or time 0) and the start
    of the next one becomes a ``[silenci]`` cue. A final ``[silenci]`` cue is
    added when the video is longer than the last cue. All cues are renumbered
    from 1.

    Args:
        path_srt_original: Path of the source SRT (UTF-8, optional BOM).
        path_srt_silences: Path where the enriched SRT is written (UTF-8).
        video_path: Path of the video, used only to obtain its total duration.
    """
    total_duration = get_video_duration(video_path)

    with open(path_srt_original, "r", encoding="utf-8-sig") as f:
        srt_text = f.read()

    srt_entries = []
    prev = 0
    idx = 1

    # Split on one or more blank lines so stray extra blank lines (or lines
    # containing only whitespace) do not produce malformed blocks.
    for block in re.split(r"\n\s*\n", srt_text.strip()):
        lines = block.split("\n")

        # Locate the timing line instead of assuming it is the second line;
        # blocks without one (e.g. an empty file) are skipped.
        timing_pos = next(
            (pos for pos, line in enumerate(lines) if "-->" in line), None
        )
        if timing_pos is None:
            continue

        start_str, end_str = lines[timing_pos].split("-->", 1)
        start_sec = srt_time_to_seconds(start_str.strip())
        end_sec = srt_time_to_seconds(end_str.strip())
        content = " ".join(line.strip() for line in lines[timing_pos + 1:])

        # Insert a silence cue when there is a gap before this cue.
        if prev < start_sec:
            srt_entries.append(
                f"{idx}\n{seconds_to_srt_time(prev)} --> {seconds_to_srt_time(start_sec)}\n[silenci]\n"
            )
            idx += 1

        # Cue carrying the original text.
        srt_entries.append(
            f"{idx}\n{seconds_to_srt_time(start_sec)} --> {seconds_to_srt_time(end_sec)}\n{content}\n"
        )
        idx += 1
        # Never move the cursor backwards: with overlapping cues that would
        # create a bogus silence inside an already covered interval.
        prev = max(prev, end_sec)

    # Trailing silence when the video outlasts the last cue.
    if prev < total_duration:
        srt_entries.append(
            f"{idx}\n{seconds_to_srt_time(prev)} --> {seconds_to_srt_time(total_duration)}\n[silenci]\n"
        )

    with open(path_srt_silences, "w", encoding="utf-8") as f:
        f.write("\n".join(srt_entries))

def srt_time_to_seconds(s):
    """Convert an SRT timestamp of the form ``HH:MM:SS,mmm`` into seconds.

    Args:
        s: Timestamp string with exactly two ``:`` and one ``,`` separator.

    Returns:
        The timestamp as a float number of seconds.
    """
    hours, minutes, tail = s.split(":")
    whole_seconds, millis = tail.split(",")
    return int(hours)*3600 + int(minutes)*60 + float(whole_seconds) + int(millis)/1000

def seconds_to_srt_time(seconds):
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond first and then split
    with integer arithmetic. This avoids losing a millisecond to float
    truncation (e.g. ``1.001`` must give ``,001``) and carries correctly
    when rounding reaches the next second.

    Args:
        seconds: Time offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(int(round(seconds * 1000)), 0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

class Add_AD:
    """Pipeline stage that replaces ``[silenci]`` cues with audio-description cues.

    Scene descriptions are taken from ``data.video['info_escenas']``; each
    scene is read through its ``start``, ``end`` and ``descripcion`` keys.
    """

    def __init__(self, data: DataHub):
        self.data = data

    def __call__(self, state: NState, srt_modified_silence, srt_modified_silence_con_ad) -> NState:
        """Build the SRT with audio descriptions and store it in ``state``.

        Args:
            state: Pipeline state; ``state['srt_con_audiodescripcion']`` is set
                to the generated SRT text.
            srt_modified_silence: Path of the input SRT containing
                ``[silenci]`` cues (read as UTF-8).
            srt_modified_silence_con_ad: Path where the resulting SRT is
                written (UTF-8).

        Returns:
            The same ``state`` object.
        """
        print("Add_Ad.__call__ iniciado")

        with open(srt_modified_silence, "r", encoding="utf-8") as f:
            srt_text = f.read()

        # Scene list; empty list by default, matching Free_Narration.
        frames = self.data.video.get('info_escenas', [])

        # One match per cue: index, start, end and the text up to the next
        # cue index line (or the end of the input).
        pattern = re.compile(
            r"(\d+)\s+(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\s+(.*?)(?=\n\d+\n|\Z)",
            re.S
        )

        srt_blocks = [
            {
                "index": int(match.group(1)),
                "start": srt_time_to_seconds(match.group(2)),
                "end": srt_time_to_seconds(match.group(3)),
                "text": match.group(4).strip(),
            }
            for match in pattern.finditer(srt_text)
        ]

        srt_blocks_modified = []
        index = 1  # output cues are renumbered from 1

        for block in srt_blocks:
            if "[silenci]" not in block["text"]:
                # Regular cue: copy it through with its new index.
                srt_blocks_modified.append({
                    "index": index,
                    "start": block["start"],
                    "end": block["end"],
                    "text": block["text"]
                })
                index += 1
                continue

            # Silence cue: cover it with the descriptions of the scenes that
            # span it. ``start_block`` advances as parts of the gap are used.
            start_block = block["start"]
            end_block = block["end"]

            for frame in frames:
                frame_start = frame.get("start")
                frame_end = frame.get("end")
                ad_text = f"(AD): {frame.get('descripcion', '')}"

                if frame_start <= start_block and frame_end >= end_block:
                    # The scene covers the whole remaining gap.
                    srt_blocks_modified.append({
                        "index": index,
                        "start": start_block,
                        "end": end_block,
                        "text": ad_text
                    })
                    index += 1

                elif start_block < frame_end < end_block:
                    # The scene ends inside the gap: describe up to its end
                    # and leave the rest of the gap for the following scenes.
                    srt_blocks_modified.append({
                        "index": index,
                        "start": start_block,
                        "end": frame_end,
                        "text": ad_text
                    })
                    start_block = frame_end
                    index += 1

        # Rebuild the final SRT text.
        srt_final = "".join(
            f"{entry['index']}\n"
            f"{seconds_to_srt_time(entry['start'])} --> {seconds_to_srt_time(entry['end'])}\n"
            f"{entry['text']}\n\n"
            for entry in srt_blocks_modified
        )

        with open(srt_modified_silence_con_ad, "w", encoding="utf-8") as f:
            f.write(srt_final)

        state['srt_con_audiodescripcion'] = srt_final
        return state

class Free_Narration:
    """Pipeline stage that asks the Salamandra LLM for a free narration of the video."""

    def __init__(self, data: DataHub):
        self.data = data

    def __call__(self, state: NState, srt_original_silence_con_ad, story_path) -> NState:
        """Generate the free narration, write it to disk and store it in ``state``.

        The prompt is built from the scene descriptions
        (``info_escenas[*]['descripcion']``) and the ``full_transcription``
        entry of the analysis JSON.

        Args:
            state: Pipeline state; must provide the callable
                ``state['llm_Salamandra']``. ``state['free_narration']`` is
                set to the model output.
            srt_original_silence_con_ad: Path of the SRT with audio
                descriptions. Kept for interface compatibility; the prompt
                does not use it, so the file is no longer read.
            story_path: Path where the narration is written (UTF-8 with BOM).

        Returns:
            The same ``state`` object.
        """
        print("Free_Narration.__call__ iniciado")

        frames = self.data.video.get('info_escenas', [])
        # Tolerate scenes without a description, as Add_AD does.
        descriptions = [frame.get("descripcion", "") for frame in frames]

        full_transcription = self.data.video.get('full_transcription', [])

        prompt = f"""
        La teva tasca és elaborar una descripció lliure d'un vídeo d'unes 100 paraules a partir de la informació següent:
                1.) A partir del vídeo s'han extret captures de pantalla en els moments en què es canviava d'escena i tens una descripció de cadascuna d'elles a: {descriptions}
                2.) La transcripció completa del vídeo és: {full_transcription}
        Per tant, a partir de tota aquesta informació, genera'm la història completa, intentant incloure els personatges identificats i la trama general de la història.
        """
        out = state['llm_Salamandra'](prompt)
        print(out)

        with open(story_path, "w", encoding="utf-8-sig") as f:
            f.write(out)

        state['free_narration'] = out

        return state
    
class Valoracion_Final:
    """Pipeline stage that asks the GPT client to score an SRT and saves the CSV reply."""

    def __call__(self, state, srt_final, csv_evaluacion):
        """Evaluate ``srt_final`` and write the model's answer to ``csv_evaluacion``.

        Args:
            state: Pipeline state; ``state['llm_GPT']`` must be a callable
                taking a list of chat messages.
            srt_final: Path of the SRT to evaluate (UTF-8, optional BOM).
            csv_evaluacion: Path where the stripped reply is written
                (UTF-8 with BOM).

        Returns:
            The ``state`` object, unchanged.
        """
        print("Valoracion_Final.__call__ iniciat")

        # Load the subtitles that will be embedded in the prompt.
        subtitles = Path(srt_final).read_text(encoding="utf-8-sig").strip()

        # Main evaluation prompt.
        user_prompt = f"""
        Ets un avaluador expert en accessibilitat audiovisual segons la NORMA UNE 153020.

        Analitza el següent fitxer SRT i avalua'l segons les característiques indicades.
        Per a cada característica, assigna una puntuació del 0 al 7 i una justificació breu i específica, 
        seguint el format establert.

        SRT a analitzar:
        {subtitles}

        Format de sortida:
        Caracteristica,Valoracio (0-7),Justificacio

        Les característiques a avaluar són:
        - Precisió Descriptiva: Avalua si la descripció visual dels plans, accions i context és exacta i coherent amb el contingut esperat.
        - Sincronització Temporal: Avalua si el text apareix i desapareix al moment adequat segons el contingut visual o sonor.
        - Claredat i Concisió: Analitza si el llenguatge és clar, natural i sense redundàncies.
        - Inclusió de Diàleg/So: Determina si es recullen correctament els diàlegs, sons i elements musicals rellevants.
        - Contextualització: Avalua si el context (ambient, espai, personatges, situacions) està ben representat.
        - Flux i Ritme de la Narració: Avalua la fluïdesa de la lectura i la coherència temporal entre segments.

        Respon només amb la taula CSV, sense cap text addicional.
        """

        # Chat payload: one system message followed by the user prompt.
        system_message = {
            "role": "system",
            "content": "Ets un assistent expert en accessibilitat audiovisual i normativa UNE 153020.",
        }
        user_message = {"role": "user", "content": user_prompt}

        # state['llm_GPT'] is called with the message list.
        reply = state['llm_GPT']([system_message, user_message])

        # Persist the reply as the CSV evaluation.
        Path(csv_evaluacion).write_text(str(reply).strip(), encoding="utf-8-sig")

        return state

@router.post("/generate_moe_result", tags=["MoE Process"])
async def generate_salamadra_result(
    sha1: str,
    token: str = Query(..., description="Token required for authorization")
):
    """
    Generate all MoE output files (final SRT, free narration, and evaluation CSV)
    for a processed video identified by its SHA1 hash.

    Processing steps:
        1. Validate the access token.
        2. Check that `<MEDIA_ROOT>/<sha1>` and its `clip` sub-folder exist and
           pick the first `*.mp4` file found in `clip`.
        3. Resolve the initial SRT and the initial info JSON for `sha1`.
        4. Ensure `<MEDIA_ROOT>/<sha1>/result/Salamandra` exists.
        5. Build an intermediate SRT with `[silenci]` cues in a temporary
           directory that is removed when processing ends, even on failure.
        6. Run Add_AD, Free_Narration and Valoracion_Final, which write:
            - result.srt
            - free_narration.txt
            - evaluation.csv

    Args:
        sha1 (str): The SHA1 hash that identifies the media processing workspace.
        token (str): Authorization token required to execute the operation.

    Raises:
        HTTPException:
            - 404 if the SHA1 folder does not exist.
            - 404 if the `clip` folder is missing.
            - 404 if no MP4 file is found inside the clip folder.

    Returns:
        dict: A JSON response indicating successful generation:
        {
            "status": "ok",
            "message": "Salamandra SRT, free_narration and CSV evaluation generated"
        }
    """
    validate_token(token)

    # NOTE(review): `sha1` is joined into filesystem paths without validation;
    # consider restricting it to the expected hash format.
    sha1_folder = MEDIA_ROOT / sha1
    clip_folder = sha1_folder / "clip"

    if not sha1_folder.is_dir():
        raise HTTPException(status_code=404, detail="SHA1 folder not found")

    if not clip_folder.is_dir():
        raise HTTPException(status_code=404, detail="Clip folder not found")

    # Locate the video file (first match, as yielded by glob).
    first_mp4 = next(clip_folder.glob("*.mp4"), None)
    if first_mp4 is None:
        raise HTTPException(status_code=404, detail="No MP4 files found")
    # Hand a plain string to the video helpers; some OpenCV builds reject Path objects.
    video_path = str(first_mp4)

    # Inputs produced by the initial processing.
    srt_original = get_initial_srt_path(sha1)
    informacion_json = get_initial_info_path(sha1)

    # Output locations (one folder shared by the three result files).
    salamandra_folder = sha1_folder / "result" / "Salamandra"
    salamandra_folder.mkdir(parents=True, exist_ok=True)
    srt_final = salamandra_folder / "result.srt"
    free_narration_salamandra = salamandra_folder / "free_narration.txt"
    csv_evaluacion = salamandra_folder / "evaluation.csv"

    # NOTE(review): the steps below are blocking (model loading, generation,
    # HTTP call) and run inside an `async def` handler; the Salamandra model
    # is also loaded on every request.
    with tempfile.TemporaryDirectory(prefix=f"{sha1}_srt_") as tmp_dir:
        # Intermediate SRT with silence cues; removed together with tmp_dir.
        srt_with_silences = os.path.join(tmp_dir, "with_silences.srt")
        generate_srt_con_silencios(srt_original, srt_with_silences, video_path)

        datahub = DataHub(informacion_json)
        add_ad = Add_AD(datahub)
        free_narration = Free_Narration(datahub)
        valoracion_final = Valoracion_Final()

        gpt_client = GPT5Client(api_key=OPEN_AI_KEY)
        salamandra_client = SalamandraClient()

        state = {
            "llm_GPT": gpt_client.chat,
            "llm_Salamandra": salamandra_client.chat
        }

        state = add_ad(state, srt_with_silences, srt_final)
        state = free_narration(state, srt_final, free_narration_salamandra)
        state = valoracion_final(state, srt_final, csv_evaluacion)

    return {"status": "ok", "message": "Salamandra SRT, free_narration and CSV evaluation generated"}

@router.get("/download_moe_srt", tags=["MoE Process"])
def download_salamadra_srt(
    sha1: str,
    token: str = Query(..., description="Token required for authorization")
):
    """
    Download the final SRT subtitle file generated by the Salamandra processing pipeline.

    This endpoint returns `<MEDIA_ROOT>/<sha1>/result/Salamandra/result.srt`.
    It is read-only: no folder is created, so a missing folder or file is
    reported as 404.

    Args:
        sha1 (str): The SHA1 identifier corresponding to the processed media folder.
        token (str): Authorization token required to access the resource.

    Raises:
        HTTPException:
            - 404 if any of the required directories (SHA1 folder, result folder, Salamandra folder)
              are missing.
            - 404 if the `result.srt` file is not found.

    Returns:
        FileResponse: The SRT file (`result.srt`) with media type `text/srt`.
    """
    validate_token(token)

    sha1_folder = MEDIA_ROOT / sha1
    result_folder = sha1_folder / "result"
    salamandra_folder = result_folder / "Salamandra"
    srt_final = salamandra_folder / "result.srt"

    # is_dir()/is_file() are False for missing paths, so one check per level is enough.
    if not sha1_folder.is_dir():
        raise HTTPException(status_code=404, detail="SHA1 folder not found")
    if not result_folder.is_dir():
        raise HTTPException(status_code=404, detail="result folder not found")
    if not salamandra_folder.is_dir():
        raise HTTPException(status_code=404, detail="Salamandra folder not found")
    if not srt_final.is_file():
        raise HTTPException(status_code=404, detail="result.srt SRT not found")

    return FileResponse(
        path=srt_final,
        media_type="text/srt",
        filename="result.srt"
    )

@router.get("/download_moe_free_narration", tags=["MoE Process"])
def download_salamadra_free_narration(
    sha1: str,
    token: str = Query(..., description="Token required for authorization")
):
    """
    Download the free narration text file generated by the Salamandra process.

    This endpoint returns `<MEDIA_ROOT>/<sha1>/result/Salamandra/free_narration.txt`.
    It is read-only: no folder is created, so a missing folder or file is
    reported as 404.

    Args:
        sha1 (str): The SHA1 identifier for the processed media folder.
        token (str): Authorization token required to access the file.

    Raises:
        HTTPException:
            - 404 if the SHA1 folder, result folder, or Salamandra folder is missing.
            - 404 if `free_narration.txt` is not found.

    Returns:
        FileResponse: The free narration text file (`free_narration.txt`)
        with media type `text/plain`.
    """
    validate_token(token)

    sha1_folder = MEDIA_ROOT / sha1
    result_folder = sha1_folder / "result"
    salamandra_folder = result_folder / "Salamandra"
    free_narration_salamandra = salamandra_folder / "free_narration.txt"

    # is_dir()/is_file() are False for missing paths, so one check per level is enough.
    if not sha1_folder.is_dir():
        raise HTTPException(status_code=404, detail="SHA1 folder not found")
    if not result_folder.is_dir():
        raise HTTPException(status_code=404, detail="result folder not found")
    if not salamandra_folder.is_dir():
        raise HTTPException(status_code=404, detail="Salamandra folder not found")
    if not free_narration_salamandra.is_file():
        raise HTTPException(status_code=404, detail="free_narration.txt not found")

    return FileResponse(
        path=free_narration_salamandra,
        media_type="text/plain",
        filename="free_narration.txt"
    )

@router.get("/download_moe_csv_evaluation", tags=["MoE Process"])
def download_salamadra_csv_evaluation(
    sha1: str,
    token: str = Query(..., description="Token required for authorization")
):
    """
    Download the evaluation CSV generated by the Salamandra processing workflow.

    This endpoint returns `<MEDIA_ROOT>/<sha1>/result/Salamandra/evaluation.csv`.
    It is read-only: no folder is created, so a missing folder or file is
    reported as 404.

    Args:
        sha1 (str): The SHA1 identifier representing the processed media directory.
        token (str): Authorization token required for file retrieval.

    Raises:
        HTTPException:
            - 404 if the SHA1 folder, result folder, or Salamandra folder does not exist.
            - 404 if the `evaluation.csv` file is missing.

    Returns:
        FileResponse: The evaluation CSV file (`evaluation.csv`) with media type `text/csv`.
    """
    validate_token(token)

    sha1_folder = MEDIA_ROOT / sha1
    result_folder = sha1_folder / "result"
    salamandra_folder = result_folder / "Salamandra"
    csv_evaluacion = salamandra_folder / "evaluation.csv"

    # is_dir()/is_file() are False for missing paths, so one check per level is enough.
    if not sha1_folder.is_dir():
        raise HTTPException(status_code=404, detail="SHA1 folder not found")
    if not result_folder.is_dir():
        raise HTTPException(status_code=404, detail="result folder not found")
    if not salamandra_folder.is_dir():
        raise HTTPException(status_code=404, detail="Salamandra folder not found")
    if not csv_evaluacion.is_file():
        raise HTTPException(status_code=404, detail="evaluation.csv CSV not found")

    return FileResponse(
        path=csv_evaluacion,
        media_type="text/csv",
        filename="evaluation.csv"
    )