Delete finetuning
Browse files- finetuning/finetuning.py +0 -762
- finetuning/lora.py +0 -219
- finetuning/reflection.py +0 -520
- finetuning/video_analysis.py +0 -189
finetuning/finetuning.py
DELETED
|
@@ -1,762 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import csv
|
| 3 |
-
import json
|
| 4 |
-
import logging
|
| 5 |
-
import shutil
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
from typing import TypedDict, Annotated, List, Dict, Union
|
| 8 |
-
from langgraph.graph import StateGraph, END
|
| 9 |
-
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
| 10 |
-
from langchain_openai import ChatOpenAI
|
| 11 |
-
from operator import itemgetter
|
| 12 |
-
|
| 13 |
-
# --- Configuration and tooling ---

# Working directories: a "temp" folder next to this file holds all artifacts.
BASE_DIR = Path(__file__).resolve().parent
TEMP_DIR = BASE_DIR / "temp"
TEMP_DIR.mkdir(exist_ok=True)

LOG_FILE = TEMP_DIR / "finetuning.log"

# Configure logging: mirror every record to the console and to TEMP_DIR/finetuning.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOG_FILE, encoding="utf-8")
    ],
)
logger = logging.getLogger(__name__)

# The OPENAI_API_KEY environment variable must be set before running this script;
# fail fast at import time if it is missing.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise EnvironmentError("OPENAI_API_KEY no est谩 configurada. Define la variable de entorno antes de ejecutar finetuning.py.")

# Initialize the LLM (GPT-4o, chosen for its reasoning capability).
# In production, pick a model that fits your token and latency requirements.
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
# --- Example fixture data ---

# Seed SRT file (Narrator input): dialog lines tagged [Name], AD lines tagged (AD).
INITIAL_SRT_CONTENT = """
1
00:00:00,000 --> 00:00:05,340
[Sandra] Per貌 de veritat crec que aquest projecte canviar脿 la nostra nota final.

2
00:00:04,340 --> 00:00:05,790
[Luc铆a] Hem de donar-ho tot.

3
00:00:05,790 --> 00:00:08,790
[Sandra] Ho s茅, ho s茅.

4
00:00:08,000 --> 00:00:10,000
(AD) De sobte, s贸n al parc.

5
00:00:10,000 --> 00:00:14,000
(AD) Ara tallen menjar i fan una amanida a una cuina.
"""

# Visual-context JSON (simplified version of the segments fed to the Narrator).
CONTEXT_JSON_CONTENT = """
{
  "segments": [
    {"id": 1, "start": "00:00:00,000", "end": "00:00:05,340", "type": "dialog", "text": "[Sandra] Per貌 de veritat crec que aquest projecte canviar脿 la nostra nota final."},
    {"id": 2, "start": "00:00:04,340", "end": "00:00:05,790", "type": "dialog", "text": "[Luc铆a] Hem de donar-ho tot."},
    {"id": 3, "start": "00:00:05,790", "end": "00:00:08,790", "type": "dialog", "text": "[Sandra] Ho s茅, ho s茅."},
    {"id": 4, "start": "00:00:08,000", "end": "00:00:10,000", "type": "visual_context", "text": "Cambio de escena a un parque. Personajes caminando."},
    {"id": 5, "start": "00:00:10,000", "end": "00:00:14,000", "type": "visual_context", "text": "Escena en una cocina. Los personajes est谩n cortando vegetales y haciendo una ensalada."}
  ]
}
"""

# UNE audio-description rules (technical norm, summarized for the Critic agent).
UNE_RULES = """
### Reglas UNE de Audiodescripci贸n (Para el Cr铆tico)
1. **Objetividad y Foco Visual:** La descripci贸n debe ser puramente objetiva, describiendo solo lo que se ve. Debe priorizar la acci贸n y los elementos relevantes (personajes, objetos, localizaci贸n).
2. **Tiempo y Espacio (Sincronizaci贸n):** Las audiodescripciones (AD) deben insertarse en los silencios del di谩logo. El tiempo de la AD (entre START y END) debe ser suficiente para narrar el contenido sin solaparse con el di谩logo o la m煤sica importante.
3. **Concisi贸n y Claridad:** Usar lenguaje simple y conciso. Evitar redundancias y juicios de valor.
4. **Formato:** Cada segmento de AD debe tener un formato SRT v谩lido, incluyendo el marcador (AD) al principio de la l铆nea de texto.
5. **Utilidad:** Cada segmento de AD debe ser 煤til para la comprensi贸n y nunca ser redundante. En caso de repetir algo ya explicado antes, mejor no decir nada.
"""

# Names of the structured-evaluation criteria requested from the auditor LLM.
EVALUATION_CRITERIA = [
    "Precisi贸 Descriptiva",
    "Sincronitzaci贸 Temporal",
    "Claredat i Concisi贸",
    "Inclusi贸 de Di脿leg/So",
    "Contextualitzaci贸",
    "Flux i Ritme de la Narraci贸",
]

# Per-criterion weights for the weighted mean (temporal sync dominates at 4x).
CRITERIA_WEIGHTS = {
    "Precisi贸 Descriptiva": 1,
    "Sincronitzaci贸 Temporal": 4,
    "Claredat i Concisi贸": 1,
    "Inclusi贸 de Di脿leg/So": 1,
    "Contextualitzaci贸": 1,
    "Flux i Ritme de la Narraci贸": 1,
}
# Seed the working directory for a run.
def setup_files(initial_srt_content: str, context_json_content: str):
    """Write the seed SRT and visual-context JSON into TEMP_DIR for a fresh run."""
    seed_files = {
        "une_ad_0.srt": initial_srt_content,
        "json_ad.json": context_json_content,
    }
    for filename, payload in seed_files.items():
        (TEMP_DIR / filename).write_text(payload, encoding="utf-8")
    logger.info("Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.")
# --- Utilidades ---
|
| 117 |
-
def _strip_markdown_fences(content: str) -> str:
|
| 118 |
-
"""Elimina fences ```...``` alrededor de una respuesta JSON si existen."""
|
| 119 |
-
text = content.strip()
|
| 120 |
-
if text.startswith("```"):
|
| 121 |
-
lines = text.splitlines()
|
| 122 |
-
# descartar primera l铆nea con ``` o ```json
|
| 123 |
-
lines = lines[1:]
|
| 124 |
-
# eliminar el cierre ``` (pueden existir varias l铆neas en blanco finales)
|
| 125 |
-
while lines and lines[-1].strip() == "```":
|
| 126 |
-
lines.pop()
|
| 127 |
-
text = "\n".join(lines).strip()
|
| 128 |
-
return text
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
def generate_evaluation_report(srt_content: str, iteration: int) -> tuple[float, float, Path]:
    """Ask the LLM for a structured 0-7 evaluation of the SRT and save it as CSV.

    Returns (simple mean, weighted mean, path of the written eval_{iteration}.csv).
    On a malformed LLM reply, a single placeholder row scored 1 is used instead.
    """
    criteria_formatted = "\n".join(f"- {name}" for name in EVALUATION_CRITERIA)
    prompt = (
        "Actua com un auditor UNE. Avalua l'SRT generat, puntuant cada caracter铆stica de 0 a 7 "
        "segons la qualitat observada. D贸nega justificaci贸 breve per貌 concreta per a cada cas. "
        "Les caracter铆stiques obligat貌ries s贸n:\n"
        f"{criteria_formatted}\n"
        "Retorna 脷NICAMENT un array JSON d'objectes amb les claus: "
        "'caracteristica', 'valoracio' (nombre enter de 0 a 7) i 'justificacio'."
    )

    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(
                content=(
                    "# SRT AVALUAT\n"
                    f"{srt_content}\n\n"
                    "Assegura't de complir el format indicat."
                )
            ),
        ]
    )

    # The model may wrap JSON in a markdown fence; strip it before parsing.
    cleaned = _strip_markdown_fences(response.content)
    try:
        data = json.loads(cleaned)
        if not isinstance(data, list):
            raise ValueError("La resposta no 茅s una llista.")
    except Exception as exc:
        logger.error(
            "Error al generar l'avaluaci贸 estructurada: %s. Resposta original: %s",
            exc,
            response.content,
        )
        # Fallback placeholder so the CSV and the means are still produced.
        data = [
            {
                "caracteristica": "Avaluaci贸 fallida",
                "valoracio": 1,
                "justificacio": "No s'ha pogut obtenir l'avaluaci贸 del LLM.",
            }
        ]

    # Persist the per-criterion scores for this iteration as CSV.
    eval_path = TEMP_DIR / f"eval_{iteration}.csv"
    with eval_path.open("w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Caracteristica", "Valoracio (0-7)", "Justificacio"])
        for item in data:
            writer.writerow(
                [
                    item.get("caracteristica", ""),
                    item.get("valoracio", 0),
                    item.get("justificacio", ""),
                ]
            )

    scores = []
    weighted_sum = 0.0
    total_weight = 0.0

    # Accumulate simple and weighted sums; unknown criteria get weight 1.
    for entry in data:
        if not isinstance(entry, dict):
            continue
        try:
            score = float(entry.get("valoracio", 0))
        except (TypeError, ValueError):
            score = 0.0
        scores.append(score)

        weight = CRITERIA_WEIGHTS.get(entry.get("caracteristica", ""), 1)
        weighted_sum += score * weight
        total_weight += weight

    mean_score = sum(scores) / len(scores) if scores else 0.0
    # If no weights were accumulated, fall back to the simple mean.
    weighted_mean = weighted_sum / total_weight if total_weight else mean_score
    return mean_score, weighted_mean, eval_path
# --- Graph state definition (StateGraph) ---
class ReflectionState(TypedDict):
    """State carried between the nodes of the reflection loop."""
    iteration: int  # current cycle, starting at 0 (incremented by the critic)
    current_srt_path: str  # path to the latest SRT (une_ad_0.srt, une_ad_1.srt, ...)
    critic_report: Dict[str, Union[float, str]]  # last critic output (score + qualitative text)
    history: List[SystemMessage]  # message log exchanged between agents
    evaluation_mean: float  # weighted mean (0-7) of the latest structured evaluation
    best_iteration: int  # iteration index of the best SRT so far (-1 = none yet)
    best_weighted_mean: float  # highest weighted mean observed so far
    best_srt_path: str  # path of the best-scoring SRT
    best_eval_path: str  # path of the CSV evaluation of the best SRT
# --- Graph nodes / agents ---
def narrator_agent(state: ReflectionState):
    """
    Generate or rewrite the audio-description SRT.

    - On cycle 0 the pre-seeded 'une_ad_0.srt' is passed through unchanged
      (generation is simulated; the branch's prompt is built but unused).
    - On later cycles the SRT is rewritten by the LLM based on critic_report.
    """
    iteration = state["iteration"]
    critic_report = state["critic_report"]
    history = state["history"]

    # Load the visual-context JSON and the latest SRT from disk.
    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    # 1. Build the prompt for this cycle.
    if iteration == 0:
        # Initial task (une_ad_0.srt already exists, so this cycle is simulated).
        prompt = (
            "Ets un Narrador expert en Audiodescripci贸 (AD). La teva tasca inicial 茅s generar "
            "un fitxer SRT d'audiodescripcions basat en el JSON de context visual. "
            "TOT I AIX脥, per a aquesta primera iteraci贸, l'SRT ja s'ha generat. "
            "Simplement retorna el contingut de 'une_ad_0.srt' com si fos la teva sortida. "
            "Assegura't que totes les audiodescripcions estiguin en catal脿 i que cadascuna pugui ser locutada "
            "dins del temps disponible (utilitza un m脿xim aproximat d'11 car脿cters per segon). Si el tram de temps "
            "茅s massa curt (<1.5s), combina'l amb el bloc d'AD m茅s proper i ajusta els timestamps perqu猫 la narraci贸 sigui fluida. "
            "Evita redund脿ncies: no repeteixis informaci贸 ja descrita en segments d'AD anteriors o al di脿leg, i elimina qualsevol detall que no sigui essencial."
        )
        output_srt = current_srt
        reflection_text = "Generaci贸n inicial. No hay reflexi贸n."
    else:
        # Reflection task: rewrite the AD lines to address the critic's report.
        prompt = (
            "Ets un Narrador expert en Audiodescripci贸 (AD). Has rebut una cr铆tica sobre la teva 煤ltima versi贸 de l'SRT. "
            "La teva tasca 茅s REESCRIURE el contingut d'audiodescripci贸 (l铆nies amb '(AD)') del fitxer SRT, "
            "assegurant que sigui coherent amb el JSON de context i, sobretot, que CORREGEIXIS TOTS els problemes "
            "mencionats a l'Informe Cr铆tic adjunt. Mant茅n intactes els di脿legs (l铆nies amb [Nom]) i escriu totes les audiodescripcions en catal脿 natural. "
            "Garanteix que cada bloc d'AD pugui ser locutat dins del seu interval temporal disponible considerant un m脿xim d'11 car脿cters per segon. "
            "Si l'interval 茅s massa curt (<1.5s), fusiona'l amb el bloc d'AD anterior o posterior m茅s proper i ajusta els timestamps perqu猫 quedin cont铆nues. "
            "Prefereix frases concises i accionables, prioritzant la informaci贸 visual essencial, i elimina redund脿ncies amb AD anteriors o amb els di脿legs."
        )

        # Concatenate the critic report, visual context and previous SRT for the LLM.
        input_content = f"""
# INFORME CR脥TICO
Porcentaje de Fiabilidad Anterior: {critic_report.get('reliability_percentage')}
Cr铆tica Cualitativa: {critic_report.get('qualitative_critique')}

# JSON DE CONTEXTO VISUAL (Gu铆a para la AD)
{json_context}

# 脷LTIMO ARCHIVO SRT GENERADO (une_ad_{iteration-1}.srt)
{current_srt}

REGLAS: Tu respuesta debe ser *SOLAMENTE* el contenido completo del nuevo archivo SRT (incluyendo di谩logos), sin ning煤n comentario o explicaci贸n adicional.
"""

        # LLM call (NOTE(review): reconstructed as reflection-branch-only, since
        # the iteration-0 branch sets output_srt/reflection_text itself).
        response = llm.invoke(
            [
                SystemMessage(content=prompt),
                HumanMessage(content=input_content)
            ]
        )

        output_srt = response.content
        reflection_text = f"Reescrito en base al informe cr铆tico: {critic_report.get('qualitative_critique', 'N/A')}"

    # 2. Persist the new SRT version.
    new_srt_path = TEMP_DIR / f"une_ad_{iteration}.srt"
    new_srt_path.write_text(output_srt, encoding="utf-8")

    # 3. Persist the reflection rationale alongside it.
    (TEMP_DIR / f"thinking_{iteration}.txt").write_text(reflection_text, encoding="utf-8")

    logger.info(f"Narrador: Generada la versi贸n {iteration} del SRT en '{new_srt_path}'.")

    # 4. Update the loop state; best_* keys keep previous values (or defaults).
    new_history = history + [AIMessage(content=f"Narrador v{iteration} completado. Raz贸n de reflexi贸n: {reflection_text}")]
    return {
        "iteration": iteration,
        "current_srt_path": str(new_srt_path),
        "history": new_history,
        "evaluation_mean": state.get("evaluation_mean", 0.0),
        "best_iteration": state.get("best_iteration", -1),
        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
        "best_srt_path": state.get("best_srt_path", str(new_srt_path)),
        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
    }
def identity_manager_agent(state: ReflectionState):
    """
    Ask the LLM for user-identity information derived from the current SRT.

    NOTE(review): a second function with this exact name is defined later in
    this module and overwrites this one at import time, so this version is
    effectively dead code. Also, the parsed identity_info is never returned
    or persisted — it is only logged on failure.
    """
    iteration = state["iteration"]
    history = state["history"]
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    prompt = (
        "Ets un gestor d'identitats. La teva tasca 茅s verificar la identitat de l'usuari "
        "i assegurar-te que les seves dades estiguin actualitzades."
    )

    input_content = f"""
# 脷LTIMO ARCHIVO SRT GENERADO (une_ad_{iteration}.srt):
{current_srt}

REGLAS: Tu respuesta debe ser *SOLAMENTE* un objeto JSON con la informaci贸n de la identidad del usuario.
"""

    # LLM call
    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(content=input_content)
        ]
    )

    # Parse the LLM reply (may be malformed, hence the try/except).
    try:
        cleaned_response = _strip_markdown_fences(response.content)
        identity_info = json.loads(cleaned_response)
        if not isinstance(identity_info, dict):
            raise ValueError("Estructura JSON incorrecta.")
    except Exception as e:
        logger.error(f"Error al parsear el JSON de la identidad: {e}. Respuesta: {response.content}")
        identity_info = {"error": "No s'ha pogut obtenir la informaci贸 d'identitat."}

    logger.info(f"Identity Manager: Informaci贸n de identidad actualizada.")

    # Pass the state through unchanged apart from the history entry.
    new_history = history + [AIMessage(content=f"Identity Manager v{iteration} completado.")]
    return {
        "iteration": iteration,
        "current_srt_path": state["current_srt_path"],
        "history": new_history,
        "evaluation_mean": state.get("evaluation_mean", 0.0),
        "best_iteration": state.get("best_iteration", -1),
        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
        "best_srt_path": state.get("best_srt_path", state["current_srt_path"]),
        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
    }
def critic_agent(state: ReflectionState):
    """
    Evaluate the Narrator's SRT against the UNE rules.

    Produces a reliability percentage plus a qualitative critique (via the LLM),
    writes the structured evaluation CSV, appends the means to the thinking log,
    tracks the best-scoring SRT so far, and advances the iteration counter.
    """
    iteration = state["iteration"]
    history = state["history"]
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    prompt = (
        "Ets un Cr铆tic d'Audiodescripci贸 molt estricte. La teva tasca 茅s avaluar l'SRT adjunt "
        "煤nicament segons les Regles UNE proporcionades. L'avaluaci贸 ha de ser doble: "
        "1. **Num猫rica**: Un percentatge de fiabilitat (ex. 85.5) de 0 a 100%. "
        "2. **Qualitativa**: Una cr铆tica constructiva sobre les principals mancances de les AD respecte a les regles. "
        "Has de ser EXTREMADAMENT estricte amb la sincronitzaci贸 (sense solapament amb el di脿leg), "
        "amb l'adequaci贸 temporal (velocitat m脿xima recomanada d'11 car脿cters per segon) i amb l'abs猫ncia de redund脿ncies. "
        "Comprova tamb茅 que totes les audiodescripcions estan escrites en catal脿 natural."
    )

    input_content = f"""
# REGLAS UNE DE AUDIODESCRIPCI脫N:
{UNE_RULES}

# ARCHIVO SRT A EVALUAR (une_ad_{iteration}.srt):
{current_srt}

REGLAS DE RESPUESTA:
Tu respuesta debe ser *SOLAMENTE* un objeto JSON con dos claves:
1. "reliability_percentage": (float) El porcentaje de fiabilidad.
2. "qualitative_critique": (string) La cr铆tica cualitativa y sugerencias de mejora.
Ejemplo de respuesta: {{"reliability_percentage": 75.0, "qualitative_critique": "El segmento 4 se solapa 0.34s con el di谩logo de Sandra. El segmento 5 es demasiado gen茅rico y no describe bien la acci贸n."}}
"""

    # LLM call
    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(content=input_content)
        ]
    )

    # Parse the LLM reply (may be malformed, hence the try/except).
    try:
        cleaned_response = _strip_markdown_fences(response.content)
        report = json.loads(cleaned_response)
        if not isinstance(report, dict) or 'reliability_percentage' not in report:
            raise ValueError("Estructura JSON incorrecta.")
    except Exception as e:
        logger.error(f"Error al parsear el JSON del Cr铆tico: {e}. Respuesta: {response.content}")
        # Low-score fallback so the loop keeps iterating rather than crashing.
        report = {"reliability_percentage": 1.0, "qualitative_critique": "El Cr铆tico no devolvi贸 un JSON v谩lido. Reintentar."}

    logger.info(f"Cr铆tico: Evaluaci贸n completada. Fiabilidad: {report.get('reliability_percentage')}%.")

    # Structured 0-7 evaluation (also writes eval_{iteration}.csv).
    mean_score, weighted_mean, eval_path = generate_evaluation_report(current_srt, iteration)

    # Append both evaluation means to this cycle's thinking log, if present.
    thinking_path = TEMP_DIR / f"thinking_{iteration}.txt"
    if thinking_path.exists():
        previous_text = thinking_path.read_text(encoding="utf-8")
        thinking_path.write_text(
            (
                f"{previous_text}\n\nMitjana simple d'avaluaci贸: {mean_score:.2f} / 7"
                f"\nMitjana ponderada d'avaluaci贸: {weighted_mean:.2f} / 7"
            ),
            encoding="utf-8",
        )

    # Track the best-scoring SRT seen so far (by weighted mean).
    best_iteration = state.get("best_iteration", -1)
    best_weighted_mean = state.get("best_weighted_mean", -1.0)
    best_srt_path = state.get("best_srt_path", state["current_srt_path"])
    best_eval_path = state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv"))

    if weighted_mean > best_weighted_mean:
        best_iteration = iteration
        best_weighted_mean = weighted_mean
        best_srt_path = state["current_srt_path"]
        best_eval_path = str(eval_path)

    new_history = history + [
        AIMessage(
            content=(
                "Cr铆tico v{iter} completado. Fiabilidad: {reliab}%. "
                "Mitjana simple: {mean:.2f}/7. Mitjana ponderada: {wmean:.2f}/7"
            ).format(
                iter=iteration,
                reliab=report.get("reliability_percentage"),
                mean=mean_score,
                wmean=weighted_mean,
            )
        )
    ]
    return {
        "iteration": iteration + 1,  # the cycle counter is advanced here
        "critic_report": report,
        "history": new_history,
        "evaluation_mean": weighted_mean,
        "best_iteration": best_iteration,
        "best_weighted_mean": best_weighted_mean,
        "best_srt_path": best_srt_path,
        "best_eval_path": best_eval_path,
    }
def identity_manager_agent(state: ReflectionState):
    """
    Check speaker-assignment coherence between the SRT, casting.csv and the
    visual context, correcting [Name] tags via the LLM where needed.

    Writes identity_log_{iteration}.txt with the change rationale. If the LLM
    returns a corrected SRT, current_srt_path is pointed at the new file;
    on any failure (or no change) the incoming state is returned untouched.
    """
    iteration = state["iteration"]

    # Load inputs.
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")
    casting_path = TEMP_DIR / "casting.csv"
    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")

    # casting.csv is optional: without it this node is a no-op.
    if not casting_path.exists():
        logger.warning("Casting.csv no encontrado. Saltando identity_manager.")
        return state

    casting_content = casting_path.read_text(encoding="utf-8")

    # FIX: the JSON example previously used "{{" / "}}" inside plain (non-f)
    # string literals, so the model was literally shown doubled braces.
    prompt = (
        "Ets un Identity Manager. La teva tasca 茅s:\n"
        "1. Verificar que les assignacions de parlants a l'SRT coincideixen amb casting.csv\n"
        "2. Comprovar que els parlants assignats s贸n coherents amb el context visual de json_ad.json\n"
        "3. Si trobes inconsist猫ncies, re-assigna els parlants corregint les etiquetes [Nom]\n"
        "4. Justifica canvis al fitxer identity_log.txt\n"
        "\n"
        "Dades d'entrada:\n"
        f"- CASTING.CSV:\n{casting_content}\n"
        f"- JSON CONTEXT:\n{json_context}\n"
        f"- SRT ACTUAL:\n{current_srt}\n"
        "\n"
        "REGLES:\n"
        "- Nom茅s modifica les l铆nies de di脿leg (ex: [Nom])\n"
        "- Mant茅 la numeraci贸 i timestamps\n"
        "- Si no hi ha canvis, retorna l'SRT original\n"
        "\n"
        "Format de sortida:\n"
        "```json\n"
        "{\n"
        "  \"srt_content\": \"<nou contingut SRT>\",\n"
        "  \"log_message\": \"<explicaci贸 canvis o 'Sense canvis'>\"\n"
        "}\n"
        "```"
    )

    response = llm.invoke([SystemMessage(content=prompt)])

    try:
        # Parse the JSON reply (strip a possible markdown fence first).
        cleaned = _strip_markdown_fences(response.content)
        data = json.loads(cleaned)
        new_srt = data["srt_content"]
        log_msg = data["log_message"]

        # Record the rationale for this iteration.
        log_path = TEMP_DIR / f"identity_log_{iteration}.txt"
        log_path.write_text(f"Iteraci贸 {iteration}: {log_msg}", encoding="utf-8")

        # Only point the state at a new file when the SRT actually changed.
        if new_srt != current_srt:
            new_srt_path = TEMP_DIR / f"une_ad_{iteration}_corrected.srt"
            new_srt_path.write_text(new_srt, encoding="utf-8")
            logger.info(f"Identity Manager: Correccions aplicades. Detalls: {log_msg}")
            return {
                **state,
                "current_srt_path": str(new_srt_path)
            }

    except Exception as e:
        # Best-effort node: log and fall through to the unchanged state.
        logger.error(f"Error en identity_manager: {e}")

    return state
def background_descriptor_agent(state: ReflectionState):
    """
    Check scenario descriptions in the SRT against scenarios.csv, replacing
    generic wording with official scenario names via the LLM.

    Writes background_log_{iteration}.txt with the change rationale. If the
    LLM returns a corrected SRT, current_srt_path is pointed at the new file;
    on any failure (or no change) the incoming state is returned untouched.
    """
    iteration = state["iteration"]

    # Load inputs.
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")
    scenarios_path = TEMP_DIR / "scenarios.csv"

    # scenarios.csv is optional: without it this node is a no-op.
    if not scenarios_path.exists():
        logger.warning("Scenarios.csv no encontrado. Saltando background_descriptor.")
        return state

    scenarios_content = scenarios_path.read_text(encoding="utf-8")

    # FIX: the JSON example previously used "{{" / "}}" inside plain (non-f)
    # string literals, so the model was literally shown doubled braces.
    prompt = (
        "Ets un Background Descriptor. La teva tasca 茅s:\n"
        "1. Verificar que les descripcions d'escenaris a l'SRT coincideixen amb scenarios.csv\n"
        "2. Si trobes coincid猫ncies, reempla莽a les descripcions gen猫riques pel nom oficial de l'escenari\n"
        "3. Justifica canvis al fitxer background_log.txt\n"
        "\n"
        "Dades d'entrada:\n"
        f"- SCENARIOS.CSV:\n{scenarios_content}\n"
        f"- SRT ACTUAL:\n{current_srt}\n"
        "\n"
        "REGLES:\n"
        "- Nom茅s modifica l铆nies d'audiodescripci贸 (ex: (AD) ...)\n"
        "- Mant茅 la numeraci贸 i timestamps\n"
        "- Si no hi ha canvis, retorna l'SRT original\n"
        "\n"
        "Format de sortida:\n"
        "```json\n"
        "{\n"
        "  \"srt_content\": \"<nou contingut SRT>\",\n"
        "  \"log_message\": \"<explicaci贸 canvis o 'Sense canvis'>\"\n"
        "}\n"
        "```"
    )

    response = llm.invoke([SystemMessage(content=prompt)])

    try:
        # Parse the JSON reply (strip a possible markdown fence first).
        cleaned = _strip_markdown_fences(response.content)
        data = json.loads(cleaned)
        new_srt = data["srt_content"]
        log_msg = data["log_message"]

        # Record the rationale for this iteration.
        log_path = TEMP_DIR / f"background_log_{iteration}.txt"
        log_path.write_text(f"Iteraci贸 {iteration}: {log_msg}", encoding="utf-8")

        # Only point the state at a new file when the SRT actually changed.
        if new_srt != current_srt:
            new_srt_path = TEMP_DIR / f"une_ad_{iteration}_scenario_corrected.srt"
            new_srt_path.write_text(new_srt, encoding="utf-8")
            logger.info(f"Background Descriptor: Correccions aplicades. Detalls: {log_msg}")
            return {
                **state,
                "current_srt_path": str(new_srt_path)
            }

    except Exception as e:
        # Best-effort node: log and fall through to the unchanged state.
        logger.error(f"Error en background_descriptor: {e}")

    return state
# --- Loop exit condition ---

def should_continue(state: ReflectionState) -> str:
    """Routing predicate for the graph: 'end' stops the loop, 'continue' re-runs the narrator."""
    MAX_ITERATIONS = 5  # hard cap on reflection cycles
    MIN_AVERAGE_SCORE = 6.0  # weighted-mean quality threshold on the 0-7 scale

    cycle = state["iteration"]
    weighted = state.get("evaluation_mean", 0.0)

    quality_reached = weighted >= MIN_AVERAGE_SCORE
    budget_exhausted = cycle >= MAX_ITERATIONS

    if quality_reached:
        logger.info(f"FIN: Mitjana ponderada d'avaluaci贸 assolida ({weighted:.2f} >= {MIN_AVERAGE_SCORE}).")
    elif budget_exhausted:
        logger.info(f"FIN: S'ha assolit el m脿xim d'iteracions ({cycle} / {MAX_ITERATIONS}).")
    else:
        logger.info(f"CONTINUAR: Iteraci贸 {cycle} / {MAX_ITERATIONS}. Mitjana ponderada actual: {weighted:.2f} / 7.")
        return "continue"
    return "end"
# --- Graph construction ---

# 1. Initial loop state: start at cycle 0 with the seed SRT and no best result yet.
initial_state: ReflectionState = {
    "iteration": 0,
    "current_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "critic_report": {"reliability_percentage": 0.0, "qualitative_critique": "Inicializando el proceso."},
    "history": [],
    "evaluation_mean": 0.0,
    "best_iteration": -1,
    "best_weighted_mean": -1.0,
    "best_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "best_eval_path": str(TEMP_DIR / "eval_0.csv"),
}

# 2. Define the graph over the shared ReflectionState.
workflow = StateGraph(ReflectionState)

# Nodes
workflow.add_node("narrator", narrator_agent)
workflow.add_node("identity_manager", identity_manager_agent)
workflow.add_node("background_descriptor", background_descriptor_agent)
workflow.add_node("critic", critic_agent)

# Loop structure: Narrator -> Identity Manager -> Background Descriptor -> Critic -> check
workflow.set_entry_point("narrator")
workflow.add_edge("narrator", "identity_manager")
workflow.add_edge("identity_manager", "background_descriptor")
workflow.add_edge("background_descriptor", "critic")

# Branch point: loop back to the narrator or finish, per should_continue.
workflow.add_conditional_edges(
    "critic",
    should_continue,
    {
        "continue": "narrator",  # threshold/budget not met: iterate again
        "end": END  # done
    }
)

# Compile the graph into a runnable app.
app = workflow.compile()
def generate_free_ad_from_srt(srt_path: Path) -> Path:
    """Produce a free-form narration from the final SRT file.

    Sends the SRT content to the LLM with instructions to write a fluid,
    exhaustive Catalan narration (not bound by SRT timing constraints),
    saves the reply to ``temp/free_ad.txt`` and returns that path.

    Args:
        srt_path: Path to the final UNE SRT file.

    Returns:
        Path to the generated free-narration text file.
    """
    system_prompt = (
        "Actua com una narradora professional d'audiodescripcions lliures. "
        "A partir de l'SRT proporcionat, escriu un text narratiu en catal脿 que descrigui "
        "de manera exhaustiva i fluida tot el que succeeix a la pe莽a audiovisual. "
        "Inclou accions, aparen莽a, gestos, canvis d'escena i qualsevol detall rellevant, "
        "sense limitar-te a les restriccions temporals del format SRT. "
        "Evita repetir literalment els di脿legs, per貌 contextualitza'ls quan sigui 煤til. "
        "La narraci贸 ha de ser clara, coherent i apta per ser locutada com una narraci贸 lliure."
    )
    user_prompt = (
        "# SRT FINAL\n"
        f"{srt_path.read_text(encoding='utf-8')}\n\n"
        "Respon 煤nicamente con la narraci贸 lliure sin cap comentario adicional."
    )

    reply = llm.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_prompt),
        ]
    )

    # Persist the narration next to the other pipeline artifacts.
    output_path = TEMP_DIR / "free_ad.txt"
    output_path.write_text(reply.content, encoding="utf-8")
    logger.info(f"Narraci贸 lliure generada en '{output_path}'.")
    return output_path
|
| 706 |
-
|
| 707 |
-
# --- Ejecuci贸n Principal ---
|
| 708 |
-
|
| 709 |
-
if __name__ == "__main__":
    # Initialize the working environment (seed SRT + context JSON in temp/)
    setup_files(INITIAL_SRT_CONTENT, CONTEXT_JSON_CONTENT)

    logger.info("--- Comenzando el Bucle de Finetuning ---")

    # Run the reflection graph until should_continue returns "end"
    final_state = app.invoke(initial_state)

    logger.info("\n--- Bucle Finalizado ---")

    # Pull the best-scoring iteration's artifacts out of the final state;
    # fall back to the current SRT / first evaluation if the keys are missing.
    best_iteration = final_state.get("best_iteration", -1)
    best_weighted_mean = final_state.get("best_weighted_mean", 0.0)
    best_srt_path = Path(final_state.get("best_srt_path", final_state['current_srt_path']))
    best_eval_path = Path(final_state.get("best_eval_path", TEMP_DIR / "eval_0.csv"))

    final_srt_path = TEMP_DIR / "une_ad.srt"
    final_eval_path = TEMP_DIR / "eval.csv"

    # Best-effort copy of the final SRT: log instead of aborting on failure.
    try:
        shutil.copy(best_srt_path, final_srt_path)
        logger.info(f"SRT final copiado a '{final_srt_path}'.")
    except Exception as exc:
        logger.error(f"No se pudo copiar el SRT final: {exc}")

    # Best-effort copy of the final evaluation CSV.
    try:
        shutil.copy(best_eval_path, final_eval_path)
        logger.info(f"Evaluaci贸n final copiada a '{final_eval_path}'.")
    except Exception as exc:
        logger.error(f"No se pudo copiar el CSV final: {exc}")

    # Free-form narration is optional: keep None if generation fails.
    free_ad_path: Union[Path, None] = None
    try:
        free_ad_path = generate_free_ad_from_srt(final_srt_path)
    except Exception as exc:
        logger.error(f"No s'ha pogut generar la narraci贸 lliure: {exc}")

    # Report results
    print(f"N煤mero final de ciclos: {final_state['iteration']}")
    print(f"Iteraci贸n 贸ptima: {best_iteration} (mitjana ponderada {best_weighted_mean:.2f}/7)")
    print(f"Ruta al SRT final: {final_srt_path}")
    print(f"Ruta a l'avaluaci贸 final: {final_eval_path}")
    if free_ad_path is not None:
        print(f"Ruta a la narraci贸 lliure: {free_ad_path}")
    else:
        print("No s'ha pogut generar la narraci贸 lliure.")

    # Dump the final SRT to stdout
    print("\n--- Contenido del SRT Final ---")
    print(final_srt_path.read_text(encoding="utf-8"))

    # Dump the free narration, if it was generated
    if free_ad_path is not None:
        print("\n--- Narraci贸 Lliure ---")
        print(free_ad_path.read_text(encoding="utf-8"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
finetuning/lora.py
DELETED
|
@@ -1,219 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import argparse
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
from typing import List, Dict
|
| 5 |
-
|
| 6 |
-
from datasets import Dataset
|
| 7 |
-
from transformers import (
|
| 8 |
-
AutoTokenizer,
|
| 9 |
-
AutoModelForCausalLM,
|
| 10 |
-
TrainingArguments,
|
| 11 |
-
Trainer,
|
| 12 |
-
)
|
| 13 |
-
from peft import LoraConfig, get_peft_model
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
BASE_DIR = Path(__file__).resolve().parent
|
| 17 |
-
DATA_DIR = BASE_DIR / "data"
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def find_training_pairs(data_dir: Path) -> List[Dict[str, str]]:
    """Scan the subfolders of *data_dir* for target_une_ad.srt / free_ad.txt pairs.

    Each discovered pair is formatted as an instruction-style example: the UNE
    SRT (time-constrained audio description) is the input and the free-form
    narration is the expected output.

    Args:
        data_dir: Base directory whose immediate subdirectories hold the pairs.

    Returns:
        A list of ``{"prompt": ..., "output": ...}`` dicts, one per pair.

    Raises:
        FileNotFoundError: If *data_dir* does not exist.
        RuntimeError: If no complete pair is found.
    """
    if not data_dir.exists():
        raise FileNotFoundError(f"Data dir not found: {data_dir}")

    # Instruction prefix (Catalan), consistent with the task the adapter learns.
    instruction = (
        "Converteix el seg眉ent fitxer SRT d'audiodescripci贸 UNE (amb restriccions temporals) "
        "en una narraci贸 lliure detallada en catal脿, sense l铆mits de temps. "
        "Mant茅n tota la informaci贸 visual rellevant per貌 amb un to fluid i natural.\n\n"
        "### SRT UNE\n"
    )

    training_pairs: List[Dict[str, str]] = []
    for subdir in sorted(data_dir.iterdir()):
        if not subdir.is_dir():
            continue

        srt_file = subdir / "target_une_ad.srt"
        narration_file = subdir / "free_ad.txt"
        # Skip incomplete example folders silently.
        if not (srt_file.exists() and narration_file.exists()):
            continue

        srt_body = srt_file.read_text(encoding="utf-8").strip()
        narration_body = narration_file.read_text(encoding="utf-8").strip()
        training_pairs.append(
            {
                "prompt": instruction + srt_body + "\n\n### Narraci贸 lliure:",
                "output": narration_body,
            }
        )

    if not training_pairs:
        raise RuntimeError(
            f"No training pairs found in {data_dir} (expected target_une_ad.srt + free_ad.txt)"
        )
    return training_pairs
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
def build_dataset(pairs: List[Dict[str, str]], tokenizer: AutoTokenizer, max_length: int = 2048) -> Dataset:
    """Build a Hugging Face Dataset from prompt/output pairs for causal-LM training.

    Each example is concatenated into a single sequence:
        [PROMPT] + "\\n" + [OUTPUT] + eos
    Prompt tokens (and padding tokens) are masked to -100 in the labels so the
    loss is computed only on the output portion.

    Args:
        pairs: List of ``{"prompt": ..., "output": ...}`` dicts.
        tokenizer: Tokenizer of the base model (must have eos/pad tokens set).
        max_length: Maximum sequence length; longer sequences are truncated.

    Returns:
        A tokenized Dataset with ``input_ids``, ``attention_mask`` and ``labels``.
    """

    def _gen():
        for ex in pairs:
            yield {"prompt": ex["prompt"], "output": ex["output"]}

    raw_ds = Dataset.from_generator(_gen)

    def tokenize_fn(batch):
        prompts = batch["prompt"]
        outputs = batch["output"]

        input_ids_list = []
        attention_mask_list = []
        labels_list = []

        for p, o in zip(prompts, outputs):
            full_text = p + "\n" + o + tokenizer.eos_token
            enc = tokenizer(
                full_text,
                truncation=True,
                max_length=max_length,
                padding="max_length",
            )

            # Length of the prompt portion, to mask it out of the loss.
            prompt_ids = tokenizer(p + "\n", truncation=True, max_length=max_length)["input_ids"]
            prompt_len = min(len(prompt_ids), max_length)

            # FIX: keep the tokenizer's real attention mask instead of an
            # all-ones mask, so the model does not attend to padding.
            attention_mask = enc["attention_mask"]

            labels = enc["input_ids"].copy()
            for i in range(prompt_len):
                labels[i] = -100
            # FIX: also ignore padding positions in the loss (pad == eos here,
            # so they would otherwise contribute spurious loss terms).
            for i, keep in enumerate(attention_mask):
                if not keep:
                    labels[i] = -100

            input_ids_list.append(enc["input_ids"])
            attention_mask_list.append(attention_mask)
            labels_list.append(labels)

        return {
            "input_ids": input_ids_list,
            "attention_mask": attention_mask_list,
            "labels": labels_list,
        }

    tokenized = raw_ds.map(tokenize_fn, batched=True, remove_columns=["prompt", "output"])
    return tokenized
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def create_lora_model(base_model_name: str, r: int = 16, alpha: int = 32, dropout: float = 0.05):
    """Load a causal-LM base model and wrap it with a LoRA adapter.

    Args:
        base_model_name: HF hub id or local path of the base model.
        r: LoRA rank.
        alpha: LoRA scaling factor (lora_alpha).
        dropout: Dropout applied to the LoRA layers.

    Returns:
        The PEFT-wrapped model, ready for training.
    """
    # device_map="auto" places/shards the model automatically across available
    # devices; torch_dtype="auto" keeps the checkpoint's native precision.
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype="auto",
        device_map="auto",
    )

    lora_config = LoraConfig(
        r=r,
        lora_alpha=alpha,
        lora_dropout=dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, lora_config)
    return model
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the LoRA fine-tuning run."""
    cli = argparse.ArgumentParser(
        description="Fine-tuning LoRA per a salamandra-instruct-7b amb dades UNE/free AD"
    )

    # Paths and model selection
    cli.add_argument(
        "--base_model",
        type=str,
        default="projecte-aina/salamandra-instruct-7b",
        help="Nom o ruta del model base (HF hub o path local)",
    )
    cli.add_argument(
        "--data_dir",
        type=str,
        default=str(DATA_DIR),
        help="Directori base amb subcarpetes que contenen target_une_ad.srt i free_ad.txt",
    )
    cli.add_argument(
        "--output_dir",
        type=str,
        default=str(BASE_DIR / "lora_output"),
        help="Directori on desar l'adapter LoRA",
    )

    # Plain numeric training knobs: (flag, type, default)
    for flag, flag_type, default in (
        ("--batch_size", int, 1),
        ("--gradient_accumulation", int, 8),
        ("--epochs", int, 3),
        ("--lr", float, 2e-4),
        ("--max_length", int, 2048),
        ("--warmup_ratio", float, 0.03),
        ("--logging_steps", int, 10),
        ("--save_steps", int, 200),
        ("--eval_steps", int, 200),
    ):
        cli.add_argument(flag, type=flag_type, default=default)

    # LoRA hyperparameters
    cli.add_argument("--r", type=int, default=16, help="Rank de LoRA")
    cli.add_argument("--alpha", type=int, default=32, help="Alpha de LoRA")
    cli.add_argument("--dropout", type=float, default=0.05, help="Dropout de LoRA")

    return cli.parse_args()
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
def main():
    """End-to-end LoRA fine-tuning: load pairs, tokenize, train, save adapter."""
    args = parse_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"[lora] Buscant dades a: {data_dir}")
    pairs = find_training_pairs(data_dir)
    print(f"[lora] Nombre d'exemples trobats: {len(pairs)}")

    print(f"[lora] Carregant tokenizer de {args.base_model}")
    tokenizer = AutoTokenizer.from_pretrained(args.base_model, use_fast=True)
    # Causal LMs often ship without a pad token; reuse eos for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("[lora] Construint dataset tokenitzat...")
    dataset = build_dataset(pairs, tokenizer, max_length=args.max_length)

    print(f"[lora] Carregant model base {args.base_model} i aplicant LoRA...")
    model = create_lora_model(args.base_model, r=args.r, alpha=args.alpha, dropout=args.dropout)

    # NOTE(review): evaluation_strategy="steps" is set while eval_dataset=None
    # below — Trainer may reject this combination at init; confirm intended.
    training_args = TrainingArguments(
        output_dir=str(output_dir),
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.gradient_accumulation,
        num_train_epochs=args.epochs,
        learning_rate=args.lr,
        warmup_ratio=args.warmup_ratio,
        logging_steps=args.logging_steps,
        save_steps=args.save_steps,
        evaluation_strategy="steps",
        eval_steps=args.eval_steps,
        save_total_limit=2,
        bf16=True,
        gradient_checkpointing=True,
        report_to=[],
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        eval_dataset=None,
        tokenizer=tokenizer,
    )

    print("[lora] Iniciant entrenament...")
    trainer.train()

    # Save the LoRA adapter weights and the tokenizer alongside it.
    print("[lora] Guardant adapter LoRA...")
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))

    print(f"[lora] Entrenament completat. Adapter guardat a {output_dir}")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
finetuning/reflection.py
DELETED
|
@@ -1,520 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import csv
|
| 3 |
-
import json
|
| 4 |
-
import logging
|
| 5 |
-
import shutil
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
from typing import TypedDict, Annotated, List, Dict, Union
|
| 8 |
-
from langgraph.graph import StateGraph, END
|
| 9 |
-
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
| 10 |
-
from langchain_openai import ChatOpenAI
|
| 11 |
-
from operator import itemgetter
|
| 12 |
-
|
| 13 |
-
# --- Configuraci贸n y Herramientas ---
|
| 14 |
-
|
| 15 |
-
# Directorios de trabajo
|
| 16 |
-
BASE_DIR = Path(__file__).resolve().parent
|
| 17 |
-
TEMP_DIR = BASE_DIR / "temp"
|
| 18 |
-
TEMP_DIR.mkdir(exist_ok=True)
|
| 19 |
-
|
| 20 |
-
LOG_FILE = TEMP_DIR / "reflection.log"
|
| 21 |
-
|
| 22 |
-
# Configurar el logging
|
| 23 |
-
logging.basicConfig(
|
| 24 |
-
level=logging.INFO,
|
| 25 |
-
format='%(levelname)s: %(message)s',
|
| 26 |
-
handlers=[
|
| 27 |
-
logging.StreamHandler(),
|
| 28 |
-
logging.FileHandler(LOG_FILE, encoding="utf-8")
|
| 29 |
-
],
|
| 30 |
-
)
|
| 31 |
-
logger = logging.getLogger(__name__)
|
| 32 |
-
|
| 33 |
-
# Aseg煤rate de configurar tu API Key.
|
| 34 |
-
# En un entorno real, usa os.environ["OPENAI_API_KEY"]
|
| 35 |
-
# Aqu铆 usamos un placeholder para la demostraci贸n.
|
| 36 |
-
if "OPENAI_API_KEY" not in os.environ:
|
| 37 |
-
logger.warning("OPENAI_API_KEY no est谩 configurada. Usando un placeholder.")
|
| 38 |
-
os.environ["OPENAI_API_KEY"] = "sk-..."
|
| 39 |
-
|
| 40 |
-
# Inicializar LLM (se usa GPT-4o por su capacidad de razonamiento)
|
| 41 |
-
# En producci贸n, considera un modelo que soporte tus tokens y latencia requeridas.
|
| 42 |
-
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
|
| 43 |
-
|
| 44 |
-
# --- Ficheros de Ejemplo ---
|
| 45 |
-
|
| 46 |
-
# Fichero SRT inicial (Narrador)
|
| 47 |
-
INITIAL_SRT_CONTENT = """
|
| 48 |
-
1
|
| 49 |
-
00:00:00,000 --> 00:00:05,340
|
| 50 |
-
[Sandra] Per貌 de veritat crec que aquest projecte canviar脿 la nostra nota final.
|
| 51 |
-
|
| 52 |
-
2
|
| 53 |
-
00:00:04,340 --> 00:00:05,790
|
| 54 |
-
[Luc铆a] Hem de donar-ho tot.
|
| 55 |
-
|
| 56 |
-
3
|
| 57 |
-
00:00:05,790 --> 00:00:08,790
|
| 58 |
-
[Sandra] Ho s茅, ho s茅.
|
| 59 |
-
|
| 60 |
-
4
|
| 61 |
-
00:00:08,000 --> 00:00:10,000
|
| 62 |
-
(AD) De sobte, s贸n al parc.
|
| 63 |
-
|
| 64 |
-
5
|
| 65 |
-
00:00:10,000 --> 00:00:14,000
|
| 66 |
-
(AD) Ara tallen menjar i fan una amanida a una cuina.
|
| 67 |
-
"""
|
| 68 |
-
|
| 69 |
-
# Fichero JSON de contexto (ejemplo de la respuesta anterior, pero simplificado para el Narrador)
|
| 70 |
-
CONTEXT_JSON_CONTENT = """
|
| 71 |
-
{
|
| 72 |
-
"segments": [
|
| 73 |
-
{"id": 1, "start": "00:00:00,000", "end": "00:00:05,340", "type": "dialog", "text": "[Sandra] Per貌 de veritat crec que aquest projecte canviar脿 la nostra nota final."},
|
| 74 |
-
{"id": 2, "start": "00:00:04,340", "end": "00:00:05,790", "type": "dialog", "text": "[Luc铆a] Hem de donar-ho tot."},
|
| 75 |
-
{"id": 3, "start": "00:00:05,790", "end": "00:00:08,790", "type": "dialog", "text": "[Sandra] Ho s茅, ho s茅."},
|
| 76 |
-
{"id": 4, "start": "00:00:08,000", "end": "00:00:10,000", "type": "visual_context", "text": "Cambio de escena a un parque. Personajes caminando."},
|
| 77 |
-
{"id": 5, "start": "00:00:10,000", "end": "00:00:14,000", "type": "visual_context", "text": "Escena en una cocina. Los personajes est谩n cortando vegetales y haciendo una ensalada."}
|
| 78 |
-
]
|
| 79 |
-
}
|
| 80 |
-
"""
|
| 81 |
-
|
| 82 |
-
# Fichero de Reglas UNE (Norma T茅cnica para el Cr铆tico)
|
| 83 |
-
# Nota: Aqu铆 se usa un resumen de las reglas pertinentes para un LLM.
|
| 84 |
-
UNE_RULES = """
|
| 85 |
-
### Reglas UNE de Audiodescripci贸n (Para el Cr铆tico)
|
| 86 |
-
1. **Objetividad y Foco Visual:** La descripci贸n debe ser puramente objetiva, describiendo solo lo que se ve. Debe priorizar la acci贸n y los elementos relevantes (personajes, objetos, localizaci贸n).
|
| 87 |
-
2. **Tiempo y Espacio (Sincronizaci贸n):** Las audiodescripciones (AD) deben insertarse en los silencios del di谩logo. El tiempo de la AD (entre START y END) debe ser suficiente para narrar el contenido sin solaparse con el di谩logo o la m煤sica importante.
|
| 88 |
-
3. **Concisi贸n y Claridad:** Usar lenguaje simple y conciso. Evitar redundancias y juicios de valor.
|
| 89 |
-
4. **Formato:** Cada segmento de AD debe tener un formato SRT v谩lido, incluyendo el marcador (AD) al principio de la l铆nea de texto.
|
| 90 |
-
5. **Utilidad:** Cada segmento de AD debe ser 煤til para la comprensi贸n y nunca ser redundante. En caso de repetir algo ya explicado antes, mejor no decir nada.
|
| 91 |
-
"""
|
| 92 |
-
|
| 93 |
-
EVALUATION_CRITERIA = [
|
| 94 |
-
"Precisi贸 Descriptiva",
|
| 95 |
-
"Sincronitzaci贸 Temporal",
|
| 96 |
-
"Claredat i Concisi贸",
|
| 97 |
-
"Inclusi贸 de Di脿leg/So",
|
| 98 |
-
"Contextualitzaci贸",
|
| 99 |
-
"Flux i Ritme de la Narraci贸",
|
| 100 |
-
]
|
| 101 |
-
|
| 102 |
-
CRITERIA_WEIGHTS = {
|
| 103 |
-
"Precisi贸 Descriptiva": 1,
|
| 104 |
-
"Sincronitzaci贸 Temporal": 4,
|
| 105 |
-
"Claredat i Concisi贸": 1,
|
| 106 |
-
"Inclusi贸 de Di脿leg/So": 1,
|
| 107 |
-
"Contextualitzaci贸": 1,
|
| 108 |
-
"Flux i Ritme de la Narraci贸": 1,
|
| 109 |
-
}
|
| 110 |
-
|
| 111 |
-
# Inicializar ficheros para la ejecuci贸n
|
| 112 |
-
def setup_files(initial_srt_content: str, context_json_content: str):
    """Write the initial working files into the local temp directory."""
    seed_files = {
        "une_ad_0.srt": initial_srt_content,
        "json_ad.json": context_json_content,
    }
    for filename, content in seed_files.items():
        (TEMP_DIR / filename).write_text(content, encoding="utf-8")
    logger.info("Ficheros iniciales 'une_ad_0.srt' y 'json_ad.json' creados.")
|
| 117 |
-
|
| 118 |
-
# --- Utilidades ---
|
| 119 |
-
def _strip_markdown_fences(content: str) -> str:
    """Remove surrounding ```...``` fences from an LLM reply, if present.

    Returns the stripped text unchanged when it is not fenced; otherwise drops
    the opening fence line (``` or ```json) and any trailing ``` lines.
    """
    stripped = content.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (may carry a language tag, e.g. ```json).
    body_lines = stripped.splitlines()[1:]
    # Drop the closing fence, tolerating trailing blank-ish ``` lines.
    while body_lines and body_lines[-1].strip() == "```":
        body_lines.pop()
    return "\n".join(body_lines).strip()
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
def generate_evaluation_report(srt_content: str, iteration: int) -> tuple[float, float, Path]:
    """Request a structured UNE evaluation from the LLM and persist it as CSV.

    The LLM is asked to score each criterion in EVALUATION_CRITERIA from 0 to 7
    and justify each score. The scores are aggregated into a plain mean and a
    weighted mean (weights from CRITERIA_WEIGHTS, default weight 1).

    Args:
        srt_content: The SRT text to evaluate.
        iteration: Current loop iteration, used to name the output CSV.

    Returns:
        Tuple of (mean score, weighted mean score, path to the CSV report).
    """
    criteria_formatted = "\n".join(f"- {name}" for name in EVALUATION_CRITERIA)
    prompt = (
        "Actua com un auditor UNE. Avalua l'SRT generat, puntuant cada caracter铆stica de 0 a 7 "
        "segons la qualitat observada. D贸nega justificaci贸 breve per貌 concreta per a cada cas. "
        "Les caracter铆stiques obligat貌ries s贸n:\n"
        f"{criteria_formatted}\n"
        "Retorna 脷NICAMENT un array JSON d'objectes amb les claus: "
        "'caracteristica', 'valoracio' (nombre enter de 0 a 7) i 'justificacio'."
    )

    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(
                content=(
                    "# SRT AVALUAT\n"
                    f"{srt_content}\n\n"
                    "Assegura't de complir el format indicat."
                )
            ),
        ]
    )

    # The LLM may wrap its JSON in markdown fences; strip them before parsing.
    cleaned = _strip_markdown_fences(response.content)
    try:
        data = json.loads(cleaned)
        if not isinstance(data, list):
            raise ValueError("La resposta no 茅s una llista.")
    except Exception as exc:
        logger.error(
            "Error al generar l'avaluaci贸 estructurada: %s. Resposta original: %s",
            exc,
            response.content,
        )
        # Fallback record so the pipeline keeps running with a minimal score.
        data = [
            {
                "caracteristica": "Avaluaci贸 fallida",
                "valoracio": 1,
                "justificacio": "No s'ha pogut obtenir l'avaluaci贸 del LLM.",
            }
        ]

    eval_path = TEMP_DIR / f"eval_{iteration}.csv"
    with eval_path.open("w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Caracteristica", "Valoracio (0-7)", "Justificacio"])
        for item in data:
            # FIX: guard against non-dict entries from the LLM, consistent with
            # the scoring loop below (previously this raised AttributeError).
            if not isinstance(item, dict):
                continue
            writer.writerow(
                [
                    item.get("caracteristica", ""),
                    item.get("valoracio", 0),
                    item.get("justificacio", ""),
                ]
            )

    scores = []
    weighted_sum = 0.0
    total_weight = 0.0

    for entry in data:
        if not isinstance(entry, dict):
            continue
        try:
            score = float(entry.get("valoracio", 0))
        except (TypeError, ValueError):
            score = 0.0
        scores.append(score)

        # Unknown criteria fall back to weight 1.
        weight = CRITERIA_WEIGHTS.get(entry.get("caracteristica", ""), 1)
        weighted_sum += score * weight
        total_weight += weight

    mean_score = sum(scores) / len(scores) if scores else 0.0
    weighted_mean = weighted_sum / total_weight if total_weight else mean_score
    return mean_score, weighted_mean, eval_path
|
| 210 |
-
|
| 211 |
-
# --- Definici贸n del Estado de la Gr谩fica (StateGraph) ---
|
| 212 |
-
class ReflectionState(TypedDict):
    """State carried through the reflection loop graph."""
    iteration: int  # current cycle (starting at 0)
    current_srt_path: str  # path to the current SRT file (e.g. une_ad_0.srt, une_ad_1.srt)
    critic_report: Dict[str, Union[float, str]]  # latest critic report (score and critique text)
    history: List[SystemMessage]  # message history exchanged between agents
    evaluation_mean: float  # plain mean of the latest evaluation scores
    best_iteration: int  # iteration index with the best weighted mean so far
    best_weighted_mean: float  # best weighted mean score seen so far
    best_srt_path: str  # SRT path corresponding to the best iteration
    best_eval_path: str  # evaluation CSV path corresponding to the best iteration
|
| 223 |
-
|
| 224 |
-
# --- Nodos/Agentes de la Gr谩fica ---
|
| 225 |
-
def narrator_agent(state: ReflectionState):
    """Generate or rewrite the audio-description SRT for the current cycle.

    - On cycle 0 it passes the pre-generated initial SRT straight through
      (no LLM call is made).
    - On cycles > 0 it asks the LLM to rewrite the SRT, fixing every issue
      raised in the critic's last report.

    Side effects: writes ``une_ad_<iteration>.srt`` and
    ``thinking_<iteration>.txt`` into TEMP_DIR.

    Returns a partial ReflectionState update (same iteration number; the
    critic is the one that advances the counter).
    """
    iteration = state["iteration"]
    critic_report = state["critic_report"]
    history = state["history"]

    # Load the visual-context JSON and the most recent SRT version.
    json_context = (TEMP_DIR / "json_ad.json").read_text(encoding="utf-8")
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    # 1. Build the system prompt.
    if iteration == 0:
        # Initial task: une_ad_0.srt already exists on disk, so "generation"
        # is simulated by echoing the current file back unchanged.
        prompt = (
            "Ets un Narrador expert en Audiodescripci贸 (AD). La teva tasca inicial 茅s generar "
            "un fitxer SRT d'audiodescripcions basat en el JSON de context visual. "
            "TOT I AIX脥, per a aquesta primera iteraci贸, l'SRT ja s'ha generat. "
            "Simplement retorna el contingut de 'une_ad_0.srt' com si fos la teva sortida. "
            "Assegura't que totes les audiodescripcions estiguin en catal脿 i que cadascuna pugui ser locutada "
            "dins del temps disponible (utilitza un m脿xim aproximat d'11 car脿cters per segon). Si el tram de temps "
            "茅s massa curt (<1.5s), combina'l amb el bloc d'AD m茅s proper i ajusta els timestamps perqu猫 la narraci贸 sigui fluida. "
            "Evita redund脿ncies: no repeteixis informaci贸 ja descrita en segments d'AD anteriors o al di脿leg, i elimina qualsevol detall que no sigui essencial."
        )
        output_srt = current_srt
        reflection_text = "Generaci贸n inicial. No hay reflexi贸n."
    else:
        # Reflection task: rewrite the '(AD)' lines using the critic's feedback,
        # keeping dialogue lines intact.
        prompt = (
            "Ets un Narrador expert en Audiodescripci贸 (AD). Has rebut una cr铆tica sobre la teva 煤ltima versi贸 de l'SRT. "
            "La teva tasca 茅s REESCRIURE el contingut d'audiodescripci贸 (l铆nies amb '(AD)') del fitxer SRT, "
            "assegurant que sigui coherent amb el JSON de context i, sobretot, que CORREGEIXIS TOTS els problemes "
            "mencionats a l'Informe Cr铆tic adjunt. Mant茅n intactes els di脿legs (l铆nies amb [Nom]) i escriu totes les audiodescripcions en catal脿 natural. "
            "Garanteix que cada bloc d'AD pugui ser locutat dins del seu interval temporal disponible considerant un m脿xim d'11 car脿cters per segon. "
            "Si l'interval 茅s massa curt (<1.5s), fusiona'l amb el bloc d'AD anterior o posterior m茅s proper i ajusta els timestamps perqu猫 quedin cont铆nues. "
            "Prefereix frases concises i accionables, prioritzant la informaci贸 visual essencial, i elimina redund脿ncies amb AD anteriors o amb els di脿legs."
        )

        # Assemble the user message (critic report + context + previous SRT).
        input_content = f"""
# INFORME CR脥TICO
Porcentaje de Fiabilidad Anterior: {critic_report.get('reliability_percentage')}
Cr铆tica Cualitativa: {critic_report.get('qualitative_critique')}

# JSON DE CONTEXTO VISUAL (Gu铆a para la AD)
{json_context}

# 脷LTIMO ARCHIVO SRT GENERADO (une_ad_{iteration-1}.srt)
{current_srt}

REGLAS: Tu respuesta debe ser *SOLAMENTE* el contenido completo del nuevo archivo SRT (incluyendo di谩logos), sin ning煤n comentario o explicaci贸n adicional.
"""

        # Call the LLM (system prompt + assembled input).
        response = llm.invoke(
            [
                SystemMessage(content=prompt),
                HumanMessage(content=input_content)
            ]
        )

        output_srt = response.content
        reflection_text = f"Reescrito en base al informe cr铆tico: {critic_report.get('qualitative_critique', 'N/A')}"

    # 2. Persist the new SRT version.
    new_srt_path = TEMP_DIR / f"une_ad_{iteration}.srt"
    new_srt_path.write_text(output_srt, encoding="utf-8")

    # 3. Persist the reflection ("thinking") text for this cycle.
    (TEMP_DIR / f"thinking_{iteration}.txt").write_text(reflection_text, encoding="utf-8")

    logger.info(f"Narrador: Generada la versi贸n {iteration} del SRT en '{new_srt_path}'.")

    # 4. Return the state update; best-* fields are carried through unchanged.
    # NOTE(review): the fallback default for best_weighted_mean is 0.0 here but
    # -1.0 in critic_agent/initial_state — harmless while scores are >= 0, but
    # worth unifying.
    new_history = history + [AIMessage(content=f"Narrador v{iteration} completado. Raz贸n de reflexi贸n: {reflection_text}")]
    return {
        "iteration": iteration,
        "current_srt_path": str(new_srt_path),
        "history": new_history,
        "evaluation_mean": state.get("evaluation_mean", 0.0),
        "best_iteration": state.get("best_iteration", -1),
        "best_weighted_mean": state.get("best_weighted_mean", 0.0),
        "best_srt_path": state.get("best_srt_path", str(new_srt_path)),
        "best_eval_path": state.get("best_eval_path", str(TEMP_DIR / f"eval_{iteration}.csv")),
    }
|
| 314 |
-
|
| 315 |
-
def critic_agent(state: ReflectionState):
    """Evaluate the narrator's SRT against the UNE audio-description rules.

    Asks the LLM for a JSON report (reliability percentage + qualitative
    critique), runs the per-criterion evaluation (CSV + simple/weighted
    means), appends the means to this cycle's thinking file, and updates
    the best-iteration bookkeeping.

    Returns the next ReflectionState update with the iteration counter
    advanced by one.
    """
    iteration = state["iteration"]
    history = state["history"]
    current_srt = Path(state["current_srt_path"]).read_text(encoding="utf-8")

    prompt = (
        "Ets un Cr铆tic d'Audiodescripci贸 molt estricte. La teva tasca 茅s avaluar l'SRT adjunt "
        "煤nicament segons les Regles UNE proporcionades. L'avaluaci贸 ha de ser doble: "
        "1. **Num猫rica**: Un percentatge de fiabilitat (ex. 85.5) de 0 a 100%. "
        "2. **Qualitativa**: Una cr铆tica constructiva sobre les principals mancances de les AD respecte a les regles. "
        "Has de ser EXTREMADAMENT estricte amb la sincronitzaci贸 (sense solapament amb el di脿leg), "
        "amb l'adequaci贸 temporal (velocitat m脿xima recomanada d'11 car脿cters per segon) i amb l'abs猫ncia de redund脿ncies. "
        "Comprova tamb茅 que totes les audiodescripcions estan escrites en catal脿 natural."
    )

    # UNE_RULES is a module-level constant defined elsewhere in this file.
    input_content = f"""
# REGLAS UNE DE AUDIODESCRIPCI脫N:
{UNE_RULES}

# ARCHIVO SRT A EVALUAR (une_ad_{iteration}.srt):
{current_srt}

REGLAS DE RESPUESTA:
Tu respuesta debe ser *SOLAMENTE* un objeto JSON con dos claves:
1. "reliability_percentage": (float) El porcentaje de fiabilidad.
2. "qualitative_critique": (string) La cr铆tica cualitativa y sugerencias de mejora.
Ejemplo de respuesta: {{"reliability_percentage": 75.0, "qualitative_critique": "El segmento 4 se solapa 0.34s con el di谩logo de Sandra. El segmento 5 es demasiado gen茅rico y no describe bien la acci贸n."}}
"""

    # Call the LLM.
    response = llm.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(content=input_content)
        ]
    )

    # Parse the LLM's JSON answer; fall back to a low-score report on failure
    # so the loop keeps running instead of crashing.
    try:
        cleaned_response = _strip_markdown_fences(response.content)
        report = json.loads(cleaned_response)
        if not isinstance(report, dict) or 'reliability_percentage' not in report:
            raise ValueError("Estructura JSON incorrecta.")
    except Exception as e:
        logger.error(f"Error al parsear el JSON del Cr铆tico: {e}. Respuesta: {response.content}")
        report = {"reliability_percentage": 1.0, "qualitative_critique": "El Cr铆tico no devolvi贸 un JSON v谩lido. Reintentar."}

    logger.info(f"Cr铆tico: Evaluaci贸n completada. Fiabilidad: {report.get('reliability_percentage')}%.")

    # Per-criterion evaluation: writes eval_<iteration>.csv and returns means.
    mean_score, weighted_mean, eval_path = generate_evaluation_report(current_srt, iteration)

    # Append the means to the narrator's thinking file for this cycle.
    thinking_path = TEMP_DIR / f"thinking_{iteration}.txt"
    if thinking_path.exists():
        previous_text = thinking_path.read_text(encoding="utf-8")
        thinking_path.write_text(
            (
                f"{previous_text}\n\nMitjana simple d'avaluaci贸: {mean_score:.2f} / 7"
                f"\nMitjana ponderada d'avaluaci贸: {weighted_mean:.2f} / 7"
            ),
            encoding="utf-8",
        )

    # Track the best cycle seen so far (by weighted mean).
    best_iteration = state.get("best_iteration", -1)
    best_weighted_mean = state.get("best_weighted_mean", -1.0)
    best_srt_path = state.get("best_srt_path", state["current_srt_path"])
    best_eval_path = state.get("best_eval_path", str(eval_path))

    if weighted_mean > best_weighted_mean:
        best_iteration = iteration
        best_weighted_mean = weighted_mean
        best_srt_path = state["current_srt_path"]
        best_eval_path = str(eval_path)

    new_history = history + [
        AIMessage(
            content=(
                "Cr铆tico v{iter} completado. Fiabilidad: {reliab}%. "
                "Mitjana simple: {mean:.2f}/7. Mitjana ponderada: {wmean:.2f}/7"
            ).format(
                iter=iteration,
                reliab=report.get("reliability_percentage"),
                mean=mean_score,
                wmean=weighted_mean,
            )
        )
    ]
    # Advance the iteration counter here (the narrator leaves it unchanged).
    return {
        "iteration": iteration + 1,
        "critic_report": report,
        "history": new_history,
        "evaluation_mean": weighted_mean,
        "best_iteration": best_iteration,
        "best_weighted_mean": best_weighted_mean,
        "best_srt_path": best_srt_path,
        "best_eval_path": best_eval_path,
    }
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
# --- Condici贸n de Salida del Bucle ---
|
| 418 |
-
|
| 419 |
-
def should_continue(state: ReflectionState) -> str:
    """Decide whether the reflection loop runs another cycle.

    Returns "end" when the weighted evaluation mean reaches the quality
    threshold or the iteration budget is exhausted; "continue" otherwise.
    """
    max_cycles = 5          # hard cap on reflection cycles
    score_threshold = 6.0   # minimum weighted mean (out of 7) to stop early

    cycle = state["iteration"]
    weighted = state.get("evaluation_mean", 0.0)

    if weighted >= score_threshold:
        logger.info(f"FIN: Mitjana ponderada d'avaluaci贸 assolida ({weighted:.2f} >= {score_threshold}).")
        return "end"

    if cycle >= max_cycles:
        logger.info(f"FIN: S'ha assolit el m脿xim d'iteracions ({cycle} / {max_cycles}).")
        return "end"

    logger.info(f"CONTINUAR: Iteraci贸 {cycle} / {max_cycles}. Mitjana ponderada actual: {weighted:.2f} / 7.")
    return "continue"
|
| 439 |
-
|
| 440 |
-
# --- Graph construction ---

# 1. Initial state fed into the graph: start from the pre-generated
#    une_ad_0.srt with no critique and no best iteration yet.
initial_state: ReflectionState = {
    "iteration": 0,
    "current_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "critic_report": {"reliability_percentage": 0.0, "qualitative_critique": "Inicializando el proceso."},
    "history": [],
    "evaluation_mean": 0.0,
    "best_iteration": -1,
    "best_weighted_mean": -1.0,  # -1 so the first real score always becomes the best
    "best_srt_path": str(TEMP_DIR / "une_ad_0.srt"),
    "best_eval_path": str(TEMP_DIR / "eval_0.csv"),
}
|
| 454 |
-
|
| 455 |
-
# 2. Define the graph over the shared ReflectionState.
workflow = StateGraph(ReflectionState)

# Nodes (one per agent).
workflow.add_node("narrator", narrator_agent)
workflow.add_node("critic", critic_agent)

# Loop structure: Narrator -> Critic -> check.
workflow.set_entry_point("narrator")
workflow.add_edge("narrator", "critic")

# Branch after each critique: loop back or finish.
workflow.add_conditional_edges(
    "critic",
    should_continue,
    {
        "continue": "narrator",  # threshold/budget not met: back to the narrator
        "end": END  # quality reached or budget exhausted: stop
    }
)

# Compile the graph into a runnable app.
app = workflow.compile()
|
| 478 |
-
|
| 479 |
-
# --- Main entry point ---

if __name__ == "__main__":
    # Initialise the working files (initial SRT + visual-context JSON).
    setup_files(INITIAL_SRT_CONTENT, CONTEXT_JSON_CONTENT)

    logger.info("--- Comenzando el Bucle de Reflexi贸n ---")

    # Run the reflection loop until should_continue() returns "end".
    final_state = app.invoke(initial_state)

    logger.info("\n--- Bucle Finalizado ---")

    # Best-cycle artifacts tracked by the critic during the run.
    best_iteration = final_state.get("best_iteration", -1)
    best_weighted_mean = final_state.get("best_weighted_mean", 0.0)
    best_srt_path = Path(final_state.get("best_srt_path", final_state['current_srt_path']))
    best_eval_path = Path(final_state.get("best_eval_path", TEMP_DIR / "eval_0.csv"))

    final_srt_path = TEMP_DIR / "une_ad.srt"
    final_eval_path = TEMP_DIR / "eval.csv"

    # Copy the best SRT/CSV to their canonical names; a failed copy is
    # logged but does not abort the summary below.
    try:
        shutil.copy(best_srt_path, final_srt_path)
        logger.info(f"SRT final copiado a '{final_srt_path}'.")
    except Exception as exc:
        logger.error(f"No se pudo copiar el SRT final: {exc}")

    try:
        shutil.copy(best_eval_path, final_eval_path)
        logger.info(f"Evaluaci贸n final copiada a '{final_eval_path}'.")
    except Exception as exc:
        logger.error(f"No se pudo copiar el CSV final: {exc}")

    # Print a short summary of the run.
    print(f"N煤mero final de ciclos: {final_state['iteration']}")
    print(f"Iteraci贸n 貌ptima: {best_iteration} (mitjana ponderada {best_weighted_mean:.2f}/7)")
    print(f"Ruta al SRT final: {final_srt_path}")
    print(f"Ruta a l'avaluaci贸 final: {final_eval_path}")

    # Show the final SRT contents.
    print("\n--- Contenido del SRT Final ---")
    print(final_srt_path.read_text(encoding="utf-8"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
finetuning/video_analysis.py
DELETED
|
@@ -1,189 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import re
|
| 4 |
-
from dataclasses import dataclass
|
| 5 |
-
from datetime import timedelta
|
| 6 |
-
from typing import List, Optional, Dict, Any
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# Matches an SRT timing line, e.g. "00:01:02,500 --> 00:01:04,000".
# Accepts either comma or dot as the millisecond separator.
TIME_RE = re.compile(
    r"(?P<start>\d{2}:\d{2}:\d{2}[,\.]\d{3})\s*-->\s*(?P<end>\d{2}:\d{2}:\d{2}[,\.]\d{3})"
)
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
@dataclass
class SRTBlock:
    """A single parsed SRT cue: index, timing (in seconds) and its text."""
    index: int
    start: float  # seconds
    end: float  # seconds
    text: str
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def _parse_timestamp(ts: str) -> float:
|
| 23 |
-
"""Convierte 'HH:MM:SS,mmm' o 'HH:MM:SS.mmm' a segundos (float)."""
|
| 24 |
-
ts = ts.replace(",", ".")
|
| 25 |
-
h, m, s = ts.split(":")
|
| 26 |
-
seconds, millis = (s.split("." ) + ["0"])[:2]
|
| 27 |
-
td = timedelta(
|
| 28 |
-
hours=int(h),
|
| 29 |
-
minutes=int(m),
|
| 30 |
-
seconds=int(seconds),
|
| 31 |
-
milliseconds=int(millis.ljust(3, "0")),
|
| 32 |
-
)
|
| 33 |
-
return td.total_seconds()
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def _parse_srt(srt_text: str) -> List[SRTBlock]:
    """Parse raw SRT text into a list of SRTBlock entries.

    Chunks without a recognizable timestamp line are skipped; chunks
    without a numeric index line are numbered sequentially.
    """
    normalized = srt_text.replace("\r\n", "\n").replace("\r", "\n")
    blocks: List[SRTBlock] = []

    for raw_chunk in re.split(r"\n\s*\n", normalized):
        chunk = raw_chunk.strip()
        if not chunk:
            continue
        chunk_lines = chunk.split("\n")

        # Optional leading numeric index line.
        explicit_index = None
        search_from = 0
        if chunk_lines and chunk_lines[0].strip().isdigit():
            explicit_index = int(chunk_lines[0].strip())
            search_from = 1

        # The timestamp must appear within the next few lines.
        match = None
        match_line = None
        for pos in range(search_from, min(search_from + 3, len(chunk_lines))):
            candidate = TIME_RE.search(chunk_lines[pos])
            if candidate:
                match, match_line = candidate, pos
                break

        if match is None or match_line is None:
            continue

        blocks.append(
            SRTBlock(
                index=explicit_index if explicit_index is not None else len(blocks) + 1,
                start=_parse_timestamp(match.group("start")),
                end=_parse_timestamp(match.group("end")),
                text="\n".join(chunk_lines[match_line + 1 :]).strip(),
            )
        )

    return blocks
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
def analyze_srt(
    srt_text: str,
    *,
    ad_markers: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Compute basic pacing and coverage metrics for an SRT file.

    Returned metrics:
    - duration_sec: estimated total video duration (seconds)
    - words_per_min: words per minute over the whole file
    - speakers_blocks_per_min: dialogue (non-AD) blocks per minute
    - ad_time_ratio: fraction (0..1) of total time covered by AD blocks
    - blocks_per_min: total blocks per minute

    Heuristics:
    - The video is assumed to end at the last block's end time.
    - A block counts as AD when its first line contains one of the
      `ad_markers` strings (default: "[AD]", "AD:", "(AD)").
    """
    blocks = _parse_srt(srt_text)
    if not blocks:
        return {
            "duration_sec": 0.0,
            "words_per_min": 0.0,
            "speakers_blocks_per_min": 0.0,
            "ad_time_ratio": 0.0,
            "blocks_per_min": 0.0,
        }

    duration_sec = max(b.end for b in blocks)
    duration_min = max(duration_sec / 60.0, 1e-6)

    # Total word count across all blocks.
    total_words = sum(len(b.text.split()) for b in blocks)

    markers = ["[AD]", "AD:", "(AD)"] if ad_markers is None else ad_markers
    # Hoist the case-folding out of the per-block loop.
    upper_markers = [mk.upper() for mk in markers]

    def _is_ad(block: SRTBlock) -> bool:
        # AD is detected on the first line of the block only.
        first_line = (block.text.splitlines() or [""])[0].strip().upper()
        return any(mk in first_line for mk in upper_markers)

    ad_time = 0.0
    speech_blocks = 0
    for block in blocks:
        if _is_ad(block):
            ad_time += max(0.0, block.end - block.start)
        else:
            speech_blocks += 1

    ad_time_ratio = ad_time / duration_sec if duration_sec > 0 else 0.0

    return {
        "duration_sec": float(duration_sec),
        "words_per_min": float(total_words / duration_min),
        "speakers_blocks_per_min": float(speech_blocks / duration_min),
        "ad_time_ratio": float(ad_time_ratio),
        "blocks_per_min": float(len(blocks) / duration_min),
    }
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
def embed_srt_sentences(
    srt_text: str,
    *,
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
) -> Dict[str, Any]:
    """Embed each SRT block's text with a sentence-transformers model.

    Args:
        srt_text: Full contents of the SRT file as a string.
        model_name: sentence-transformers model identifier to load.

    Returns:
        Dict with:
        - "model_name": the model identifier used
        - "sentences": one string per non-empty block
        - "embeddings": list of float vectors aligned with "sentences"

    Raises:
        ImportError: if `sentence-transformers` (with a compatible
            PyTorch backend) is not installed.
    """
    sentences: List[str] = []
    for block in _parse_srt(srt_text):
        if block.text.strip():
            sentences.append(block.text.replace("\n", " ").strip())

    if not sentences:
        return {"model_name": model_name, "sentences": [], "embeddings": []}

    # Imported lazily so the rest of the module works without the dependency.
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError as exc:
        raise ImportError(
            "sentence-transformers no est谩 instalado. "
            "Instala la dependencia para poder generar embeddings."
        ) from exc

    encoder = SentenceTransformer(model_name)
    raw_vectors = encoder.encode(sentences, convert_to_numpy=False)
    embeddings = [[float(component) for component in vector] for vector in raw_vectors]

    return {
        "model_name": model_name,
        "sentences": sentences,
        "embeddings": embeddings,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|