import os
import subprocess
import tempfile
import traceback
import time
import shutil
import torch
import gradio as gr
from textwrap import wrap

from nemo.collections.asr.models import EncDecRNNTBPEModel
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
from faster_whisper import WhisperModel as FasterWhisperModel


# Backend names offered in the UI dropdown; `transcribe` dispatches on these.
ASR_BACKENDS = ["Whisper", "FasterWhisper", "NeMoParakeet"]

# Model identifiers handed to each backend's loader.
WHISPER_MODEL = "antony66/whisper-large-v3-russian"
FASTERW_MODEL = "Ash8181/whisper-large-v3-russian-ct2"
PARAKEET_MODEL = "nvidia/parakeet-tdt-0.6b-v3"

# Chunk length (seconds) for the Hugging Face ASR pipeline (`chunk_length_s`).
# Whisper's feature extractor operates on 30-second windows, so a longer chunk
# is truncated and the remaining audio in it is never transcribed.
CHUNK_LENGTH = 30

# Named subtitle style presets selectable in the UI.
# Each preset maps style field names to values; `process` copies the chosen
# preset, overrides the fields exposed in the UI, and serialises the result
# with `style_to_force` for ffmpeg's `force_style` option.
# Colour values are "#RRGGBB" or "#AARRGGBB" strings (see `color_hex_to_ass`).
PRESETS = {
    # Large bold yellow captions; Alignment 8 is presumably top-centre in the
    # ASS numpad layout (matches the "TOP" in the name) - confirm.
    "Viral Shorts (TOP)": {
        "Alignment": 8,
        "FontName": "Arial Black",
        "FontSize": 64,
        "PrimaryColour": "#FFFF00",
        "Outline": 4,
        "OutlineColour": "#000000",
        "Shadow": 1,
        "BackColour": "#80000000",
        "Bold": 1,
        "BorderStyle": 1,
        "MarginV": 40,
    },
    # Small green captions; Alignment 2 is presumably bottom-centre - confirm.
    "Минимал низ": {
        "Alignment": 2,
        "FontName": "Montserrat",
        "FontSize": 28,
        "PrimaryColour": "#17FC03",
        "Outline": 1,
        "OutlineColour": "#000000",
        "Shadow": 0,
        "BackColour": "#80000000",
        "Bold": 1,
        "BorderStyle": 1,
        "MarginV": 40,
    },
}

# Process-wide cache of loaded ASR models, filled lazily by the get_* loaders
# under the keys "whisper", "faster" and "parakeet".
_cache = {}

def format_srt_time(sec):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is first rounded to whole milliseconds and then split with
    integer arithmetic. This avoids float truncation artefacts (e.g. 2.3 s
    rendering as ``,299``) and carries correctly when rounding reaches the
    next second. Negative offsets are clamped to zero because SRT has no
    representation for them.

    Args:
        sec: Offset from the start of the media, in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``"00:01:02,500"``.
    """
    total_ms = max(0, int(round(float(sec) * 1000)))
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"

def color_hex_to_ass(val):
    """Convert a ``#``-prefixed hex colour to the ASS ``&HAABBGGRR`` form.

    Accepted inputs are ``#RRGGBB`` (alpha defaults to ``00``), ``#AARRGGBB``,
    and the shorthand forms ``#RGB`` / ``#ARGB``, whose digits are doubled.
    Anything else - non-strings, strings without a leading ``#``, or hex
    strings of an unexpected length or with non-hex characters - is returned
    unchanged so already-converted values such as ``&H00000000`` pass through.

    Args:
        val: The colour value to convert.

    Returns:
        The ASS colour string, or ``val`` itself when it cannot be parsed.
    """
    if not (isinstance(val, str) and val.startswith("#")):
        return val

    digits = val.lstrip("#")
    if len(digits) in (3, 4):
        # Expand CSS-style shorthand: "F0A" -> "FF00AA".
        digits = "".join(ch * 2 for ch in digits)
    if len(digits) == 6:
        digits = "00" + digits

    is_hex = all(ch in "0123456789abcdefABCDEF" for ch in digits)
    if len(digits) != 8 or not is_hex:
        # Slicing a malformed value would silently emit a garbage colour.
        return val

    aa, rr, gg, bb = digits[:2], digits[2:4], digits[4:6], digits[6:8]
    return f"&H{aa}{bb}{gg}{rr}"

def style_to_force(style):
    """Serialise a style mapping into a comma-separated ``key=value`` string.

    Values whose key contains ``"Colour"`` are passed through
    ``color_hex_to_ass`` first; all other values are emitted as-is.

    Args:
        style: Mapping of style field name to value.

    Returns:
        A string such as ``"FontSize=64,Bold=1"`` (empty for an empty mapping).
    """
    return ",".join(
        f"{key}={color_hex_to_ass(value) if 'Colour' in key else value}"
        for key, value in style.items()
    )

def format_subtitle_text(text, font_size, bold):
    """Normalise the case of a subtitle line and wrap it to fit the font size.

    Bold text at font size 48 or above is upper-cased; everything else is
    passed through ``str.capitalize``. The wrap width shrinks as the font
    grows so large captions break into shorter lines.

    Args:
        text: Raw subtitle text.
        font_size: Font size used to pick the case rule and the wrap width.
        bold: Whether the subtitle is rendered bold.

    Returns:
        The formatted text with wrapped lines joined by ``"\\n"``.
    """
    stripped = text.strip()
    cased = stripped.upper() if (bold and font_size >= 48) else stripped.capitalize()

    # Adaptive wrap width: (minimum font size, characters per line).
    width_by_size = ((60, 12), (48, 16), (36, 24))
    wrap_width = next(
        (width for min_size, width in width_by_size if font_size >= min_size),
        36,
    )

    return "\n".join(wrap(cased, wrap_width))

# === Загрузка моделей ===
def get_whisper():
    """Return the Hugging Face Whisper ASR pipeline, building it on first use.

    The pipeline is stored in ``_cache["whisper"]`` so the model is loaded only
    once per process. The model runs in float16 on CUDA when available and in
    float32 on CPU otherwise.
    """
    try:
        return _cache["whisper"]
    except KeyError:
        pass

    use_cuda = torch.cuda.is_available()
    processor = WhisperProcessor.from_pretrained(WHISPER_MODEL)
    model = WhisperForConditionalGeneration.from_pretrained(
        WHISPER_MODEL,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        low_cpu_mem_usage=True,
    )
    _cache["whisper"] = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        return_timestamps=True,
        chunk_length_s=CHUNK_LENGTH,
        # The pipeline takes a GPU index, or -1 for CPU.
        device=0 if use_cuda else -1,
    )
    return _cache["whisper"]

def get_faster_whisper():
    """Return the faster-whisper model, loading it on first use.

    The model is stored in ``_cache["faster"]``. It is created with float16
    compute on CUDA when available, and with int8 compute on CPU otherwise.
    """
    try:
        return _cache["faster"]
    except KeyError:
        pass

    if torch.cuda.is_available():
        device, compute = "cuda", "float16"
    else:
        device, compute = "cpu", "int8"

    _cache["faster"] = FasterWhisperModel(
        FASTERW_MODEL, device=device, compute_type=compute
    )
    return _cache["faster"]

def get_parakeet():
    """Return the NeMo Parakeet model, loading it on first use.

    The model is put into eval mode, moved to CUDA when available (CPU
    otherwise), and stored in ``_cache["parakeet"]``.
    """
    try:
        return _cache["parakeet"]
    except KeyError:
        pass

    loaded = EncDecRNNTBPEModel.from_pretrained(PARAKEET_MODEL)
    loaded.eval()
    target = "cuda" if torch.cuda.is_available() else "cpu"
    _cache["parakeet"] = loaded.to(target)
    return _cache["parakeet"]

# === Транскрибация ===
def _whisper_chunks_to_segments(raw_chunks, fallback_duration=2.0):
    """Turn Hugging Face pipeline chunks into segments with usable timestamps.

    The pipeline may report ``None`` for a chunk's start or end timestamp
    (typically the end of the last chunk when the audio is cut off). A missing
    start falls back to the previous segment's end (or 0.0); a missing end
    falls back to the next chunk's start, or ``start + fallback_duration``.
    """
    segments = []
    for idx, chunk in enumerate(raw_chunks):
        start, end = chunk["timestamp"]
        if start is None:
            start = segments[-1]["end"] if segments else 0.0
        if end is None:
            has_next = idx + 1 < len(raw_chunks)
            next_start = raw_chunks[idx + 1]["timestamp"][0] if has_next else None
            end = next_start if next_start is not None else start + fallback_duration
        segments.append({"start": start, "end": end, "text": chunk["text"]})
    return segments


def transcribe(audio, backend):
    """Transcribe an audio file into a list of timed subtitle segments.

    Args:
        audio: Path to the audio file to transcribe.
        backend: ``"Whisper"`` or ``"FasterWhisper"``; any other value is
            handled by the NeMo Parakeet model.

    Returns:
        A list of dicts with ``"start"`` and ``"end"`` (taken from the
        backend's timestamps) and ``"text"``. For Parakeet, word timestamps
        are grouped into segments of up to six words. Whisper segments never
        carry ``None`` timestamps, so they can be formatted directly.
    """
    if backend == "Whisper":
        pipe = get_whisper()
        res = pipe(audio, generate_kwargs={"language": "russian"})
        return _whisper_chunks_to_segments(res["chunks"])

    if backend == "FasterWhisper":
        model = get_faster_whisper()
        segs, _ = model.transcribe(audio, language="ru")
        return [{"start": s.start, "end": s.end, "text": s.text} for s in segs]

    model = get_parakeet()
    words = model.transcribe([audio], timestamps=True)[0].timestamp["word"]
    words_per_segment = 6
    chunks = []
    for i in range(0, len(words), words_per_segment):
        group = words[i:i + words_per_segment]
        chunks.append({
            "start": group[0]["start"],
            "end": group[-1]["end"],
            "text": " ".join(w["word"] for w in group),
        })
    return chunks

# === Превью стиля (без транскрибации!) ===
def preview_subtitle_style(video_path, font, size, color, bg, bold, margin):
    """Render the first frame of a video with a sample subtitle in the given style.

    No transcription is performed: a fixed sample phrase is formatted with
    ``format_subtitle_text``, written to a temporary ``.ass`` script and burned
    onto the first frame with ffmpeg's ``ass`` filter.

    Args:
        video_path: Path to the source video; falsy values short-circuit.
        font: Font name for the sample subtitle.
        size: Font size.
        color: Text colour (hex string, converted with ``color_hex_to_ass``).
        bg: Back colour (hex string, converted with ``color_hex_to_ass``).
        bold: Whether the text is bold.
        margin: Vertical margin.

    Returns:
        Path to the rendered preview JPEG, or ``None`` when there is no video
        or any step fails (the error is printed with its traceback).
    """
    if not video_path:
        return None

    tmp = tempfile.mkdtemp()
    frame = os.path.join(tmp, "frame.jpg")
    ass_file = os.path.join(tmp, "preview.ass")
    preview_img = os.path.join(tmp, "preview.jpg")
    rendered = False

    try:
        # Extract the first frame.
        subprocess.run([
            'ffmpeg', '-y', '-i', video_path, '-vframes', '1', '-q:v', '2', frame
        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        if os.path.exists(frame):
            # Fixed sample text; ASS events are single-line, so wrapped lines
            # must be joined with the "\N" hard line break, not a newline.
            example_text = "Тут ваши субтитры"
            styled_text = format_subtitle_text(example_text, size, bold).replace("\n", "\\N")

            # A "Style:" line is positional and must follow the order of the
            # "Format:" line below (key=value pairs are only valid for
            # ffmpeg's force_style option, not inside an .ass script).
            style_fields = [
                "Default",                       # Name
                str(font).replace(",", " "),     # Fontname (commas would split the field)
                int(size),                       # Fontsize
                color_hex_to_ass(color),         # PrimaryColour
                "&H000000FF",                    # SecondaryColour
                "&H00000000",                    # OutlineColour
                color_hex_to_ass(bg),            # BackColour
                -1 if bold else 0,               # Bold
                0,                               # Italic
                0,                               # Underline
                0,                               # StrikeOut
                100,                             # ScaleX
                100,                             # ScaleY
                0,                               # Spacing
                0,                               # Angle
                1,                               # BorderStyle
                1,                               # Outline
                0,                               # Shadow
                2,                               # Alignment
                10,                              # MarginL
                10,                              # MarginR
                int(margin),                     # MarginV
                1,                               # Encoding
            ]
            style_line = ",".join(str(field) for field in style_fields)

            # Write the .ass script.
            with open(ass_file, "w", encoding="utf-8") as f:
                f.write("[Script Info]\n")
                f.write("ScriptType: v4.00+\n")
                f.write("PlayResX: 1920\n")
                f.write("PlayResY: 1080\n\n")
                f.write("[V4+ Styles]\n")
                f.write("Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n")
                f.write(f"Style: {style_line}\n\n")
                f.write("[Events]\n")
                f.write("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")
                f.write(f"Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{styled_text}\n")

            # Burn the subtitle onto the frame; the path is escaped for the
            # ffmpeg filter syntax (forward slashes, escaped colons).
            safe_ass = ass_file.replace("\\", "/").replace(":", "\\:")
            subprocess.run([
                'ffmpeg', '-y', '-i', frame, '-vf', f"ass='{safe_ass}'", preview_img
            ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

            rendered = os.path.exists(preview_img)

    except Exception as e:
        print("Preview error:", e)
        traceback.print_exc()

    if rendered:
        # The directory is kept because the caller still has to read the image.
        return preview_img

    # Nothing to hand back: do not leave the temporary directory behind.
    shutil.rmtree(tmp, ignore_errors=True)
    return None

# === Полная обработка видео ===
def process(video, backend, preset, font, size, color, bg, bold, margin):
    """Transcribe a video and burn styled subtitles into a new MP4.

    Steps: extract mono 16 kHz audio with ffmpeg, transcribe it with the
    selected backend, write an SRT file, then re-encode the video with the
    ``subtitles`` filter using the preset overridden by the UI values.

    Args:
        video: Path to the input video; falsy values short-circuit.
        backend: ASR backend name passed to ``transcribe``.
        preset: Key into ``PRESETS`` providing the base style.
        font, size, color, bg, bold, margin: Style overrides from the UI.

    Returns:
        A 5-tuple ``(status, output_video_path, srt_path, preview_text, None)``.
        On failure the paths are ``None`` and the preview text is empty; the
        last element is always ``None``.
    """
    if not video:
        return "❌ Нет видео", None, None, "", None

    tmp = tempfile.mkdtemp()
    wav = os.path.join(tmp, "audio.wav")

    try:
        subprocess.run(['ffmpeg', '-y', '-i', video, '-vn', '-ac', '1', '-ar', '16000', wav],
                       check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        segs = transcribe(wav, backend)

        # The extracted audio is only needed for transcription.
        try:
            os.remove(wav)
        except OSError:
            pass

        if not segs:
            shutil.rmtree(tmp, ignore_errors=True)
            return "❌ Нет речи", None, None, "", None

        style = PRESETS[preset].copy()
        style.update({
            "FontName": font,
            "FontSize": int(size),
            "PrimaryColour": color,
            "BackColour": bg,
            "Bold": int(bold),
            "MarginV": int(margin),
        })

        # Format every segment once and reuse it for the preview and the SRT.
        texts = [format_subtitle_text(s["text"], size, bold) for s in segs]

        preview_text = "".join(
            f"{i}. {txt.replace(chr(10), ' / ')}\n" for i, txt in enumerate(texts, 1)
        )

        srt = os.path.join(tmp, "subs.srt")
        with open(srt, "w", encoding="utf-8") as f:
            for i, (s, txt) in enumerate(zip(segs, texts), 1):
                f.write(f"{i}\n{format_srt_time(s['start'])} --> {format_srt_time(s['end'])}\n{txt}\n\n")

        out = f"result_{int(time.time())}.mp4"
        fs = style_to_force(style)
        # Escape the path for the ffmpeg filter syntax.
        safe_srt = srt.replace("\\", "/").replace(":", "\\:")
        vf = f"subtitles='{safe_srt}':force_style='{fs}'"

        subprocess.run(['ffmpeg', '-y', '-i', video, '-vf', vf, '-c:a', 'copy', out],
                       check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        # tmp is kept on success: the caller still needs the SRT file in it.
        return "✅ Готово", out, srt, preview_text, None

    except Exception as e:
        # Log the full traceback for diagnosis; the UI only shows the message.
        traceback.print_exc()
        shutil.rmtree(tmp, ignore_errors=True)
        return f"❌ Ошибка: {str(e)}", None, None, "", None

# === Интерфейс Gradio ===
# Build the Gradio UI: inputs and style controls in the left column, results in
# the right column, then wire the two buttons to their handlers.
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Автосубтитры (Whisper / FasterWhisper / NeMo) + LIVE preview + Превью стиля")

    with gr.Row():
        # Left column: source video, ASR backend, preset and manual style overrides.
        with gr.Column():
            video = gr.Video(label="Видео")
            backend = gr.Dropdown(ASR_BACKENDS, value="Whisper", label="ASR")
            preset = gr.Dropdown(list(PRESETS.keys()), value=list(PRESETS.keys())[0], label="Пресет")

            # Manual style settings; `process` applies these on top of the preset.
            gr.Markdown("### 🎨 Ручная настройка")
            font = gr.Textbox("Montserrat", label="Шрифт")
            size = gr.Slider(minimum=10, maximum=96, value=32, step=1, label="Размер шрифта")
            color = gr.ColorPicker("#FFFFFF", label="Цвет текста")
            bg = gr.ColorPicker("#80000000", label="Фон")
            bold = gr.Checkbox(True, label="Bold")
            margin = gr.Slider(10, 100, 40, label="Отступ снизу")

            with gr.Row():
                run_btn = gr.Button("🚀 Сгенерировать субтитры")
                preview_btn = gr.Button("👁️ Превью стиля")

        # Right column: status, subtitle text preview, style preview image and outputs.
        with gr.Column():
            status = gr.Markdown()
            preview = gr.Textbox(label="LIVE preview текста субтитров", lines=8)
            preview_img = gr.Image(label="Превью стиля на кадре", type="filepath")
            out_video = gr.Video(label="Видео с субтитрами")
            out_srt = gr.File(label="SRT файл")

    # Full run: `process` returns a 5-tuple matching these outputs in order
    # (its last element is always None, which is sent to the preview image).
    run_btn.click(
        process,
        inputs=[video, backend, preset, font, size, color, bg, bold, margin],
        outputs=[status, out_video, out_srt, preview, preview_img]
    )

    # Style-only preview: renders one frame with sample text, no transcription.
    preview_btn.click(
        preview_subtitle_style,
        inputs=[video, font, size, color, bg, bold, margin],
        outputs=[preview_img]
    )

# Enable request queuing and start the server (runs at import time).
demo.queue().launch()