import logging
import os
import shutil
import tempfile
from typing import List

import ffmpeg
import torch
import whisper
import yt_dlp
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse, PlainTextResponse
from pydantic import BaseModel

# Application instance; the route decorators in this module register their
# handlers on it.
app = FastAPI()

# Plain-text banner returned by GET /. The Arabic part reads: "send a POST
# request to /transcribe with {"url": "..."} to analyze the video."
WELCOME_MSG = "Whisper Transcriber Free API — أرسل طلب POST إلى /transcribe مع {\"url\": \"...\"} لتحليل الفيديو."

class TranscribeRequest(BaseModel):
    """Request body accepted by POST /transcribe."""

    # Address of the media to download and transcribe.
    url: str

class TranscribedSegment(BaseModel):
    """One element of the list returned by POST /transcribe."""

    # Segment start rendered as zero-padded "MM:SS" ("00:00" for error entries).
    timestamp: str
    # Transcribed text of the segment, or an error message for error entries.
    text: str

@app.get("/")
def root():
    """Serve the welcome banner as a plain-text response."""
    banner_response = PlainTextResponse(WELCOME_MSG)
    return banner_response

_logger = logging.getLogger(__name__)

# A bare "http" prefix check would also accept strings such as "httpfoo" that
# are not URLs at all, so the scheme separator is required as well.
_ALLOWED_URL_PREFIXES = ("http://", "https://")


def _error_payload(message: str) -> list:
    """Build the single-element list used to report an error to the client."""
    return [{"timestamp": "00:00", "text": f"خطأ: {message}"}]


def _format_timestamp(start_seconds) -> str:
    """Render a segment start offset (seconds) as zero-padded ``MM:SS``.

    Fractions of a second are truncated. Minutes are not wrapped into hours,
    so an offset of 75.5 minutes is rendered as ``75:30``.
    """
    minutes, seconds = divmod(int(start_seconds), 60)
    return f"{minutes:02d}:{seconds:02d}"


def _download_media(url: str, tmpdir: str) -> str:
    """Download the best available audio for *url* into *tmpdir*; return its path."""
    ydl_opts = {
        'outtmpl': os.path.join(tmpdir, "video"),
        'format': 'bestaudio/best',
        'quiet': True,
        'noplaylist': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    # Locate the file by listing the directory instead of assuming its exact
    # name; "audio.wav" is reserved for the converted output.
    downloaded = next((name for name in os.listdir(tmpdir) if name != "audio.wav"), None)
    if not downloaded:
        raise RuntimeError("فشل التنزيل.")
    return os.path.join(tmpdir, downloaded)


@app.post("/transcribe", response_model=List[TranscribedSegment])
def transcribe_audio(req: TranscribeRequest):
    """Download the media at ``req.url`` and return its Whisper transcription.

    The media is fetched with yt-dlp, converted by ffmpeg to a mono 16 kHz WAV
    file, and transcribed with the Whisper model named by the ``WHISPER_MODEL``
    environment variable (default ``"base"``), on CUDA when available and on
    the CPU otherwise. All intermediate files live in a temporary directory
    that is removed before returning.

    Args:
        req: Request body carrying the media URL. The URL must start with
            ``http://`` or ``https://``.

    Returns:
        A list of ``{"timestamp": "MM:SS", "text": ...}`` dicts, one per
        transcribed segment. Failures are reported in-band rather than raised:
        the result is then a single entry with timestamp ``"00:00"`` and a
        text that starts with the Arabic word for "Error:".
    """
    url = req.url
    if not url or not url.startswith(_ALLOWED_URL_PREFIXES):
        # Always return an array, even for errors
        return _error_payload("الرابط غير صالح.")

    tmpdir = tempfile.mkdtemp()
    try:
        audio_path = os.path.join(tmpdir, "audio.wav")
        source_path = _download_media(url, tmpdir)

        # Convert to mono 16 kHz wav
        (
            ffmpeg
            .input(source_path)
            .output(audio_path, ac=1, ar=16000, format='wav')
            .run(overwrite_output=True, quiet=True)
        )

        model_size = os.environ.get("WHISPER_MODEL", "base")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = whisper.load_model(model_size, device=device)

        result = model.transcribe(audio_path, word_timestamps=False, verbose=False)
        return [
            {
                "timestamp": _format_timestamp(seg["start"]),
                "text": seg["text"].strip(),
            }
            for seg in result.get("segments", [])
        ]
    except Exception as exc:
        # Boundary handler: the client contract is "always an array", so the
        # error is returned in-band, but the traceback is logged so failures
        # are still visible on the server.
        _logger.exception("Transcription failed for %s", url)
        return _error_payload(str(exc))
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)