import json
import os
import subprocess
from datetime import datetime, timezone

import gradio as gr
import requests
from huggingface_hub import HfApi, login

# Target Dataset Repository
REPO_ID = "Mafia2008/Vod"


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a 'Z' suffix."""
    # datetime.utcnow() is deprecated since Python 3.12; an aware datetime
    # produces the same "....Z" string once "+00:00" is normalized.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def extract_json_from_http_text(text: str):
    """Extract the JSON payload embedded in a raw HTTP capture.

    Scans for the first occurrence of '{"status"' and parses everything up
    to the last closing brace.

    Args:
        text: Raw HTTP capture text (headers + body).

    Returns:
        The decoded JSON object, or None when no payload is found or it
        cannot be parsed.
    """
    start_idx = text.find('{"status"')
    if start_idx == -1:
        return None
    body = text[start_idx:]
    end_idx = body.rfind('}')
    if end_idx == -1:
        return None
    try:
        return json.loads(body[:end_idx + 1])
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError; a narrow except avoids
        # silently swallowing unrelated bugs (the original used a bare except).
        return None


def convert_and_upload_single(api, item, folder_name):
    """Download/convert a single item, upload it immediately, and clean up.

    Videos are remuxed from their HLS link via FFmpeg; notes are downloaded
    as PDFs. The local file is always removed afterwards so the Space's
    ephemeral disk never fills up.

    Args:
        api: Authenticated huggingface_hub.HfApi client.
        item: Dict with at least 'title' and 'link'; optional 'duration',
            'date' and 'type' keys are read as well.
        folder_name: Target folder inside the dataset repository.

    Returns:
        Tuple of (success, log_message, metadata). ``metadata`` is a dict
        describing the uploaded file on success, otherwise None.
    """
    title = item.get("title", "untitled").replace("/", "-").replace("\\", "-").strip()
    link = item.get("link")
    duration = item.get("duration", "0")
    original_date = item.get("date", "")

    is_video = "VIDEO" in item.get("type", "").upper() or "video" in folder_name.lower()
    ext = ".mp4" if is_video else ".pdf"

    local_filename = f"{title}{ext}"
    local_path = os.path.join("/tmp", local_filename)
    repo_path = f"{folder_name}/{local_filename}"

    # Generate the direct download link for Hugging Face
    hf_direct_link = (
        f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/"
        f"{repo_path.replace(' ', '%20')}"
    )

    try:
        if is_video:
            print(f"🎬 Downloading and converting: {title}")
            # FFmpeg with User-Agent and Whitelist to fix the 728kb error
            command = [
                "ffmpeg", "-y",
                "-user_agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                "-protocol_whitelist", "file,http,https,tcp,tls,crypto",
                "-i", link,
                "-c", "copy",
                "-bsf:a", "aac_adtstoasc",
                local_path,
            ]
            subprocess.run(command, check=True)

            # Size check: If it's under 1MB, something blocked FFmpeg
            if os.path.exists(local_path) and os.path.getsize(local_path) < 1024 * 1024:
                return False, f"❌ Failed {title}: Output file is too small (likely blocked by server).", None
        else:
            print(f"📄 Downloading PDF: {title}")
            # A timeout prevents a dead link from hanging the whole pipeline
            # (the original request could block forever).
            with requests.get(link, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(local_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        # Upload the single file to Hugging Face immediately
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=repo_path,
            repo_id=REPO_ID,
            repo_type="dataset",
        )

        # Build the metadata record for the final JSON
        metadata = {
            "title": title,
            "type": "VIDEO" if is_video else "PDF",
            "duration": duration,
            "original_date": original_date,
            "upload_date": _utc_now_iso(),
            "hf_direct_link": hf_direct_link,
        }
        return True, f"✅ Uploaded: {local_filename}", metadata

    except Exception as e:
        return False, f"❌ Failed {title}: {str(e)}", None
    finally:
        # Clean up local storage immediately to prevent storage crashes —
        # runs on every exit path (success, too-small output, or error).
        if os.path.exists(local_path):
            os.remove(local_path)


def _load_capture(path):
    """Read an uploaded capture file and return its extracted JSON payload.

    Returns None when the user did not upload a file (gr.File yields None)
    or when no JSON payload can be extracted.
    """
    if not path:
        return None
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
        return extract_json_from_http_text(f.read())


def start_process(vid_file, not_file):
    """Run the full pipeline and return (status, markdown_log) for the UI.

    Steps: authenticate with HF, parse both capture files, upload videos and
    notes one-by-one, then publish a master ``index.json`` to the repo root.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return "HF_TOKEN missing in Secrets.", ""

    login(token=hf_token)
    api = HfApi()

    # Missing uploads are tolerated instead of crashing on open(None).
    vid_data = _load_capture(vid_file)
    not_data = _load_capture(not_file)

    logs = []
    final_json_data = {
        "dataset_repo": REPO_ID,
        "last_updated": _utc_now_iso(),
        "videos": [],
        "notes": [],
    }

    # 1. Process and Upload Videos One-by-One
    if vid_data:
        logs.append("### 🎥 Processing Videos")
        for item in vid_data.get('data', {}).get('chapters', []):
            success, msg, meta = convert_and_upload_single(api, item, "English 12th videos")
            logs.append(msg)
            if success and meta:
                final_json_data["videos"].append(meta)

    # 2. Process and Upload Notes One-by-One
    if not_data:
        logs.append("\n### 📄 Processing Notes")
        for item in not_data.get('data', {}).get('chapters', []):
            success, msg, meta = convert_and_upload_single(api, item, "English 12th notes")
            logs.append(msg)
            if success and meta:
                final_json_data["notes"].append(meta)

    # 3. Save and Upload the Master JSON Index
    logs.append("\n### 📝 Generating Master JSON File")
    json_path = "/tmp/index.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(final_json_data, f, indent=4)

    try:
        api.upload_file(
            path_or_fileobj=json_path,
            path_in_repo="index.json",
            repo_id=REPO_ID,
            repo_type="dataset",
        )
        logs.append("✅ Success: Uploaded index.json to dataset root")
    except Exception as e:
        logs.append(f"❌ Failed to upload index.json: {str(e)}")
    finally:
        if os.path.exists(json_path):
            os.remove(json_path)

    return "Process Finished", "\n".join(logs)


# Build the Web UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# 🎬 Fast Pipeline Worker for {REPO_ID}")
    gr.Markdown(
        "Safely converts and uploads videos one-by-one to prevent storage "
        "crashes, then generates a master `index.json`."
    )
    with gr.Row():
        v_in = gr.File(label="Upload Video TXT")
        n_in = gr.File(label="Upload Notes TXT")
    btn = gr.Button("Start Conversion & Upload", variant="primary")
    out = gr.Textbox(label="Status")
    log = gr.Markdown(label="Execution Logs")
    btn.click(start_process, [v_in, n_in], [out, log])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)