import os
import subprocess
from sys import argv
from time import strftime, sleep
import shutil
from pathlib import Path
import gradio as gr
import signal
from huggingface_hub import snapshot_download, HfApi
from apscheduler.schedulers.background import BackgroundScheduler
from theme import blurple

# Tokens / config pulled from the environment (set as Space secrets).
HF_TOKEN = os.environ.get("HF_TOKEN")    # Used for restarting the space
TEST_OKEY = os.environ.get("TEST_OKEY")  # Pre-filled Ollama private key for testing
TEST_TOKEN = os.environ.get("HF_TOKEN")  # Pre-filled HF token for testing
HOST_REPO = "lainlives/ztestzz"
LLAMACPP_DIR = Path("./llama.cpp")
CONVERT_SCRIPT = "/app/convert_hf_to_gguf.py"
QUANTIZE_BIN = "llama-quantize"
TARGET_QUANTS = ["Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0"]


def format_log(msg):
    """Prefix *msg* with a [HH:MM:SS] timestamp for the UI log pane."""
    return f"[{strftime('%H:%M:%S')}] {msg}"


def setup_ollama_keys(private_key_content):
    """Write the user's private key to ~/.ollama/id_ed25519.

    Returns a ``(success, message)`` tuple; *message* is already
    user-presentable and gets appended to the UI log.
    """
    if not private_key_content:
        return False, "โš ๏ธ No Private Key provided. Pushing will fail."
    ollama_dir = Path(os.path.expanduser("~/.ollama"))
    ollama_dir.mkdir(parents=True, exist_ok=True)
    key_path = ollama_dir / "id_ed25519"
    try:
        # FIX: the original unconditional os.remove() raised FileNotFoundError
        # when no key existed yet; missing_ok makes removal best-effort.
        key_path.unlink(missing_ok=True)
        # Write the key
        with open(key_path, "w") as f:
            f.write(private_key_content.strip())
        os.chmod(key_path, 0o600)  # ssh tooling rejects group/world-readable keys
        return True, "๐Ÿ”‘ Private Key installed successfully."
    except Exception as e:
        return False, f"โŒ Failed to install keys: {e}"


def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
    """Build an Ollama model from *gguf_path* and push it as ``repo:tag``.

    Returns the list of formatted log lines produced along the way.
    """
    ollama_tag = f"{ollama_repo}:{tag_suffix.lower()}"
    # Write the Modelfile to disk: the CLI needs a physical file to point
    # to with the '-f' flag.
    modelfile_path = gguf_path.parent / "Modelfile"
    with open(modelfile_path, "w") as f:
        f.write(f"FROM {gguf_path.resolve()}")
    logs = []
    logs.append(format_log(f"๐Ÿณ Creating Ollama build: {ollama_tag}"))
    try:
        create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
        subprocess.run(create_cmd, check=True, capture_output=True)
        if modelfile_path.exists():
            os.remove(modelfile_path)
        logs.append(format_log(f"โฌ†๏ธ Pushing to registry: {ollama_tag}..."))
        push_cmd = ["ollama", "push", ollama_tag]
        push_result = subprocess.run(push_cmd, capture_output=True, text=True)
        if push_result.returncode == 0:
            logs.append(format_log(f"โœ… Successfully pushed {ollama_tag}"))
        else:
            logs.append(format_log(f"โŒ Push failed: {push_result.stderr}"))
        # Remove the local tag to save disk space in the container.
        subprocess.run(["ollama", "rm", ollama_tag])
    except subprocess.CalledProcessError as e:
        # Captures errors from the 'check=True' on create_cmd.
        logs.append(format_log(f"โŒ Ollama Create Error: {e}"))
    except Exception as e:
        logs.append(format_log(f"โŒ Error on {tag_suffix}: {str(e)}"))
    return logs


def start_ollama_daemon(ollama_key):
    """Install the ssh key and start ``ollama serve`` in the background.

    FIX: the original version referenced an undefined ``logs`` list and
    contained ``yield`` statements, making it a generator — calling code
    never actually executed the body, so the daemon never started.

    Returns ``(pid, logs)`` where *pid* is ``None`` if key installation
    failed (the daemon is not started in that case).
    """
    logs = []
    print("โณ Starting Ollama daemon in background...")
    logs.append(format_log("โณ Starting Ollama daemon in background..."))

    # Auth: install the user's private key before the daemon needs it.
    success, auth_msg = setup_ollama_keys(ollama_key)
    logs.append(format_log(auth_msg))
    if not success:
        logs.append(format_log("โŒ Stopping: Authentication setup failed."))
        return None, logs

    env = os.environ.copy()
    process = subprocess.Popen(["ollama", "serve"], env=env)
    sleep(2)  # give the daemon a moment to come up before we use it
    return process.pid, logs


def stop_ollama_daemon(pid):
    """Stop the background Ollama daemon (best effort).

    FIX: the original ended with ``return logs`` on an undefined name,
    which raised NameError, and ``os.kill`` could raise if the process
    had already exited.
    """
    print("โณ Stopping Ollama daemon...")
    if pid is not None:
        try:
            os.kill(pid, signal.SIGQUIT)
        except (ProcessLookupError, OSError):
            pass  # daemon already gone; pkill below is the safety net
    subprocess.Popen(["pkill", "ollama"])


def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
    """Generator: download *hf_repo*, convert to GGUF, quantize, push.

    Yields the accumulated log text after each step so Gradio can stream
    it into the log pane. The workspace directory is always removed in
    the ``finally`` block, success or failure.
    """
    logs = []
    work_dir = Path("conversion_work_dir")
    download_dir = work_dir / "downloads"
    output_dir = work_dir / "output"
    # Start from a clean workspace every run.
    if work_dir.exists():
        shutil.rmtree(work_dir)
    os.makedirs(download_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    try:
        # Download
        logs.append(format_log(f"โฌ‡๏ธ Downloading {hf_repo}..."))
        yield "\n".join(logs)
        model_path = snapshot_download(
            repo_id=hf_repo,
            local_dir=download_dir,
            token=hf_token if hf_token else None
        )
        logs.append(format_log("โœ… Download complete."))
        yield "\n".join(logs)

        # BF16 — pushed then deleted; failure here is non-fatal.
        bf16_path = output_dir / "model-bf16.gguf"
        logs.append(format_log("โš™๏ธ Converting to BF16..."))
        yield "\n".join(logs)
        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path),
               "--outtype", "bf16", "--outfile", str(bf16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            logs.extend(push_to_ollama(bf16_path, ollama_repo, "bf16"))
            os.remove(bf16_path)
            logs.append(format_log("๐Ÿงน Cleaned up BF16"))
        else:
            logs.append(format_log(f"โš ๏ธ BF16 Conversion failed: {result.stderr}"))
        yield "\n".join(logs)

        # FP16 — the master file all quantizations derive from, so a
        # failure here (check=True) aborts the whole job.
        fp16_path = output_dir / "model-f16.gguf"
        logs.append(format_log("โš™๏ธ Converting to FP16 (Master)..."))
        yield "\n".join(logs)
        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path),
               "--outtype", "f16", "--outfile", str(fp16_path)]
        subprocess.run(cmd, check=True, capture_output=True)
        logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
        yield "\n".join(logs)

        # Quant Loop — each quant is built from the f16 master, pushed,
        # then deleted to keep disk usage bounded.
        for quant in TARGET_QUANTS:
            logs.append(format_log(f"--- {quant} ---"))
            yield "\n".join(logs)
            final_gguf = output_dir / f"model-{quant}.gguf"
            q_cmd = [str(QUANTIZE_BIN), str(fp16_path), str(final_gguf), quant]
            q_result = subprocess.run(q_cmd, capture_output=True, text=True)
            if q_result.returncode != 0:
                logs.append(format_log(f"โŒ Quantize failed: {q_result.stderr}"))
                continue
            logs.extend(push_to_ollama(final_gguf, ollama_repo, quant))
            os.remove(final_gguf)
            logs.append(format_log(f"๐Ÿงน Cleaned up {quant}"))
            yield "\n".join(logs)

        if fp16_path.exists():
            os.remove(fp16_path)
            logs.append(format_log("๐Ÿงน Cleaned up f16"))
    except Exception as e:
        logs.append(format_log(f"โŒ CRITICAL ERROR: {str(e)}"))
    finally:
        if work_dir.exists():
            shutil.rmtree(work_dir)
        logs.append(format_log("๐Ÿ Job Done. Workspace cleared."))
        yield "\n".join(logs)


def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progress()):
    """Gradio entry point: start the daemon, stream conversion, stop the daemon.

    FIX: the original bound the (broken) generator object to ``pid``
    instead of unpacking ``(pid, logs)``, and never stopped the daemon
    if the conversion raised.
    """
    pid, daemon_logs = start_ollama_daemon(ollama_key)
    yield "\n".join(daemon_logs)
    if pid is None:
        return  # authentication setup failed; nothing to convert against
    try:
        # We yield from the generator to stream logs to the UI.
        for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
            yield update
    finally:
        sleep(10)  # let any in-flight push settle before killing the daemon
        stop_ollama_daemon(pid)


# --- UI ---
# FIX: `theme` is a gr.Blocks constructor argument, not a launch() argument;
# it was previously (incorrectly) passed to demo.launch().
with gr.Blocks(title="HF to Ollama", theme=blurple) as demo:
    target_quants_str = ', '.join(str(item) for item in TARGET_QUANTS)
    gr.Markdown("## Convert a safetensor HF repo to an Ollama repo.")
    gr.Markdown(f"This space will generate F16, BF16, {target_quants_str} GGUFs from safetensors.")
    gr.Markdown("And pushes them to your Ollama repo. You will need an Ollama ssh key, not an API key to push.")
    gr.Markdown("Temporarily move yours from ~/.ollama/id_ed25519 This will cause Ollama to generate a new one")
    gr.Markdown("After logging in set it aside, that ssh key can be used, or the old one, whichever, for spaces")
    with gr.Row():
        with gr.Column():
            hf_input = gr.Textbox(label="Source HF Repo",
                                  placeholder="unsloth/Qwen3.5-9B",
                                  value="unsloth/Qwen3.5-0.8B")
            hf_token_input = gr.Textbox(label="HF Token (for gated models and faster download)",
                                        type="password", value=TEST_TOKEN)
        with gr.Column():
            ollama_input = gr.Textbox(label="Destination Ollama Repo",
                                      placeholder="user/model",
                                      value="fervent_mcclintock/Qwen3.5-9B")
            ollama_key_input = gr.Textbox(label="Ollama Private (ssh) Key", lines=7,
                                          type="password",
                                          placeholder="-----BEGIN OPENSSH PRIVATE KEY-----...",
                                          value=TEST_OKEY)
    btn = gr.Button("Start", variant="primary")
    logs = gr.TextArea(label="Logs", interactive=False, lines=10, autoscroll=True)
    btn.click(
        fn=run_pipeline,
        inputs=[hf_input, ollama_input, hf_token_input, ollama_key_input],
        outputs=logs
    )


def restart_space():
    """Factory-reboot the hosting Space (clears any accumulated disk usage)."""
    HfApi().restart_space(repo_id=HOST_REPO, token=HF_TOKEN, factory_reboot=True)


scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)  # every 6 hours
scheduler.start()

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)