import os
import subprocess
import sys

# --- 1. MAGIC INSTALLER ---
# This forces the computer to install the engine NOW, inside the correct folder.
def install_engine():
    print("⏳ Installing Brain Engine (Safe Mode)...")
    try:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "llama-cpp-python",
            "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
        ])
        print("✅ Engine Installed!")
    except Exception as e:
        print(f"❌ Install Failed: {e}")

# Try to import the library. If it fails, run the installer above.
try:
    import llama_cpp
except ImportError:
    install_engine()
    import llama_cpp

# --- 2. YOUR APP CODE ---
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized GGUF model from the Hugging Face Hub.
print("Downloading EMET Brain...")
model_path = hf_hub_download(
    repo_id="pavanc21/EMET-Mistral-2.0-GGUF",
    filename="mistral-7b-v0.3.Q4_K_M.gguf"
)

# Load the model on CPU with a 2048-token context window.
print("Starting Engine...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=2
)

def generate_response(message, history):
    # Wrap the user's message in an Alpaca-style instruction prompt.
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{message}

### Input:


### Response:
"""
    # Stop generating when the model starts a new instruction block or emits an end-of-sequence tag.
    output = llm(prompt, max_tokens=64, stop=["### Instruction:", "</s>"], echo=False)
    return output['choices'][0]['text'].strip()

interface = gr.ChatInterface(
    fn=generate_response,
    title="🤖 EMET 2.0 (Live)",
    description="My custom AI running 24/7 on Hugging Face.",
    examples=["Who created you?", "What is your purpose?"]
)

interface.launch()