import re
import threading

import gradio as gr
from ctransformers import AutoModelForCausalLM
from llama_cpp import Llama

# ==============================
# LOAD MODELS – OPTIMAL SPEED
# ==============================
print("Loading Mistral from HuggingFace Hub...")
# General-chat model, referenced by Hub repo id plus the Q4_K_M GGUF file
# inside that repo. (A local .gguf path was previously used here in place of
# the repo id.)
mistral_model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_type="mistral",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    # Runtime settings; gpu_layers=0 offloads nothing to a GPU.
    gpu_layers=0,
    threads=8,
    batch_size=512,
    context_length=8192,
    # Default generation settings; stream_mistral() passes its own
    # max_new_tokens on each call.
    max_new_tokens=1024,
    temperature=0.7,
    top_k=30,
    top_p=0.9,
    repetition_penalty=1.1,
)

print("Loading Qwen2.5-Coder from HuggingFace Hub...")
# Math/coding model. `Llama(model_path=...)` only accepts a path to a GGUF
# file on disk and has no `model_file` parameter, so a Hub repo id cannot be
# passed to the constructor. `Llama.from_pretrained` resolves (and caches) the
# file from the Hub and forwards the remaining keyword arguments to the
# constructor. It requires the `huggingface-hub` package to be installed.
qwen_model = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
    filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
    n_ctx=8192,
    n_threads=4,
    n_batch=512,
    n_gpu_layers=0,    # Raise (e.g. 35–99) to offload layers when a GPU is available
    use_mlock=True,
    verbose=False,
)

# Module-level cancellation flag: stop() sets it, and chat() plus both
# stream_* generators clear it when they start and poll it between chunks to
# abandon generation early. Being a single global, it is shared by every
# request this process serves.
stop_event = threading.Event()

# ==============================
# ROUTING HEURISTIC — MATH + CODE → QWEN
# ==============================
# Keywords that route a message to Qwen. Every entry must be lowercase because
# it is matched against the lowercased message.
_MATH_TRIGGERS = (
    "next number", "series", "sequence", "pattern", "find the next",
    "solve", "calculate", "equation", "math", "mathematics", "mathematical",
    "integral", "derivative", "limit", "factorial", "prime", "composite",
    "geometry", "algebra", "probability", "statistics", "number",
    "compute", "simplify", "evaluate", "expression", "fraction",
    "decimal", "percentage", "ratio", "proportion", "root", "square root",
    "logarithm", "log", "ln", "exponent", "power", "base",
    "matrix", "determinant", "vector", "dot product", "cross product",
    "trigonometry", "sine", "cosine", "tan", "cot", "sec", "cosec",
    "triangle", "circle", "radius", "diameter", "area", "perimeter",
    "volume", "surface area", "integrate", "differentiate",
    "quadratic", "polynomial", "cubic", "linear equation",
    "graph", "intercept", "slope", "intersection", "domain", "range",
    "modulus", "absolute", "complex number", "imaginary", "real number",
    "mean", "median", "mode", "variance", "standard deviation",
    "correlation", "regression", "distribution", "normal distribution",
    "binomial", "poisson", "combinatorics", "permutation", "combination",
    "set theory", "subset", "union", "probability of",
)

_CODE_TRIGGERS = (
    # General programming
    "code", "program", "programming", "programmer", "coding", "script",
    "implement", "build",
    "function", "method", "class", "object", "module", "package",
    "syntax", "runtime", "variable", "parameter", "argument",
    "return", "loop", "for loop", "while loop", "if statement",
    "condition", "boolean", "string", "array", "list", "dictionary",
    "hashmap", "tuple", "stack", "queue", "tree", "linked list",
    "pointer", "reference", "memory", "heap", "stack memory",
    # Languages
    "python", "java", "javascript", "typescript", "c++", "c#", "c language",
    "go", "golang", "rust", "php", "sql", "html", "css", "react", "nodejs",
    "json", "xml", "yaml", "bash", "shell script",
    # Data science / ML
    "pandas", "numpy", "sklearn", "tensorflow", "pytorch",
    "dataframe", "dataset", "model training", "machine learning",
    "neural network", "deep learning",
    # Debugging & errors
    "debug", "debugging", "traceback", "error", "bug", "fix this code",
    "segmentation fault", "stack overflow", "undefined variable",
    # Algorithms
    "algorithm", "time complexity", "space complexity",
    "big o notation", "sort", "merge sort", "quick sort",
    "binary search", "dynamic programming", "recursion",
    "graph traversal", "dfs", "bfs", "greedy algorithm",
    # DevOps / tools
    "docker", "kubernetes", "api", "rest api", "jwt",
    "server", "client", "database", "mongodb", "mysql",
    "postgres", "orm", "deploy", "deployment", "kafka",
    # Competitive coding
    "leetcode", "hackerrank", "codechef", "geeksforgeeks",
)


def _build_trigger_regex(triggers):
    """Compile one regex matching any trigger as a whole word or phrase."""
    ordered = sorted({t.lower() for t in triggers}, key=lambda t: (-len(t), t))
    # Very short triggers ("go", "tan", "api") are the ambiguous ones, so they
    # may only take a plural "s"; longer ones also accept common inflections
    # ("solved", "sorting", "classes").
    short = "|".join(re.escape(t) for t in ordered if len(t) <= 3)
    longer = "|".join(re.escape(t) for t in ordered if len(t) > 3)
    # Letter-only boundaries (rather than \b) keep "c++", "c#" and "python3"
    # matchable while still rejecting "go" inside "good".
    return re.compile(
        rf"(?<![a-z])(?:(?:{short})s?|(?:{longer})(?:s|es|d|ed|ing)?)(?![a-z])"
    )


_TRIGGER_RE = _build_trigger_regex(_MATH_TRIGGERS + _CODE_TRIGGERS)
_DIGIT_RE = re.compile(r"\d")
_OPERATOR_RE = re.compile(r"[-+*/=^()\[\]{}]")
_NUMBER_LIST_RE = re.compile(r"\d+\s*,\s*\d+")


def is_coding_or_math(text: str) -> bool:
    """Return True when *text* looks like a coding or math request.

    A message qualifies when any of the following holds:

    * it contains a math or coding trigger as a whole word or phrase
      (case-insensitive; substrings of unrelated words such as "go" in
      "good" do not count);
    * it contains at least one digit and at least one of ``+-*/=^()[]{}``;
    * it contains comma-separated numbers such as ``2, 6, 12``.

    Args:
        text: The raw user message.

    Returns:
        True if the message should be routed to the math/coding model,
        False otherwise (including for an empty string).
    """
    lowered = text.lower()

    if _TRIGGER_RE.search(lowered):
        return True

    # Digits together with an arithmetic symbol or bracket.
    if _DIGIT_RE.search(lowered) and _OPERATOR_RE.search(lowered):
        return True

    # Number series such as "2, 6, 12, 20".
    return bool(_NUMBER_LIST_RE.search(lowered))

# ==============================
# FIXED STREAMING (NO ECHOING!)
# ==============================
def stream_mistral(prompt):
    """Stream Mistral's reply to *prompt* as a series of chat-message updates.

    Each yielded value is a one-element list holding an assistant message
    dict whose content is the "**[Mistral]**" label followed by all text
    generated so far, stripped of surrounding whitespace. The first value
    carries the label alone. Generation is abandoned as soon as the
    module-level ``stop_event`` is set.
    """
    label = "**[Mistral]**\n\n"
    stop_event.clear()

    system_prompt = (
        "You are a helpful, concise assistant. "
        "Do NOT repeat the user's question. "
        "Answer directly and clearly."
    )
    # NOTE(review): the <<SYS>> wrapper is the Llama-2 chat convention; the
    # Mistral-Instruct model card documents only [INST] ... [/INST] — confirm
    # this is intended.
    formatted_prompt = f"<s>[INST] <<SYS>>{system_prompt}<</SYS>> {prompt} [/INST]"

    # Show the label right away, before the first token arrives.
    yield [{"role": "assistant", "content": label}]

    token_stream = mistral_model(
        formatted_prompt,
        stream=True,
        max_new_tokens=800,
        stop=["</s>"],
    )

    generated = ""
    for piece in token_stream:
        if stop_event.is_set():
            break
        generated += piece
        yield [{"role": "assistant", "content": f"{label}{generated.strip()}"}]

def stream_qwen(prompt):
    """Stream Qwen2.5-Coder's reply to *prompt* as chat-message updates.

    Each yielded value is a one-element list holding an assistant message
    dict whose content is the "**[Qwen2.5-Coder]**" label followed by all
    text generated so far. The first value carries the label alone.
    Generation is abandoned as soon as the module-level ``stop_event`` is set.
    """
    label = "**[Qwen2.5-Coder]**\n\n"
    stop_event.clear()

    # Show the label right away, before the first token arrives.
    yield [{"role": "assistant", "content": label}]

    # ChatML-style prompt: system turn, user turn, then an open assistant turn.
    system_turn = (
        "<|im_start|>system\n"
        "You are a world-class math and coding assistant. "
        "ALWAYS respond with clean LaTeX. Use $...$ for inline and $$...$$ for display. "
        "Use \\boxed{} for final answers.\n"
        "<|im_end|>\n"
    )
    user_turn = "<|im_start|>user\n" + prompt + "\n<|im_end|>\n"
    full_prompt = system_turn + user_turn + "<|im_start|>assistant\n"

    completion = qwen_model(
        full_prompt,
        stream=True,
        max_tokens=800,
        temperature=0.1,
        top_p=0.9,
        top_k=20,
        repeat_penalty=1.05,
    )

    answer = ""
    for chunk in completion:
        if stop_event.is_set():
            break

        # Prefer the completion-style "text" field; fall back to a chat-style
        # "delta" payload, and finally to an empty string.
        first_choice = chunk["choices"][0]
        piece = first_choice.get("text")
        if not piece:
            piece = first_choice.get("delta", {}).get("content", "")
        if not piece:
            piece = ""

        answer += piece
        yield [{"role": "assistant", "content": f"{label}{answer}"}]

# ==============================
# MAIN CHAT — WORKS WITH MESSAGES FORMAT
# ==============================
def chat(message, history):
    """Route *message* to the right model and stream the updated conversation.

    Args:
        message: The new user message.
        history: Prior conversation. Normally a list of ``{"role", "content"}``
            dicts (Gradio ``type="messages"``); legacy ``(user, assistant)``
            pairs are also accepted and converted. ``None`` is treated as an
            empty history.

    Yields:
        The full message list (history, the new user message, and the
        assistant reply so far) after every streamed chunk. Streaming ends
        early once the module-level ``stop_event`` is set.
    """
    stop_event.clear()

    messages = []
    for entry in history or []:
        if isinstance(entry, dict) and "role" in entry:
            messages.append(entry)
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            # Legacy pair format: unpack the pair itself. Iterating over it
            # would try to unpack each string into two names and raise.
            user_text, assistant_text = entry
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
        # Anything else is not a recognisable message and is skipped.
    messages.append({"role": "user", "content": message})

    # Only the new message is sent to the model; history is used for display.
    if is_coding_or_math(message):
        streamer = stream_qwen(message)
    else:
        streamer = stream_mistral(message)

    # The last slot holds the assistant reply and is overwritten per chunk.
    conversation = [*messages, None]
    for chunk in streamer:
        if stop_event.is_set():
            break
        conversation[-1] = chunk[0]
        yield conversation

def stop():
    """Signal the running generation to stop by setting ``stop_event``."""
    stop_event.set()

# ==============================
# UI
# ==============================
# Layout: a title, the chat window, then one row with the input box and the
# Send / Stop buttons.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# Dual Local AI — Clean Responses (No Echoing!)\n"
        "**Code/Math → Qwen2.5-Coder** | **Chat → Mistral**"
    )
    chatbot = gr.Chatbot(height=720, type="messages", show_copy_button=True)
    with gr.Row():
        txt = gr.Textbox(
            placeholder="Ask anything…",
            label="Message",
            lines=4,
            scale=8,
        )
        send = gr.Button("Send", variant="primary")
        stop_btn = gr.Button("Stop", variant="stop")

    # Both the Send button and submitting the textbox run chat(); once it has
    # finished, the textbox is emptied.
    send.click(fn=chat, inputs=[txt, chatbot], outputs=chatbot).then(
        lambda: gr.update(value=""), outputs=txt
    )
    txt.submit(fn=chat, inputs=[txt, chatbot], outputs=chatbot).then(
        lambda: gr.update(value=""), outputs=txt
    )
    # Stop only raises the shared flag; the generators notice it between chunks.
    stop_btn.click(stop)

print("Launching FINAL version (no echoing, no crashes)...")
demo.launch(server_port=7860, inbrowser=True)