import os
import threading
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hugging Face access token taken from the environment; None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Hub repository passed to the tokenizer/model loaders in load_model().
REPO_ID = "TitleOS/GalacticReasoning-1.3B-Q8"

# ChatML-style Jinja template used when rendering the prompt. Every message is
# wrapped as "<|im_start|>{role}\n{content}<|im_end|>\n", and an opening
# "<|im_start|>assistant\n" header is appended when a generation prompt is
# requested.
FALLBACK_CHAT_TEMPLATE = "".join(
    [
        "{% for message in messages %}",
        "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}",
        "{% endfor %}",
        "{% if add_generation_prompt %}",
        "{{ '<|im_start|>assistant\n' }}",
        "{% endif %}",
    ]
)

# Module-level cache filled in by load_model(); both stay None until then.
tokenizer = model = None

def load_model():
    """Return the ``(tokenizer, model)`` pair, loading both on first use.

    The pair is cached in the module-level ``tokenizer`` and ``model``
    globals; once ``model`` is set, later calls return the cached objects
    without touching the Hub again.
    """
    global tokenizer, model

    # Fast path: a previous call already populated the cache.
    if model is not None:
        return tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, token=HF_TOKEN)
    # device_map="auto" delegates weight placement to the library.
    model = AutoModelForCausalLM.from_pretrained(
        REPO_ID,
        token=HF_TOKEN,
        device_map="auto",
    )
    return tokenizer, model

def _read_attachment(filepath):
    """Return a user-role message holding the text of *filepath*, or None.

    Reading is best-effort: a missing/unreadable file is reported on stdout
    and skipped so one bad attachment does not abort the whole reply.
    """
    if not isinstance(filepath, (str, os.PathLike)) or not filepath:
        return None
    try:
        # errors="ignore" drops undecodable bytes instead of raising.
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            file_text = f.read()
    except (OSError, ValueError) as e:
        print(f"Error reading file: {e}")
        return None
    return {
        "role": "user",
        "content": f"--- Attachment: {os.path.basename(filepath)} ---\n{file_text}",
    }


def _content_to_messages(role, content):
    """Expand one history entry's content into plain-text chat messages.

    Handled shapes:
      * ``str``   - kept as a message for *role* unless blank;
      * ``tuple`` - first element is treated as an attachment path;
      * ``dict``  - a ``"text"`` string, or a path under ``"file"["path"]``
        or ``"path"``;
      * ``list``  - each element is expanded recursively.
    Anything else is ignored.

    NOTE(review): the dict/list shapes are accepted defensively because the
    content layout the Chatbot component hands back appears to differ between
    Gradio releases - confirm against the installed version.
    """
    if isinstance(content, str):
        return [{"role": role, "content": content}] if content.strip() else []

    if isinstance(content, tuple):
        # Guard against an empty tuple instead of raising IndexError.
        attachment = _read_attachment(content[0]) if content else None
        return [attachment] if attachment else []

    if isinstance(content, dict):
        text = content.get("text")
        if isinstance(text, str):
            return _content_to_messages(role, text)
        file_info = content.get("file")
        path = file_info.get("path") if isinstance(file_info, dict) else content.get("path")
        attachment = _read_attachment(path)
        return [attachment] if attachment else []

    if isinstance(content, list):
        expanded = []
        for block in content:
            expanded.extend(_content_to_messages(role, block))
        return expanded

    return []


def _merge_consecutive_roles(messages):
    """Join adjacent messages that share a role, separated by a blank line."""
    merged = []
    for msg in messages:
        if merged and merged[-1]["role"] == msg["role"]:
            merged[-1]["content"] += "\n\n" + msg["content"]
        else:
            # Copy so the caller's message dicts are never mutated.
            merged.append(dict(msg))
    return merged


@spaces.GPU(duration=180)
def bot(history):
    """Stream the assistant's reply for the conversation in *history*.

    Args:
        history: List of chat entries, each a mapping with ``"role"`` and
            ``"content"`` keys. Text content is used as-is; attachment
            content is replaced by the text of the referenced file.

    Yields:
        The same *history* list after each streamed chunk. An empty assistant
        entry is appended before generation starts and grows in place as text
        arrives.

    Raises:
        queue.Empty: If the streamer receives no new text within its 10 second
            timeout (per the ``TextIteratorStreamer`` ``timeout`` contract).
    """
    tok, mod = load_model()

    raw_messages = []
    for msg in history:
        raw_messages.extend(_content_to_messages(msg["role"], msg["content"]))
    merged_messages = _merge_consecutive_roles(raw_messages)

    # The fallback template is always supplied here to bypass the
    # missing-template error. return_dict=True makes the return type explicit
    # and also yields the attention mask alongside the token ids.
    encoded = tok.apply_chat_template(
        merged_messages,
        chat_template=FALLBACK_CHAT_TEMPLATE,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )

    history.append({"role": "assistant", "content": ""})

    streamer = TextIteratorStreamer(tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    # Forward only the keys generate() needs; other tokenizer outputs
    # (e.g. token_type_ids) are deliberately not passed through.
    generate_kwargs = dict(
        input_ids=encoded["input_ids"].to(mod.device),
        streamer=streamer,
        max_new_tokens=4096,
    )
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        # Supplying the mask means generate() does not have to infer it.
        generate_kwargs["attention_mask"] = attention_mask.to(mod.device)

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator drains the streamer and pushes partial text to the UI.
    t = threading.Thread(target=mod.generate, kwargs=generate_kwargs)
    t.start()

    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history

    # The streamer is exhausted, so generation is finishing; wait for the
    # worker to return before this function exits.
    t.join()

def add_user_message(msg, hist):
    """Append the submitted files and text to the chat history.

    Args:
        msg: Submitted textbox value; its ``"files"`` entry is iterated and
            its ``"text"`` entry is read.
        hist: Chat history list, extended in place.

    Returns:
        A ``(hist, textbox)`` pair, where the textbox is a cleared,
        non-interactive ``gr.MultimodalTextbox``.
    """
    # Each file becomes its own user entry whose content is a 1-tuple path.
    hist.extend({"role": "user", "content": (path,)} for path in msg["files"])

    text = msg["text"]
    if text:
        hist.append({"role": "user", "content": text})

    locked_box = gr.MultimodalTextbox(value={"text": "", "files": []}, interactive=False)
    return hist, locked_box

# UI layout: a chat pane plus a multimodal input box that accepts text files.
with gr.Blocks(fill_height=True) as demo:
    chatbot = gr.Chatbot(scale=1)
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["text"],
        placeholder="Write a prompt to test Galactic Reasoning's Chain of Thought, use <think> to encourage this behavior at the end of your prompt.",
        show_label=False,
    )

    # Step 1: record the user's submission and lock the input box.
    submitted = chat_input.submit(
        add_user_message,
        inputs=[chat_input, chatbot],
        outputs=[chatbot, chat_input],
    )
    # Step 2: stream the model's reply into the chat pane.
    replied = submitted.then(bot, inputs=[chatbot], outputs=[chatbot])
    # Step 3: make the input box interactive again.
    replied.then(
        lambda: gr.MultimodalTextbox(interactive=True),
        outputs=[chat_input],
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)