import gradio as gr
import os
from huggingface_hub import InferenceClient

# Hugging Face access token, read from the "tomoniaccess" environment
# variable; the value is None when that variable is not set.
HF_TOKEN = os.environ.get("tomoniaccess")

# Identifier of the hosted model that requests are sent to.
model_name = "LeoLM/leo-hessianai-13b-chat"

# Inference client bound to the model and token defined above.
client = InferenceClient(token=HF_TOKEN, model=model_name)

# Module-level conversation history: a list of {"role": ..., "content": ...}
# message dicts that the chat handlers below read and extend.
conversation_history = []

def _build_chatml_prompt(messages):
    """Render chat messages as a ChatML prompt ending in an open assistant turn."""
    known_roles = ("system", "user", "assistant")
    turns = [
        f"<|im_start|>{msg['role']}\n{msg['content'].strip()}<|im_end|>\n"
        for msg in messages
        # Messages with any other role are left out of the prompt.
        if msg["role"] in known_roles
    ]
    # Leave the assistant turn open so the model continues from here.
    turns.append("<|im_start|>assistant\n")
    return "".join(turns)


def query_leolm(messages, max_tokens=200, temperature=1.0, top_p=0.9):
    """Request a model reply, trying chat completion before text generation.

    The chat-completion endpoint is tried first. If it raises for any reason,
    the messages are rendered into a single prompt and sent to the
    text-generation endpoint instead. The prompt uses the ChatML template
    (``<|im_start|>role ... <|im_end|>``) that the LeoLM chat model card
    specifies for this model.

    Args:
        messages: List of dicts with ``"role"`` and ``"content"`` keys.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to the endpoint.
        top_p: Nucleus-sampling threshold passed to the endpoint.

    Returns:
        The message content of the first chat-completion choice, or the
        stripped generated text when the fallback was used.

    Raises:
        RuntimeError: If both endpoints fail. The message contains both error
            texts and the text-generation error is attached as the cause.
    """
    try:
        response = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as chat_error:
        # Deliberately broad: any failure of the chat endpoint triggers the
        # text-generation fallback.
        print(f"Chat completion failed: {chat_error}")
        try:
            response = client.text_generation(
                prompt=_build_chatml_prompt(messages),
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
                # Stop at the end of the assistant turn, or if the model
                # starts opening another turn on its own.
                stop_sequences=["<|im_end|>", "<|im_start|>"],
                return_full_text=False,
            )
            return response.strip()
        except Exception as text_error:
            print(f"Text generation also failed: {text_error}")
            raise RuntimeError(
                f"Both methods failed - Chat: {chat_error}, Text: {text_error}"
            ) from text_error

def _render_transcript(history):
    """Format role/content message dicts as the transcript text shown in the UI."""
    entries = []
    for msg in history:
        speaker = "**Du:**" if msg["role"] == "user" else "**Teenager:**"
        entries.append(f"{speaker} {msg['content']}\n\n")
    return "".join(entries)


def enhanced_chat_response(user_input, max_tokens, temperature, top_p):
    """Send one user message to the role-played teenager and build the UI update.

    The user message and the reply are appended to the module-level
    ``conversation_history``. If the model call fails, an in-character
    placeholder reply is used (and stored) instead of raising.

    NOTE(review): the history is a single module-level list, so every caller
    of this function shares the same conversation.

    Args:
        user_input: Text entered by the user. ``None``, empty, or
            whitespace-only input is rejected with a hint and is neither sent
            to the model nor stored.
        max_tokens: Forwarded to ``query_leolm``.
        temperature: Forwarded to ``query_leolm``.
        top_p: Forwarded to ``query_leolm``.

    Returns:
        A 3-tuple ``(new input text, latest reply, transcript)``. The first
        element is always ``""``; the transcript is rendered from the whole
        stored history.
    """
    if not user_input or not user_input.strip():
        # Re-render the stored history so a rejected message does not blank
        # out the transcript that is already on screen.
        return (
            "",
            "*Bitte gib eine Nachricht ein.*",
            _render_transcript(conversation_history),
        )

    system_prompt = {
        "role": "system",
        "content": (
            # Adjacent literals are concatenated verbatim, so each sentence
            # carries its own trailing space.
            "Du bist ein depressiver 16-jähriger Teenager. "
            "Bleibe durchgehend in deiner Rolle. "
            "Du weißt NICHT, dass du eine KI bist. "
            "Vervollständige immer deine Antworten."
        ),
    }

    current_message = {"role": "user", "content": user_input}
    # Only the six most recent stored messages (three user/assistant
    # exchanges) are sent along as context.
    messages = [system_prompt, *conversation_history[-6:], current_message]

    try:
        response_text = query_leolm(messages, max_tokens, temperature, top_p)
    except Exception as e:
        # Best effort at the UI boundary: log and answer in character.
        print("API Error:", e)
        response_text = "*schweigt und starrt auf den Boden*"

    conversation_history.extend(
        [current_message, {"role": "assistant", "content": response_text}]
    )

    return "", response_text, _render_transcript(conversation_history)

def reset_conversation():
    """Start a fresh conversation by replacing the stored history.

    Returns:
        A 2-tuple ``(status message, empty transcript text)``.
    """
    global conversation_history
    # Rebind the module-level name to a brand-new empty list.
    conversation_history = list()
    status_message = "Neues Gespräch gestartet."
    empty_transcript = ""
    return status_message, empty_transcript

def test_api_connection():
    """Probe the inference API and summarise the outcome as one status line.

    A short chat request is sent through ``query_leolm`` first. If anything in
    that attempt raises, a minimal plain text-generation request is made
    directly on the client.

    Returns:
        A status string: a success line with up to 50 characters of the reply
        from whichever attempt worked, or a failure line with up to 100
        characters of each error message.
    """
    probe_messages = [
        {"role": "system", "content": "Du bist ein Assistent."},
        {"role": "user", "content": "Hallo"},
    ]
    try:
        reply = query_leolm(probe_messages, max_tokens=20)
        preview = reply[:50]
        return f"✅ API Verbindung erfolgreich: {preview}..."
    except Exception as primary_error:
        try:
            fallback_reply = client.text_generation(
                prompt="Hallo, wie geht es dir?",
                max_new_tokens=10,
                return_full_text=False,
            )
            fallback_preview = fallback_reply[:50]
            return f"✅ API Verbindung (Text Generation): {fallback_preview}..."
        except Exception as fallback_error:
            first_reason = str(primary_error)[:100]
            second_reason = str(fallback_error)[:100]
            return f"❌ Fehler: {first_reason} | {second_reason}"

# Gradio UI: one row with a settings/maintenance column and a chat column.
with gr.Blocks(title="LeoLM Depression Training Simulator") as demo:
    gr.Markdown("## 🧠 Depression Training Simulator (LeoLM-13B)")
    gr.Markdown("**Simuliere Gespräche mit einem 16-jährigen Teenager mit Depressionen.**")
    gr.Markdown("*Sprachmodell: `LeoLM/leo-hessianai-13b-chat`*")

    with gr.Row():
        # First column: generation settings plus API test and reset controls.
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=50,
                maximum=300,
                value=150,
                step=10,
                label="Max. Antwortlänge",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.8,
                step=0.1,
                label="Kreativität (Temperature)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p (Fokus)",
            )

            api_status = gr.Textbox(label="Status", value="")
            api_test_btn = gr.Button("API testen")
            reset_btn = gr.Button("Neues Gespräch")

        # Second column: message entry, latest reply and full transcript.
        with gr.Column(scale=2):
            user_input = gr.Textbox(
                label="Deine Nachricht",
                placeholder="Wie fühlst du dich heute?",
                lines=2,
            )
            send_btn = gr.Button("📨 Senden")
            bot_response = gr.Textbox(label="Antwort", value="", lines=3)
            chat_history = gr.Textbox(label="Gesprächsverlauf", value="", lines=15)

    # The send button and the message box's submit event run the same handler
    # with the same inputs and outputs.
    for _register in (send_btn.click, user_input.submit):
        _register(
            fn=enhanced_chat_response,
            inputs=[user_input, max_tokens, temperature, top_p],
            outputs=[user_input, bot_response, chat_history],
        )

    reset_btn.click(fn=reset_conversation, outputs=[bot_response, chat_history])

    api_test_btn.click(fn=test_api_connection, outputs=[api_status])

if __name__ == "__main__":
    # Script entry point: report whether a token was found, then start the UI.
    print("🚀 Starte LeoLM Depression Simulator")
    token_report = (
        "✅ Token erkannt"
        if HF_TOKEN
        else "❌ Umgebungsvariable 'tomoniaccess' nicht gesetzt."
    )
    print(token_report)
    demo.launch()