"""Gradio front end for a role-play chat with LeoLM via the HF Inference API.

The app sends a fixed system prompt plus a sliding window of the conversation
to the model and shows both the latest reply and the full transcript.
"""

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Load the Hugging Face token from the environment.
HF_TOKEN = os.getenv("tomoniaccess")

# Name of the model queried through the inference client.
model_name = "LeoLM/leo-hessianai-13b-chat"

# Number of most recent history messages sent along with each request.
MAX_HISTORY_MESSAGES = 6

# Reply stored and shown when both API call styles fail.
FALLBACK_REPLY = "*schweigt und starrt auf den Boden*"

# Persona instructions. The sentences are joined with a space so they do not
# run into each other ("...Teenager.Bleibe...") in the prompt.
SYSTEM_PROMPT = {
    "role": "system",
    "content": " ".join(
        (
            "Du bist ein depressiver 16-jähriger Teenager.",
            "Bleibe durchgehend in deiner Rolle.",
            "Du weißt NICHT, dass du eine KI bist.",
            "Vervollständige immer deine Antworten.",
        )
    ),
}

# Initialise the inference client.
client = InferenceClient(
    model=model_name,
    token=HF_TOKEN,
)

# Global conversation history (list of {"role", "content"} dicts).
# NOTE(review): this list lives at module level, so every handler call in this
# process reads and writes the same history; if several users share one
# running instance their conversations will mix. Consider per-session state.
conversation_history = []

# Role -> tag used when the prompt has to be assembled by hand.
# NOTE(review): these tags are not the ChatML markers (`<|im_start|>`); verify
# against the model card which template the model actually expects.
_ROLE_TAGS = {
    "system": "<|system|>",
    "user": "<|user|>",
    "assistant": "<|assistant|>",
}


def _build_prompt(messages):
    """Flatten chat messages into a single tagged prompt string.

    Messages whose role is not in ``_ROLE_TAGS`` are skipped. The prompt ends
    with an open assistant tag so the model continues as the assistant.
    """
    parts = [
        f"{_ROLE_TAGS[msg['role']]}\n{msg['content'].strip()}\n"
        for msg in messages
        if msg["role"] in _ROLE_TAGS
    ]
    parts.append("<|assistant|>\n")
    return "".join(parts)


def _render_history(history):
    """Render the conversation as Markdown-style text for the transcript box."""
    return "".join(
        f"{'**Du:**' if msg['role'] == 'user' else '**Teenager:**'} "
        f"{msg['content']}\n\n"
        for msg in history
    )


def query_leolm(messages, max_tokens=200, temperature=1.0, top_p=0.9):
    """Query the model, trying chat completion first and text generation second.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Returns:
        The generated reply text.

    Raises:
        RuntimeError: If both the chat-completion and the text-generation
            call fail. The text-generation error is attached as the cause.
    """
    try:
        # First attempt: chat-completion endpoint.
        response = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as chat_error:
        print(f"Chat completion failed: {chat_error}")

        try:
            # Fallback: assemble the prompt manually and use text generation.
            response = client.text_generation(
                prompt=_build_prompt(messages),
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
                stop_sequences=["<|user|>", "<|system|>"],
                return_full_text=False,
            )
            return response.strip()
        except Exception as text_error:
            print(f"Text generation also failed: {text_error}")
            # Chain the cause so the traceback keeps the underlying error.
            raise RuntimeError(
                f"Both methods failed - Chat: {chat_error}, Text: {text_error}"
            ) from text_error


def enhanced_chat_response(user_input, max_tokens, temperature, top_p):
    """Handle one user turn and update the shared conversation history.

    Args:
        user_input: Text typed by the user.
        max_tokens: Maximum reply length passed to the model.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.

    Returns:
        A 3-tuple ``(new_input_value, reply_text, transcript)`` matching the
        outputs wired in the UI: the input box is cleared, the reply box gets
        the latest answer and the transcript box gets the full conversation.
    """
    if not user_input or not user_input.strip():
        # Keep the existing transcript visible instead of blanking it.
        return (
            "",
            "*Bitte gib eine Nachricht ein.*",
            _render_history(conversation_history),
        )

    # Only send the most recent part of the history to bound the prompt size.
    current_message = {"role": "user", "content": user_input}
    messages = [
        SYSTEM_PROMPT,
        *conversation_history[-MAX_HISTORY_MESSAGES:],
        current_message,
    ]

    try:
        response_text = query_leolm(messages, max_tokens, temperature, top_p)
    except Exception as e:
        # Best effort: stay in character rather than surfacing the API error.
        print("API Error:", e)
        response_text = FALLBACK_REPLY

    conversation_history.append(current_message)
    conversation_history.append({"role": "assistant", "content": response_text})

    return "", response_text, _render_history(conversation_history)


def reset_conversation():
    """Clear the shared history and return values for the reply/transcript boxes."""
    conversation_history.clear()
    return "Neues Gespräch gestartet.", ""


def test_api_connection():
    """Probe the API and return a human-readable status line.

    Tries ``query_leolm`` first; if that raises, tries a bare text-generation
    call. Returns a success message with the first 50 characters of the reply,
    or an error message with both failures truncated to 100 characters.
    """
    try:
        test_messages = [
            {"role": "system", "content": "Du bist ein Assistent."},
            {"role": "user", "content": "Hallo"},
        ]
        response = query_leolm(test_messages, max_tokens=20)
        return f"✅ API Verbindung erfolgreich: {response[:50]}..."
    except Exception as e:
        try:
            simple_response = client.text_generation(
                prompt="Hallo, wie geht es dir?",
                max_new_tokens=10,
                return_full_text=False,
            )
            return f"✅ API Verbindung (Text Generation): {simple_response[:50]}..."
        except Exception as e2:
            return f"❌ Fehler: {str(e)[:100]} | {str(e2)[:100]}"


# Gradio UI
with gr.Blocks(title="LeoLM Depression Training Simulator") as demo:
    gr.Markdown("## 🧠 Depression Training Simulator (LeoLM-13B)")
    gr.Markdown("**Simuliere Gespräche mit einem 16-jährigen Teenager mit Depressionen.**")
    gr.Markdown("*Sprachmodell: `LeoLM/leo-hessianai-13b-chat`*")

    with gr.Row():
        # Left column: generation settings and maintenance buttons.
        with gr.Column(scale=1):
            max_tokens = gr.Slider(50, 300, value=150, step=10, label="Max. Antwortlänge")
            temperature = gr.Slider(0.1, 1.5, value=0.8, step=0.1, label="Kreativität (Temperature)")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (Fokus)")
            api_status = gr.Textbox(label="Status", value="")
            api_test_btn = gr.Button("API testen")
            reset_btn = gr.Button("Neues Gespräch")

        # Right column: chat input, latest reply and transcript.
        with gr.Column(scale=2):
            user_input = gr.Textbox(label="Deine Nachricht", placeholder="Wie fühlst du dich heute?", lines=2)
            send_btn = gr.Button("📨 Senden")
            bot_response = gr.Textbox(label="Antwort", value="", lines=3)
            chat_history = gr.Textbox(label="Gesprächsverlauf", value="", lines=15)

    # The send button and pressing Enter in the input box trigger the same handler.
    chat_inputs = [user_input, max_tokens, temperature, top_p]
    chat_outputs = [user_input, bot_response, chat_history]
    send_btn.click(fn=enhanced_chat_response, inputs=chat_inputs, outputs=chat_outputs)
    user_input.submit(fn=enhanced_chat_response, inputs=chat_inputs, outputs=chat_outputs)

    reset_btn.click(fn=reset_conversation, outputs=[bot_response, chat_history])
    api_test_btn.click(fn=test_api_connection, outputs=[api_status])


if __name__ == "__main__":
    print("🚀 Starte LeoLM Depression Simulator")
    if not HF_TOKEN:
        print("❌ Umgebungsvariable 'tomoniaccess' nicht gesetzt.")
    else:
        print("✅ Token erkannt")
    demo.launch()