Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| from huggingface_hub import InferenceClient | |
# Read the Hugging Face access token from the environment.
HF_TOKEN = os.environ.get("tomoniaccess")

# Hub identifier of the model that all requests are sent to.
model_name = "LeoLM/leo-hessianai-13b-chat"

# Shared inference client bound to the model above.
client = InferenceClient(model=model_name, token=HF_TOKEN)

# Module-wide conversation history: a list of {"role": ..., "content": ...} dicts.
conversation_history = []
def _build_chatml_prompt(messages):
    """Render chat messages as a ChatML prompt ending with an open assistant turn.

    Messages whose role is not system/user/assistant are skipped.
    """
    known_roles = ("system", "user", "assistant")
    turns = [
        f"<|im_start|>{msg['role']}\n{msg['content'].strip()}<|im_end|>\n"
        for msg in messages
        if msg["role"] in known_roles
    ]
    # Leave the assistant turn open so the model continues from here.
    turns.append("<|im_start|>assistant\n")
    return "".join(turns)


def query_leolm(messages, max_tokens=200, temperature=1.0, top_p=0.9):
    """Ask the model for a reply, preferring chat completion over raw text generation.

    The chat-completion endpoint is tried first. If it raises, the messages
    are rendered into a single ChatML prompt (the template documented on the
    LeoLM chat model card) and sent to the text-generation endpoint instead.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Returns:
        The generated reply text.

    Raises:
        RuntimeError: If both the chat-completion and the text-generation
            attempt fail; the message contains both underlying errors.
    """
    try:
        response = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as error:
        print(f"Chat completion failed: {error}")
        # The `as` name is unbound when the handler exits, so keep a reference
        # for the combined error message below.
        chat_error = error

    stop_markers = ["<|im_end|>", "<|im_start|>"]
    try:
        # Fallback: build the prompt by hand and use plain text generation.
        response = client.text_generation(
            prompt=_build_chatml_prompt(messages),
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            repetition_penalty=1.1,
            stop_sequences=stop_markers,
            return_full_text=False,
        )
        reply = response.strip()
        # NOTE(review): the backend may echo the stop sequence at the end of
        # the output; drop it if present so it never reaches the UI.
        for marker in stop_markers:
            if reply.endswith(marker):
                reply = reply[: -len(marker)].rstrip()
        return reply
    except Exception as text_error:
        print(f"Text generation also failed: {text_error}")
        raise RuntimeError(
            f"Both methods failed - Chat: {chat_error}, Text: {text_error}"
        ) from text_error
def _render_history(history):
    """Format stored messages as the transcript text shown in the UI."""
    lines = []
    for msg in history:
        speaker = "**Du:**" if msg["role"] == "user" else "**Teenager:**"
        lines.append(f"{speaker} {msg['content']}\n\n")
    return "".join(lines)


def enhanced_chat_response(user_input, max_tokens, temperature, top_p):
    """Send one user message to the model and update the shared conversation.

    The system prompt, the last six stored messages and the new user message
    are passed to ``query_leolm``. The user message and the reply are then
    appended to the module-level ``conversation_history``. If the model call
    fails, an in-character placeholder reply is stored instead.

    Args:
        user_input: Text typed by the user; may be empty or ``None``.
        max_tokens: Upper bound on the reply length, forwarded to the model.
        temperature: Sampling temperature, forwarded to the model.
        top_p: Nucleus-sampling threshold, forwarded to the model.

    Returns:
        A ``(cleared_input, reply, transcript)`` tuple of strings matching the
        three output components this handler is bound to.
    """
    if not user_input or not user_input.strip():
        # Nothing to send: keep the existing transcript visible instead of
        # blanking it.
        return (
            "",
            "*Bitte gib eine Nachricht ein.*",
            _render_history(conversation_history),
        )

    system_prompt = {
        "role": "system",
        "content": (
            # Trailing spaces keep the sentences from running together once
            # the adjacent literals are concatenated.
            "Du bist ein depressiver 16-jähriger Teenager. "
            "Bleibe durchgehend in deiner Rolle. "
            "Du weißt NICHT, dass du eine KI bist. "
            "Vervollständige immer deine Antworten."
        ),
    }

    # Only the six most recent stored messages are sent along as context.
    current_message = {"role": "user", "content": user_input}
    messages = [system_prompt, *conversation_history[-6:], current_message]

    try:
        response_text = query_leolm(messages, max_tokens, temperature, top_p)
    except Exception as e:
        # Deliberate best-effort: log the failure and answer in character.
        print("API Error:", e)
        response_text = "*schweigt und starrt auf den Boden*"

    conversation_history.append(current_message)
    conversation_history.append({"role": "assistant", "content": response_text})

    return "", response_text, _render_history(conversation_history)
def reset_conversation():
    """Discard the stored conversation and report that a new one has started.

    Rebinds the module-level ``conversation_history`` to a fresh empty list.

    Returns:
        A ``(status_message, transcript)`` pair: the confirmation text and an
        empty transcript string.
    """
    global conversation_history
    status_message, empty_transcript = "Neues Gespräch gestartet.", ""
    conversation_history = []
    return status_message, empty_transcript
def test_api_connection():
    """Probe the inference API and return a one-line status string.

    First sends a short two-message chat through ``query_leolm``. If that
    raises, a bare text-generation request is tried directly on ``client``.
    Errors never propagate; they are reported in the returned string.

    Returns:
        A status message starting with "✅" on success (including the first
        50 characters of the reply) or "❌" when both attempts failed
        (including up to 100 characters of each error).
    """
    probe_messages = [
        {"role": "system", "content": "Du bist ein Assistent."},
        {"role": "user", "content": "Hallo"},
    ]
    try:
        reply = query_leolm(probe_messages, max_tokens=20)
        return f"✅ API Verbindung erfolgreich: {reply[:50]}..."
    except Exception as error:
        # Keep the first failure so it can be reported next to the second one.
        primary_error = error

    try:
        plain_reply = client.text_generation(
            prompt="Hallo, wie geht es dir?",
            max_new_tokens=10,
            return_full_text=False,
        )
        return f"✅ API Verbindung (Text Generation): {plain_reply[:50]}..."
    except Exception as fallback_error:
        return f"❌ Fehler: {str(primary_error)[:100]} | {str(fallback_error)[:100]}"
# Gradio UI: generation settings and controls on the left, chat on the right.
with gr.Blocks(title="LeoLM Depression Training Simulator") as demo:
    gr.Markdown("## 🧠 Depression Training Simulator (LeoLM-13B)")
    gr.Markdown("**Simuliere Gespräche mit einem 16-jährigen Teenager mit Depressionen.**")
    gr.Markdown("*Sprachmodell: `LeoLM/leo-hessianai-13b-chat`*")

    with gr.Row():
        # Left column: sampling parameters, API status and control buttons.
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=50, maximum=300, value=150, step=10,
                label="Max. Antwortlänge",
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=1.5, value=0.8, step=0.1,
                label="Kreativität (Temperature)",
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                label="Top-p (Fokus)",
            )
            api_status = gr.Textbox(label="Status", value="")
            api_test_btn = gr.Button("API testen")
            reset_btn = gr.Button("Neues Gespräch")

        # Right column: message input, latest reply and running transcript.
        with gr.Column(scale=2):
            user_input = gr.Textbox(
                label="Deine Nachricht",
                placeholder="Wie fühlst du dich heute?",
                lines=2,
            )
            send_btn = gr.Button("📨 Senden")
            bot_response = gr.Textbox(label="Antwort", value="", lines=3)
            chat_history = gr.Textbox(label="Gesprächsverlauf", value="", lines=15)

    # Clicking "send" and pressing Enter in the input box trigger the same handler.
    _chat_event = {
        "fn": enhanced_chat_response,
        "inputs": [user_input, max_tokens, temperature, top_p],
        "outputs": [user_input, bot_response, chat_history],
    }
    send_btn.click(**_chat_event)
    user_input.submit(**_chat_event)

    reset_btn.click(fn=reset_conversation, outputs=[bot_response, chat_history])
    api_test_btn.click(fn=test_api_connection, outputs=[api_status])
if __name__ == "__main__":
    print("🚀 Starte LeoLM Depression Simulator")
    # Report whether the access token was found in the environment.
    token_status = (
        "✅ Token erkannt"
        if HF_TOKEN
        else "❌ Umgebungsvariable 'tomoniaccess' nicht gesetzt."
    )
    print(token_status)
    demo.launch()