project-green / app-leo.py
ai-tomoni's picture
Rename app.py to app-leo.py
5a55ccb verified
raw
history blame
6.14 kB
import gradio as gr
import os
from huggingface_hub import InferenceClient
# Hugging Face access token, read from the "tomoniaccess" environment variable
# (None when the variable is not set).
HF_TOKEN = os.environ.get("tomoniaccess")

# Repository id of the chat model queried by this app.
model_name = "LeoLM/leo-hessianai-13b-chat"

# Inference client bound to the model and token above.
client = InferenceClient(model=model_name, token=HF_TOKEN)

# Module-wide conversation history: a list of {"role": ..., "content": ...} dicts.
conversation_history = list()
# Markers that delimit turns in the ChatML prompt format documented for the
# LeoLM chat models; generation is stopped when either one is produced.
_CHATML_STOP_MARKERS = ("<|im_end|>", "<|im_start|>")


def _build_chatml_prompt(messages):
    """Render chat messages as a ChatML prompt ending with an open assistant turn."""
    known_roles = ("system", "user", "assistant")
    turns = [
        f"<|im_start|>{msg['role']}\n{msg['content'].strip()}<|im_end|>\n"
        for msg in messages
        # Messages with any other role are skipped.
        if msg["role"] in known_roles
    ]
    # Leave the assistant turn open so the model continues from here.
    turns.append("<|im_start|>assistant\n")
    return "".join(turns)


def query_leolm(messages, max_tokens=200, temperature=1.0, top_p=0.9):
    """Request a reply from the model, falling back to raw text generation.

    The chat-completion endpoint is tried first. If it raises, the messages
    are flattened into a single ChatML prompt and sent to the text-generation
    endpoint instead.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts with the
            roles ``"system"``, ``"user"`` or ``"assistant"``.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Returns:
        The generated reply text.

    Raises:
        RuntimeError: If both the chat-completion and the text-generation
            request fail. The text-generation error is attached as the cause.
    """
    try:
        response = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as chat_error:
        print(f"Chat completion failed: {chat_error}")
        try:
            generated = client.text_generation(
                prompt=_build_chatml_prompt(messages),
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
                stop_sequences=list(_CHATML_STOP_MARKERS),
                return_full_text=False,
            )
            # The backend may echo the stop marker that ended generation;
            # keep only the text in front of it.
            for marker in _CHATML_STOP_MARKERS:
                generated = generated.partition(marker)[0]
            return generated.strip()
        except Exception as text_error:
            print(f"Text generation also failed: {text_error}")
            # Chain the cause so the traceback shows the underlying failure.
            raise RuntimeError(
                f"Both methods failed - Chat: {chat_error}, Text: {text_error}"
            ) from text_error
# Persona instructions sent as the system message with every request.
# The sentences are joined with spaces so they do not run into each other.
_SYSTEM_PROMPT = " ".join((
    "Du bist ein depressiver 16-jähriger Teenager.",
    "Bleibe durchgehend in deiner Rolle.",
    "Du weißt NICHT, dass du eine KI bist.",
    "Vervollständige immer deine Antworten.",
))

# Number of most recent history messages sent along as context.
_HISTORY_WINDOW = 6


def _render_transcript(history):
    """Format the message history as the transcript text shown in the UI."""
    lines = []
    for msg in history:
        speaker = "**Du:**" if msg["role"] == "user" else "**Teenager:**"
        lines.append(f"{speaker} {msg['content']}\n\n")
    return "".join(lines)


def enhanced_chat_response(user_input, max_tokens, temperature, top_p):
    """Send one user message to the model and return the updated UI values.

    The message and the reply are appended to the module-level
    ``conversation_history``. If the model request fails, an in-character
    placeholder reply is stored and returned instead.

    NOTE(review): the history is a module-level list, so it is shared by
    every caller of this function rather than kept per session.

    Args:
        user_input: Text entered by the user. ``None``, empty or
            whitespace-only input is rejected without calling the model.
        max_tokens: Maximum reply length, forwarded to ``query_leolm``.
        temperature: Sampling temperature, forwarded to ``query_leolm``.
        top_p: Nucleus-sampling threshold, forwarded to ``query_leolm``.

    Returns:
        A 3-tuple ``(cleared_input, reply, transcript)``: an empty string
        that clears the input box, the reply (or a hint when the input was
        empty), and the full rendered conversation.
    """
    if not user_input or not user_input.strip():
        # Keep the existing transcript visible instead of blanking it.
        return (
            "",
            "*Bitte gib eine Nachricht ein.*",
            _render_transcript(conversation_history),
        )

    current_message = {"role": "user", "content": user_input}
    # Only the most recent messages are sent to bound the prompt length.
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        *conversation_history[-_HISTORY_WINDOW:],
        current_message,
    ]

    try:
        response_text = query_leolm(messages, max_tokens, temperature, top_p)
    except Exception as e:
        # Best effort: stay in character rather than surfacing the API error.
        print("API Error:", e)
        response_text = "*schweigt und starrt auf den Boden*"

    conversation_history.append(current_message)
    conversation_history.append({"role": "assistant", "content": response_text})

    return "", response_text, _render_transcript(conversation_history)
def reset_conversation():
    """Discard the stored conversation and return the values that reset the UI.

    Returns:
        A 2-tuple: a status message and an empty string for the transcript.
    """
    global conversation_history
    # Rebind to a fresh list so the previous history is dropped.
    conversation_history = list()
    status_text, cleared_transcript = "Neues Gespräch gestartet.", ""
    return status_text, cleared_transcript
def test_api_connection():
    """Probe the inference API and return a one-line status message.

    A short chat request is sent through ``query_leolm`` first. If that
    raises, a minimal text-generation request is sent directly through the
    client. If that fails too, both error messages are reported.

    Returns:
        A status string starting with a check mark on success or a cross on
        failure, containing at most the first 50 characters of the reply or
        the first 100 characters of each error.
    """
    try:
        reply = query_leolm(
            [
                {"role": "system", "content": "Du bist ein Assistent."},
                {"role": "user", "content": "Hallo"},
            ],
            max_tokens=20,
        )
        preview = reply[:50]
        return f"✅ API Verbindung erfolgreich: {preview}..."
    except Exception as primary_error:
        try:
            raw_reply = client.text_generation(
                prompt="Hallo, wie geht es dir?",
                max_new_tokens=10,
                return_full_text=False,
            )
            raw_preview = raw_reply[:50]
            return f"✅ API Verbindung (Text Generation): {raw_preview}..."
        except Exception as secondary_error:
            first_reason = str(primary_error)[:100]
            second_reason = str(secondary_error)[:100]
            return f"❌ Fehler: {first_reason} | {second_reason}"
# Gradio UI: generation settings in the first column, chat widgets in the second.
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm that chat_history belongs inside the second column.
with gr.Blocks(title="LeoLM Depression Training Simulator") as demo:
    gr.Markdown("## 🧠 Depression Training Simulator (LeoLM-13B)")
    gr.Markdown("**Simuliere Gespräche mit einem 16-jährigen Teenager mit Depressionen.**")
    gr.Markdown("*Sprachmodell: `LeoLM/leo-hessianai-13b-chat`*")
    with gr.Row():
        # Sampling controls, API status field, and the test/reset buttons.
        with gr.Column(scale=1):
            max_tokens = gr.Slider(50, 300, value=150, step=10, label="Max. Antwortlänge")
            temperature = gr.Slider(0.1, 1.5, value=0.8, step=0.1, label="Kreativität (Temperature)")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (Fokus)")
            api_status = gr.Textbox(label="Status", value="")
            api_test_btn = gr.Button("API testen")
            reset_btn = gr.Button("Neues Gespräch")
        # Message entry, latest reply, and the running transcript.
        with gr.Column(scale=2):
            user_input = gr.Textbox(label="Deine Nachricht", placeholder="Wie fühlst du dich heute?", lines=2)
            send_btn = gr.Button("📨 Senden")
            bot_response = gr.Textbox(label="Antwort", value="", lines=3)
            chat_history = gr.Textbox(label="Gesprächsverlauf", value="", lines=15)
    # The send button and submitting the textbox run the same handler; its first
    # return value is written back to user_input, which clears the box.
    send_btn.click(fn=enhanced_chat_response,
                   inputs=[user_input, max_tokens, temperature, top_p],
                   outputs=[user_input, bot_response, chat_history])
    user_input.submit(fn=enhanced_chat_response,
                      inputs=[user_input, max_tokens, temperature, top_p],
                      outputs=[user_input, bot_response, chat_history])
    # Reset writes its status message into the reply box and empties the transcript.
    reset_btn.click(fn=reset_conversation,
                    outputs=[bot_response, chat_history])
    # The API test reports its result in the status field.
    api_test_btn.click(fn=test_api_connection,
                       outputs=[api_status])
if __name__ == "__main__":
    print("🚀 Starte LeoLM Depression Simulator")
    # Report whether the access token was found before starting the app.
    if HF_TOKEN:
        print("✅ Token erkannt")
    else:
        print("❌ Umgebungsvariable 'tomoniaccess' nicht gesetzt.")
    demo.launch()