Spaces:

ai-tomoni
/

project-green

Runtime error

App Files Files

project-green / app-leo.py

ai-tomoni

Rename app.py to app-leo.py

5a55ccb verified 6 months ago

raw

history blame

6.14 kB

	import gradio as gr
	import os
	from huggingface_hub import InferenceClient

	# Read the Hugging Face access token from the environment
	HF_TOKEN = os.environ.get("tomoniaccess")

	# Name of the hosted model that answers the requests
	model_name = "LeoLM/leo-hessianai-13b-chat"

	# Inference client bound to that model and token
	client = InferenceClient(model=model_name, token=HF_TOKEN)

	# Conversation history, kept in a single module-level list
	conversation_history = []

	def query_leolm(messages, max_tokens=200, temperature=1.0, top_p=0.9):
	    """Generate a reply for *messages*, preferring the chat-completion API.

	    The chat-completion endpoint is tried first. If anything in that path
	    raises, the messages are flattened into a single tagged prompt and sent
	    to the plain text-generation endpoint instead.

	    Args:
	        messages: Sequence of dicts with ``"role"`` and ``"content"`` keys.
	        max_tokens: Upper bound on generated tokens (passed as
	            ``max_tokens`` / ``max_new_tokens`` respectively).
	        temperature: Sampling temperature forwarded to both endpoints.
	        top_p: Nucleus-sampling value forwarded to both endpoints.

	    Returns:
	        The generated text; the fallback path strips surrounding whitespace.

	    Raises:
	        RuntimeError: If both the chat-completion and the text-generation
	            attempt fail. The text-generation error is attached as the cause.
	    """
	    try:
	        response = client.chat_completion(
	            messages=messages,
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	            stream=False,
	        )
	        # Kept inside the try so a malformed response also triggers the fallback.
	        return response.choices[0].message.content
	    except Exception as chat_error:
	        print(f"Chat completion failed: {chat_error}")
	        try:
	            # Fallback: assemble the prompt by hand.
	            response = client.text_generation(
	                prompt=_build_prompt(messages),
	                max_new_tokens=max_tokens,
	                temperature=temperature,
	                top_p=top_p,
	                do_sample=True,
	                repetition_penalty=1.1,
	                # Stop before the model starts writing another turn.
	                stop_sequences=["<|user|>", "<|system|>"],
	                return_full_text=False,
	            )
	            return response.strip()
	        except Exception as text_error:
	            print(f"Text generation also failed: {text_error}")
	            raise RuntimeError(
	                f"Both methods failed - Chat: {chat_error}, Text: {text_error}"
	            ) from text_error


	# Roles that are rendered into the fallback prompt; other roles are skipped.
	_PROMPT_ROLES = ("system", "user", "assistant")


	def _build_prompt(messages):
	    """Flatten chat messages into one ``<|role|>``-tagged prompt string."""
	    # NOTE(review): the LeoLM model card appears to document a ChatML-style
	    # template; confirm these tags are what the deployed model expects.
	    parts = [
	        f"<|{msg['role']}|>\n{msg['content'].strip()}\n"
	        for msg in messages
	        if msg["role"] in _PROMPT_ROLES
	    ]
	    # Open an assistant turn so the model continues with its reply.
	    parts.append("<|assistant|>\n")
	    return "".join(parts)

	def enhanced_chat_response(user_input, max_tokens, temperature, top_p):
	    """Handle one chat turn and return the values for the three UI outputs.

	    Args:
	        user_input: Text typed by the user; ``None``, empty, or
	            whitespace-only input is rejected without calling the model.
	        max_tokens: Generation limit forwarded to ``query_leolm``.
	        temperature: Sampling temperature forwarded to ``query_leolm``.
	        top_p: Nucleus-sampling value forwarded to ``query_leolm``.

	    Returns:
	        A 3-tuple ``(cleared_input, reply, transcript)``: an empty string,
	        the reply text, and the full conversation rendered as text.
	    """
	    # Guard first, before touching any shared state.
	    if not user_input or not user_input.strip():
	        return "", "Bitte gib eine Nachricht ein.", ""

	    system_prompt = {
	        "role": "system",
	        "content": (
	            # Trailing spaces matter: adjacent literals are concatenated
	            # verbatim, so without them the sentences run together.
	            "Du bist ein depressiver 16-jähriger Teenager. "
	            "Bleibe durchgehend in deiner Rolle. "
	            "Du weißt NICHT, dass du eine KI bist. "
	            "Vervollständige immer deine Antworten."
	        ),
	    }

	    # Only the six most recent history entries are sent along as context.
	    current_message = {"role": "user", "content": user_input}
	    messages = [system_prompt, *conversation_history[-6:], current_message]

	    try:
	        response_text = query_leolm(messages, max_tokens, temperature, top_p)
	    except Exception as e:
	        # Best effort: on failure a fixed placeholder reply is used instead.
	        print("API Error:", e)
	        response_text = "schweigt und starrt auf den Boden"

	    # The list is mutated in place, so no ``global`` statement is needed.
	    conversation_history.append(current_message)
	    conversation_history.append({"role": "assistant", "content": response_text})

	    # Render the whole history; every non-user entry is labelled "Teenager:".
	    chat_display = "".join(
	        f"{'Du:' if msg['role'] == 'user' else 'Teenager:'} {msg['content']}\n\n"
	        for msg in conversation_history
	    )

	    return "", response_text, chat_display

	def reset_conversation():
	    """Discard the stored conversation and report that a new one started.

	    Rebinds the module-level ``conversation_history`` to a new empty list.

	    Returns:
	        A ``(status_message, transcript)`` pair; the transcript is empty.
	    """
	    global conversation_history
	    conversation_history = list()
	    status_message, cleared_transcript = "Neues Gespräch gestartet.", ""
	    return status_message, cleared_transcript

	def test_api_connection():
	    """Probe the inference API and return a one-line status string.

	    First sends a tiny chat through ``query_leolm``. If that raises, a bare
	    text-generation request is tried directly on the client. The function
	    itself never raises: every failure is reported in the returned string.

	    Returns:
	        A message starting with "✅" on success (including up to 50
	        characters of the model output) or "❌" followed by both error
	        texts, each truncated to 100 characters.
	    """
	    try:
	        test_messages = [
	            {"role": "system", "content": "Du bist ein Assistent."},
	            {"role": "user", "content": "Hallo"},
	        ]
	        response = query_leolm(test_messages, max_tokens=20)
	        return f"✅ API Verbindung erfolgreich: {response[:50]}..."
	    except Exception as e:
	        # Nested on purpose: ``e`` is only bound inside this handler.
	        try:
	            simple_response = client.text_generation(
	                prompt="Hallo, wie geht es dir?",
	                max_new_tokens=10,
	                return_full_text=False,
	            )
	            return f"✅ API Verbindung (Text Generation): {simple_response[:50]}..."
	        except Exception as e2:
	            # Plain "|" separator; "\|" would put a literal backslash in the text.
	            return f"❌ Fehler: {str(e)[:100]} | {str(e2)[:100]}"

	# Gradio UI: sampling controls on the left, conversation on the right
	with gr.Blocks(title="LeoLM Depression Training Simulator") as demo:
	    # Page header, rendered as three consecutive Markdown elements
	    for header_text in (
	        "## 🧠 Depression Training Simulator (LeoLM-13B)",
	        "Simuliere Gespräche mit einem 16-jährigen Teenager mit Depressionen.",
	        "Sprachmodell: `LeoLM/leo-hessianai-13b-chat`",
	    ):
	        gr.Markdown(header_text)

	    with gr.Row():
	        # Narrow column: generation settings plus status and reset controls
	        with gr.Column(scale=1):
	            max_tokens = gr.Slider(
	                minimum=50, maximum=300, value=150, step=10,
	                label="Max. Antwortlänge",
	            )
	            temperature = gr.Slider(
	                minimum=0.1, maximum=1.5, value=0.8, step=0.1,
	                label="Kreativität (Temperature)",
	            )
	            top_p = gr.Slider(
	                minimum=0.1, maximum=1.0, value=0.9, step=0.05,
	                label="Top-p (Fokus)",
	            )

	            api_status = gr.Textbox(label="Status", value="")
	            api_test_btn = gr.Button("API testen")
	            reset_btn = gr.Button("Neues Gespräch")

	        # Wide column: message entry, latest reply and running transcript
	        with gr.Column(scale=2):
	            user_input = gr.Textbox(
	                label="Deine Nachricht",
	                placeholder="Wie fühlst du dich heute?",
	                lines=2,
	            )
	            send_btn = gr.Button("📨 Senden")
	            bot_response = gr.Textbox(label="Antwort", value="", lines=3)
	            chat_history = gr.Textbox(label="Gesprächsverlauf", value="", lines=15)

	    # The send button and pressing Enter in the textbox run the same handler
	    for register_listener in (send_btn.click, user_input.submit):
	        register_listener(
	            fn=enhanced_chat_response,
	            inputs=[user_input, max_tokens, temperature, top_p],
	            outputs=[user_input, bot_response, chat_history],
	        )

	    reset_btn.click(fn=reset_conversation, outputs=[bot_response, chat_history])

	    api_test_btn.click(fn=test_api_connection, outputs=[api_status])

	if __name__ == "__main__":
	    # Script entry point: report whether a token was found, then start the app
	    print("🚀 Starte LeoLM Depression Simulator")
	    token_report = (
	        "✅ Token erkannt"
	        if HF_TOKEN
	        else "❌ Umgebungsvariable 'tomoniaccess' nicht gesetzt."
	    )
	    print(token_report)
	    demo.launch()