# NOTE: the three lines that were here ("Spaces:", "Runtime error",
# "Runtime error") were Hugging Face Spaces UI status text accidentally
# captured in a copy/paste; they are not part of the program.
| # ============================= | |
| # Shay Chatbot — Hugging Face Space | |
| # ============================= | |
| import warnings | |
| warnings.filterwarnings("ignore", category=ResourceWarning) | |
| warnings.filterwarnings("ignore", category=RuntimeWarning) | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
# -----------------------------
# Model Lazy Loading
# -----------------------------
# Hugging Face Hub repo id of the chat model; replace with the real repo name.
MODEL_ID = "your-username/Shay"
# Populated lazily by load_model() on the first request so the Space process
# starts quickly and does not download weights at import time.
model = None
tokenizer = None
def load_model():
    """Lazily load the tokenizer and model on first use.

    Subsequent calls are no-ops: the loaded objects are cached in the
    module-level ``model`` / ``tokenizer`` globals.
    """
    global model, tokenizer
    if model is not None and tokenizer is not None:
        return
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID, use_fast=True, trust_remote_code=True
    )
    load_kwargs = dict(
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    try:
        # Long-context RoPE override. BUG FIX: recent transformers releases
        # expect the key "rope_type" rather than "type", and models without
        # rotary embeddings reject the argument entirely — either case raised
        # at startup. Attempt the override, then fall back to a plain load.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            rope_scaling={"type": "dynamic", "factor": 10.0},
            **load_kwargs,
        )
    except (ValueError, TypeError, KeyError):
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
# -----------------------------
# Chat Function
# -----------------------------
def generate_chat(user_message, history, max_tokens, temperature, top_p, top_k):
    """Generate Shay's reply and append the new turn to the chat history.

    Parameters mirror the Gradio event inputs: the raw user message, the
    (user, assistant) tuple history, and the four sampling controls.
    Returns the updated history for the Chatbot component.
    """
    load_model()
    if history is None:
        history = []
    # Ignore empty submissions instead of generating from a blank turn.
    user_message = (user_message or "").strip()
    if not user_message:
        return history
    # Build prompt including the full conversation so far.
    prompt = "<|system|>You are Shay, an intelligent, unbiased, emotionless AI assistant.\n"
    for u, b in history:
        prompt += f"<|user|>{u}<|end|>\n<|assistant|>{b}<|end|>\n"
    prompt += f"<|user|>{user_message}<|end|>\n<|assistant|>"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        top_k=int(top_k),
        repetition_penalty=1.1,
        do_sample=True,
    )
    # BUG FIX: decode only the newly generated tokens. The old code decoded
    # the whole sequence with skip_special_tokens=True and split on
    # "<|assistant|>" — if that marker is registered as a special token it is
    # stripped by decoding, and the "reply" became the entire transcript.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # Trim anything after the model's own end-of-turn marker, if emitted.
    reply = reply.split("<|end|>")[0].strip()
    history.append((user_message, reply))
    return history
# -----------------------------
# Utility Functions
# -----------------------------
def copy_last(history):
    """Return the assistant's most recent reply, or "" when the chat is empty."""
    if not history:
        return ""
    _, last_reply = history[-1]
    return last_reply
def copy_all(history):
    """Render the whole conversation as plain text, one "User:/Shay:" pair per turn."""
    if not history:
        return ""
    turns = []
    for user_text, bot_text in history:
        turns.append(f"User: {user_text}\nShay: {bot_text}")
    return "\n".join(turns)
# -----------------------------
# Dark Theme CSS
# -----------------------------
# Custom stylesheet giving the whole app a dark background; intended to be
# handed to the Gradio app at build time.
DARK_CSS = """
body { background-color: #111; color: #eee; }
.gradio-container { background-color: #111; max-width: 100% !important; }
.gradio-chatbot { background-color: #222; color: #eee; }
input, textarea { background-color: #222; color: #eee; border: 1px solid #555; }
button { background-color: #333; color: #eee; border: 1px solid #555; }
"""
# -----------------------------
# Gradio UI
# -----------------------------
# BUG FIX: custom css must be passed to gr.Blocks(); app.launch() has no
# `css` parameter, so the dark theme was never applied (and passing it to
# launch raises TypeError at startup).
with gr.Blocks(css=DARK_CSS) as app:
    gr.Markdown(
        "## Shay — Ultra Reliable AI Assistant\n"
        "Unbiased, emotionless, and able to converse on any topic.\n\n"
        "**Adjust generation parameters below:**"
    )
    chatbot = gr.Chatbot(height=600)
    user_input = gr.Textbox(placeholder="Type your message here...", container=False, scale=8)
    send_btn = gr.Button("Send", variant="primary", scale=2)
    clear_btn = gr.Button("Clear", variant="secondary")
    copy_last_btn = gr.Button("Copy Last Message", variant="secondary")
    copy_all_btn = gr.Button("Copy Full History", variant="secondary")
    # BUG FIX: the copy buttons previously wrote their result to None, so
    # they visibly did nothing. Give them a read-only textbox as a target.
    copy_box = gr.Textbox(label="Copied Text", interactive=False)
    # Sliders for generation parameters
    max_tokens_slider = gr.Slider(32, 1024, value=256, step=32, label="Max New Tokens")
    temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
    top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.01, label="Top-p")
    top_k_slider = gr.Slider(1, 200, value=50, step=1, label="Top-k")
    gen_inputs = [user_input, chatbot, max_tokens_slider, temperature_slider,
                  top_p_slider, top_k_slider]
    # Chat actions: run generation, then clear the input box for the next turn.
    user_input.submit(generate_chat, gen_inputs, chatbot).then(
        lambda: "", None, user_input, queue=False
    )
    send_btn.click(generate_chat, gen_inputs, chatbot).then(
        lambda: "", None, user_input, queue=False
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)
    copy_last_btn.click(copy_last, chatbot, copy_box)
    copy_all_btn.click(copy_all, chatbot, copy_box)

# Queue to handle multiple users efficiently
app.queue(max_size=64)
# -----------------------------
# Launch App (Single Launch)
# -----------------------------
# BUG FIX: launch() does not accept a `css` keyword (css belongs to
# gr.Blocks), so passing it raised TypeError the moment the Space booted —
# matching the "Runtime error" status. Bind on all interfaces for Spaces.
app.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)