# =============================
# Shay Chatbot — Hugging Face Space
# =============================
import warnings

warnings.filterwarnings("ignore", category=ResourceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# -----------------------------
# Model Lazy Loading
# -----------------------------
MODEL_ID = "your-username/Shay"  # Hugging Face Hub repo id of the chat model

# Module-level cache so the (expensive) model load happens at most once.
model = None
tokenizer = None


def load_model():
    """Lazily load the tokenizer and model on first use.

    Populates the module-level ``model``/``tokenizer`` globals; subsequent
    calls are no-ops. ``device_map="auto"`` lets accelerate place the model
    on whatever hardware the Space provides.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID, use_fast=True, trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            # NOTE(review): dynamic RoPE scaling (factor 10) extends usable
            # context — confirm the base architecture honors this override.
            rope_scaling={"type": "dynamic", "factor": 10.0},
        )


# -----------------------------
# Chat Function
# -----------------------------
def generate_chat(user_message, history, max_tokens, temperature, top_p, top_k):
    """Generate Shay's reply and append the turn to ``history``.

    Parameters
    ----------
    user_message : str
        The new user message.
    history : list[tuple[str, str]] | None
        Prior (user, assistant) turns; ``None`` is treated as empty.
    max_tokens, temperature, top_p, top_k
        Sampling parameters forwarded to ``model.generate``.

    Returns
    -------
    list[tuple[str, str]]
        The updated history, suitable as a ``gr.Chatbot`` value.
    """
    load_model()
    if history is None:
        history = []

    # Rebuild the full conversation in the model's chat template each turn.
    prompt = "<|system|>You are Shay, an intelligent, unbiased, emotionless AI assistant.\n"
    for u, b in history:
        prompt += f"<|user|>{u}<|end|>\n<|assistant|>{b}<|end|>\n"
    prompt += f"<|user|>{user_message}<|end|>\n<|assistant|>"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():  # inference only — no autograd bookkeeping needed
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
            repetition_penalty=1.1,
            do_sample=True,
        )

    # BUG FIX: decode only the newly generated tokens. The previous code
    # decoded the whole sequence with skip_special_tokens=True and then split
    # on "<|assistant|>" — but if the chat-control markers are registered as
    # special tokens they are stripped *before* the split, so the "reply"
    # silently contained the entire prompt. Slicing off the input length is
    # robust either way.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    history.append((user_message, reply))
    return history


# -----------------------------
# Utility Functions
# -----------------------------
def copy_last(history):
    """Return the assistant's most recent reply, or "" if history is empty."""
    return history[-1][1] if history else ""


def copy_all(history):
    """Render the full transcript as plain text, one "User:/Shay:" pair per turn."""
    return "\n".join(f"User: {u}\nShay: {b}" for u, b in history) if history else ""


# -----------------------------
# Dark Theme CSS
# -----------------------------
DARK_CSS = """
body { background-color: #111; color: #eee; }
.gradio-container { background-color: #111; max-width: 100% !important; }
.gradio-chatbot { background-color: #222; color: #eee; }
input, textarea { background-color: #222; color: #eee; border: 1px solid #555; }
button { background-color: #333; color: #eee; border: 1px solid #555; }
"""

# -----------------------------
# Gradio UI
# -----------------------------
# BUG FIX: custom CSS must be given to gr.Blocks(css=...). The original code
# passed css= to app.launch(), which does not accept it — the dark theme was
# never applied (and launch() raises TypeError on current Gradio versions).
with gr.Blocks(css=DARK_CSS) as app:
    gr.Markdown(
        "## Shay — Ultra Reliable AI Assistant\n"
        "Unbiased, emotionless, and able to converse on any topic.\n\n"
        "**Adjust generation parameters below:**"
    )

    chatbot = gr.Chatbot(height=600)
    user_input = gr.Textbox(placeholder="Type your message here...", container=False, scale=8)
    send_btn = gr.Button("Send", variant="primary", scale=2)
    clear_btn = gr.Button("Clear", variant="secondary")
    copy_last_btn = gr.Button("Copy Last Message", variant="secondary")
    copy_all_btn = gr.Button("Copy Full History", variant="secondary")

    # Sliders for generation parameters
    max_tokens_slider = gr.Slider(32, 1024, value=256, step=32, label="Max New Tokens")
    temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
    top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.01, label="Top-p")
    top_k_slider = gr.Slider(1, 200, value=50, step=1, label="Top-k")

    gen_inputs = [
        user_input,
        chatbot,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        top_k_slider,
    ]

    # Chat actions. FIX: chain a .then(...) to clear the textbox after each
    # send — the original left the submitted message sitting in the input box.
    user_input.submit(generate_chat, gen_inputs, chatbot).then(
        lambda: "", None, user_input, queue=False
    )
    send_btn.click(generate_chat, gen_inputs, chatbot).then(
        lambda: "", None, user_input, queue=False
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)

    # NOTE(review): these route the copied text to no output component, so the
    # text goes nowhere visible — presumably a clipboard integration was
    # intended; wiring them to a Textbox would need a layout change, so left
    # as-is for review.
    copy_last_btn.click(copy_last, chatbot, None)
    copy_all_btn.click(copy_all, chatbot, None)

# Queue to handle multiple concurrent users without blocking the event loop.
app.queue(max_size=64)

# -----------------------------
# Launch App (Single Launch)
# -----------------------------
app.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)