"""Creeper AI Chatbot: a stateless, streaming Gradio chat UI for LiquidAI LFM models.

Each message is sent to the model with no conversation history ("no memory"),
and tokens are streamed back into the chat view as they are generated.
"""

from threading import Thread

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Display name -> Hugging Face model id.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}

# Cache of loaded (tokenizer, model) pairs keyed by display name, so that
# switching models in the dropdown never re-loads weights from disk.
model_cache = {}


def load_model(model_key):
    """Return a ``(tokenizer, model)`` pair for *model_key*, loading it on first use.

    Args:
        model_key: A key of ``MODEL_NAMES`` (e.g. ``"LFM 1.2B"``).

    Returns:
        Tuple of ``(tokenizer, model)``; the model is placed on CUDA in fp16
        when a GPU is available, otherwise on CPU in fp32.

    Raises:
        KeyError: If *model_key* is not a known model name.
    """
    if model_key in model_cache:
        return model_cache[model_key]

    model_name = MODEL_NAMES[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # NOTE(review): `dtype=` requires transformers >= 4.56 (older releases
        # use `torch_dtype=`) — confirm the pinned transformers version.
        dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model


def chat_with_model(message, model_choice):
    """Generate a streaming reply to *message* with the selected model.

    The raw user text is used as the prompt with no chat template and no
    history (deliberate: see the "no memory" note in ``create_demo``).

    Args:
        message: The user's message text.
        model_choice: A key of ``MODEL_NAMES`` selecting which model to run.

    Yields:
        ``[[message, partial_text]]`` — a single-exchange chat history in the
        Gradio tuple format, growing as tokens stream in.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device

    # Absolute zero modification - your text goes straight to the AI
    prompt = message

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        # BUG FIX: the original passed temperature=0.0 together with
        # do_sample=True, which transformers rejects (temperature must be a
        # strictly positive float when sampling), so every generation crashed.
        # temperature=0.0 expresses the intent of deterministic output, and
        # the correct way to get that is greedy decoding:
        do_sample=False,
    )

    # generate() blocks, so it runs on a worker thread while we consume the
    # streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Returns exactly one exchange: [User message, AI response]
        yield [[message, partial_text]]

    # The streamer is exhausted once generate() finishes; reap the worker.
    thread.join()


def create_demo():
    """Build and return the Gradio Blocks app (theme, chat view, event wiring)."""
    # WhatsApp-inspired "Creeper" Dark Theme
    custom_theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )

    with gr.Blocks(theme=custom_theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")

        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B",
        )

        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            # NOTE(review): `bubble_full_width` was removed in Gradio 5 —
            # confirm the pinned gradio version, or drop this kwarg.
            bubble_full_width=False,
        )

        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        clear = gr.Button("Clear Screen")

        # This handles the "No Memory" logic:
        # Every time you hit send, it ignores history and just runs the
        # current message.
        def start_chat(user_message):
            # Clear the textbox and show the pending exchange immediately.
            return "", [[user_message, None]]

        msg.submit(start_chat, [msg], [msg, chatbot]).then(
            chat_with_model, [msg, model_choice], chatbot
        )
        submit_btn.click(start_chat, [msg], [msg, chatbot]).then(
            chat_with_model, [msg, model_choice], chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)