# Hugging Face Space: single-turn Gradio chat demo for LiquidAI LFM2 models.
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer | |
| import torch | |
| from threading import Thread | |
# UI label -> Hugging Face Hub repo id for each selectable model size.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}
# Process-wide cache of loaded (tokenizer, model) pairs, keyed by UI label,
# so switching models in the dropdown never re-downloads/re-loads weights.
model_cache = {}
def load_model(model_key):
    """Return the (tokenizer, model) pair for a UI model key, loading lazily.

    The first call for a given key downloads/loads the weights and stores
    them in the module-level ``model_cache``; later calls return the cached
    pair. The model is placed on CUDA in float16 when a GPU is available,
    otherwise on CPU in float32.
    """
    cached = model_cache.get(model_key)
    if cached is not None:
        return cached

    repo_id = MODEL_NAMES[model_key]
    use_cuda = torch.cuda.is_available()
    target_device = "cuda" if use_cuda else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        dtype=torch.float16 if use_cuda else torch.float32,
    ).to(target_device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model
def chat_with_model(message, model_choice):
    """Generate a streamed single-turn reply to *message*.

    Args:
        message: Raw user text; passed to the model verbatim (no chat
            template, no conversation history).
        model_choice: Key into MODEL_NAMES selecting which LFM2 model to use.

    Yields:
        A one-exchange chatbot history ``[[message, partial_reply]]``,
        re-yielded as each new token arrives so Gradio can stream it.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device
    # Absolute zero modification - your text goes straight to the AI
    prompt = message
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # BUG FIX: the original passed temperature=0.0 together with do_sample=True.
    # In transformers, sampling divides the logits by the temperature, so 0.0
    # is invalid (raises / divides by zero). Temperature 0 means greedy
    # decoding, so request that explicitly and drop the sampling knobs.
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=False,
    )
    # generate() blocks, so it runs on a worker thread while we consume the
    # streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Returns exactly one exchange: [User message, AI response]
        yield [[message, partial_text]]
    # The streamer is exhausted only when generate() finishes; join to avoid
    # leaking the worker thread.
    thread.join()
def create_demo():
    """Build and return the Gradio Blocks UI for the single-turn chatbot."""
    # WhatsApp-inspired "Creeper" Dark Theme
    custom_theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )
    with gr.Blocks(theme=custom_theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")
        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B"
        )
        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            bubble_full_width=False
        )
        # Holds the submitted text across the chained events (see BUG FIX below).
        pending_msg = gr.State("")
        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)
        clear = gr.Button("Clear Screen")
        # This handles the "No Memory" logic:
        # Every time you hit send, it ignores history and just runs the current message.
        #
        # BUG FIX: the original chained chat_with_model on [msg, model_choice],
        # but start_chat has already cleared the textbox by the time .then()
        # reads it, so the model received an empty prompt. We stash the
        # submitted text in a gr.State and feed that to the generator instead.
        def start_chat(user_message):
            # Clear the textbox, seed the chat view with the pending turn,
            # and remember the text for the follow-up generation event.
            return "", [[user_message, None]], user_message
        msg.submit(start_chat, [msg], [msg, chatbot, pending_msg]).then(
            chat_with_model, [pending_msg, model_choice], chatbot
        )
        submit_btn.click(start_chat, [msg], [msg, chatbot, pending_msg]).then(
            chat_with_model, [pending_msg, model_choice], chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)
    return demo
if __name__ == "__main__":
    # Build the UI, enable the request queue (required for streaming
    # generator handlers like chat_with_model), and serve on all interfaces
    # at port 7860 — the standard Hugging Face Spaces port.
    demo = create_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)