# Creeper AI Chatbot — minimal single-turn Gradio chat UI over LiquidAI LFM2 models.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
from threading import Thread
# Dropdown label -> Hugging Face repository id for each LFM2 checkpoint size.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}
# Process-wide cache: model_key -> (tokenizer, model), filled lazily by load_model().
model_cache = {}
def load_model(model_key):
    """Return a ``(tokenizer, model)`` pair for the given dropdown key.

    Loads from the Hugging Face Hub on first use and memoizes the pair in
    the module-level ``model_cache`` so switching back to a previously used
    model is instant.
    """
    cached = model_cache.get(model_key)
    if cached is not None:
        return cached

    repo_id = MODEL_NAMES[model_key]
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    # Half precision on GPU to save memory; full precision on CPU, where
    # float16 is typically slow or unsupported.
    weights_dtype = torch.float16 if target_device == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        dtype=weights_dtype,
    ).to(target_device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model
def chat_with_model(message, model_choice):
    """Stream a single-turn reply to ``message`` from the selected model.

    Generator used as a Gradio event handler: it repeatedly yields the chat
    history as a one-exchange list ``[[user_message, partial_reply]]`` so the
    UI renders the response token by token.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device
    # Absolute zero modification - your text goes straight to the AI
    prompt = message
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # BUG FIX: the original passed do_sample=True together with
    # temperature=0.0, which makes transformers raise
    # "ValueError: `temperature` ... has to be a strictly positive float"
    # at generate() time. temperature=0.0 expresses the intent of
    # deterministic output, so use greedy decoding instead (and drop the
    # sampling-only knobs, which are ignored when do_sample=False).
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=False,
    )
    # generate() blocks, so run it on a worker thread and consume the
    # streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Returns exactly one exchange: [User message, AI response]
        yield [[message, partial_text]]
    # The streamer is exhausted only when generation ends; join to reap the
    # worker thread before the handler returns.
    thread.join()
def create_demo():
    """Build and return the Gradio Blocks UI for the chatbot."""
    # WhatsApp-inspired "Creeper" Dark Theme
    theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )

    with gr.Blocks(theme=theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")

        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B",
        )
        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            bubble_full_width=False,
        )
        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)
        clear = gr.Button("Clear Screen")

        # "No memory" design: every send ignores prior history, clears the
        # textbox, and seeds the chat view with just the current message.
        def show_user_turn(user_message):
            return "", [[user_message, None]]

        # Wire both the Enter key and the Send button to the same two-step
        # pipeline: echo the user's turn, then stream the model's reply.
        for trigger in (msg.submit, submit_btn.click):
            trigger(show_user_turn, [msg], [msg, chatbot]).then(
                chat_with_model, [msg, model_choice], chatbot
            )

        clear.click(lambda: None, None, chatbot, queue=False)

    return demo
if __name__ == "__main__":
    app = create_demo()
    # Queuing is required for streaming (generator) event handlers.
    app.queue()
    # Bind to all interfaces on the conventional Spaces port.
    app.launch(server_name="0.0.0.0", server_port=7860)