# lfm2 / app.py
# CryptoCreeper
# Update app.py
# b5555b7 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
from threading import Thread
# Dropdown label -> Hugging Face Hub repository id for each selectable model.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}

# Dropdown label -> (tokenizer, model) pairs that load_model has already built.
model_cache = {}
def load_model(model_key):
    """Return the ``(tokenizer, model)`` pair for ``model_key``, loading it once.

    Args:
        model_key: A key of ``MODEL_NAMES`` (the label shown in the dropdown).

    Returns:
        A ``(tokenizer, model)`` tuple. The model is moved to CUDA in float16
        when CUDA is available, otherwise it stays on CPU in float32.

    Raises:
        KeyError: If ``model_key`` is not a key of ``MODEL_NAMES``.
    """
    cached = model_cache.get(model_key)
    if cached is not None:
        return cached

    repo_id = MODEL_NAMES[model_key]
    tok = AutoTokenizer.from_pretrained(repo_id)

    # Half precision only on GPU; CPU keeps full precision.
    if torch.cuda.is_available():
        target, precision = "cuda", torch.float16
    else:
        target, precision = "cpu", torch.float32

    net = AutoModelForCausalLM.from_pretrained(repo_id, dtype=precision)
    net = net.to(target)

    entry = (tok, net)
    model_cache[model_key] = entry
    return entry
def chat_with_model(message, model_choice):
    """Stream a single-turn reply to ``message`` from the selected model.

    The message is passed to the model verbatim: no chat template, no system
    prompt and no conversation history are added.

    Args:
        message: Raw user text used as the prompt.
        model_choice: Key of ``MODEL_NAMES`` selecting which model answers.

    Yields:
        A one-exchange chat history ``[[message, partial_reply]]`` whose reply
        grows as new text arrives from the streamer.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device

    # Absolute zero modification - the text goes straight to the model.
    prompt = message

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        # Greedy decoding. The previous do_sample=True + temperature=0.0 is
        # rejected by transformers (temperature must be strictly positive when
        # sampling); greedy is the deterministic behaviour that setting meant.
        do_sample=False,
    )

    def _generate():
        # Run generation; on failure, terminate the stream so the consumer
        # loop below does not block forever waiting for text.
        try:
            model.generate(**generation_kwargs)
        except Exception:
            streamer.end()
            raise

    thread = Thread(target=_generate)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Exactly one exchange: [user message, reply so far].
        yield [[message, partial_text]]

    thread.join()
def create_demo():
    """Build the Gradio UI for the single-turn "Creeper" chatbot.

    The chat has no memory: every submission replaces the chat view with one
    exchange made of the current message and the streamed reply.

    Returns:
        The constructed ``gr.Blocks`` app (not queued or launched).
    """
    # WhatsApp-inspired "Creeper" dark theme.
    custom_theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )

    with gr.Blocks(theme=custom_theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")

        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B",
        )
        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            bubble_full_width=False,
        )

        # Carries the submitted text to the generation step. start_chat clears
        # the textbox, so a chained step that read the textbox directly would
        # only ever see an empty string.
        pending_message = gr.State("")

        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        # NOTE(review): placed below the input row; the original nesting of
        # this button could not be determined - confirm the intended layout.
        clear = gr.Button("Clear Screen")

        # "No memory" logic: each send ignores prior history and shows only
        # the current message while the reply is generated.
        def start_chat(user_message):
            return "", [[user_message, None]], user_message

        # Enter in the textbox and the Send button behave identically.
        for trigger in (msg.submit, submit_btn.click):
            trigger(
                start_chat, [msg], [msg, chatbot, pending_message]
            ).then(
                chat_with_model, [pending_message, model_choice], chatbot
            )

        clear.click(lambda: None, None, chatbot, queue=False)

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, enable the request queue, then serve
    # on all interfaces at port 7860.
    demo = create_demo()
    demo.queue()
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)