# Hugging Face Space: single-turn Gradio chat demo for LiquidAI LFM2 models.
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer | |
| import torch | |
| from threading import Thread | |
# UI label -> Hugging Face Hub repo id for each selectable model size.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}
# Process-wide cache of loaded (tokenizer, model) pairs, keyed by UI label,
# so switching models in the dropdown never re-downloads/re-loads weights.
model_cache = {}
def load_model(model_key):
    """Return the (tokenizer, model) pair for a UI model key, loading lazily.

    The first call for a given key downloads/loads the weights and stores
    them in the module-level ``model_cache``; later calls return the cached
    pair. The model is placed on CUDA in float16 when a GPU is available,
    otherwise on CPU in float32.
    """
    cached = model_cache.get(model_key)
    if cached is not None:
        return cached

    repo_id = MODEL_NAMES[model_key]
    use_cuda = torch.cuda.is_available()
    target_device = "cuda" if use_cuda else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        dtype=torch.float16 if use_cuda else torch.float32,
    ).to(target_device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model
def chat_with_model(message, model_choice):
    """Generate a streamed single-turn reply to *message*.

    Args:
        message: Raw user text; passed to the model verbatim (no chat
            template, no conversation history).
        model_choice: Key into MODEL_NAMES selecting which LFM2 model to use.

    Yields:
        A one-exchange chatbot history ``[[message, partial_reply]]``,
        re-yielded as each new token arrives so Gradio can stream it.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device
    # Absolute zero modification - your text goes straight to the AI
    prompt = message
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # BUG FIX: the original passed temperature=0.0 together with do_sample=True.
    # In transformers, sampling divides the logits by the temperature, so 0.0
    # is invalid (raises / divides by zero). Temperature 0 means greedy
    # decoding, so request that explicitly and drop the sampling knobs.
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=False,
    )
    # generate() blocks, so it runs on a worker thread while we consume the
    # streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Returns exactly one exchange: [User message, AI response]
        yield [[message, partial_text]]
    # The streamer is exhausted only when generate() finishes; join to avoid
    # leaking the worker thread.
    thread.join()
def create_demo():
    """Build and return the Gradio Blocks UI for the single-turn chatbot."""
    # WhatsApp-inspired "Creeper" Dark Theme
    custom_theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )
    with gr.Blocks(theme=custom_theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")
        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B"
        )
        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            bubble_full_width=False
        )
        # Holds the submitted text across the chained events (see BUG FIX below).
        pending_msg = gr.State("")
        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)
        clear = gr.Button("Clear Screen")
        # This handles the "No Memory" logic:
        # Every time you hit send, it ignores history and just runs the current message.
        #
        # BUG FIX: the original chained chat_with_model on [msg, model_choice],
        # but start_chat has already cleared the textbox by the time .then()
        # reads it, so the model received an empty prompt. We stash the
        # submitted text in a gr.State and feed that to the generator instead.
        def start_chat(user_message):
            # Clear the textbox, seed the chat view with the pending turn,
            # and remember the text for the follow-up generation event.
            return "", [[user_message, None]], user_message
        msg.submit(start_chat, [msg], [msg, chatbot, pending_msg]).then(
            chat_with_model, [pending_msg, model_choice], chatbot
        )
        submit_btn.click(start_chat, [msg], [msg, chatbot, pending_msg]).then(
            chat_with_model, [pending_msg, model_choice], chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)
    return demo
if __name__ == "__main__":
    # Build the UI, enable the request queue (required for streaming
    # generator handlers like chat_with_model), and serve on all interfaces
    # at port 7860 — the standard Hugging Face Spaces port.
    demo = create_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)