# Creeper AI Chatbot — minimal single-turn Gradio chat UI over LiquidAI LFM2 models.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
from threading import Thread
# Dropdown label -> Hugging Face repository id for each LFM2 checkpoint size.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}
# Process-wide cache: model_key -> (tokenizer, model), filled lazily by load_model().
model_cache = {}
def load_model(model_key):
    """Return a ``(tokenizer, model)`` pair for the given dropdown key.

    Loads from the Hugging Face Hub on first use and memoizes the pair in
    the module-level ``model_cache`` so switching back to a previously used
    model is instant.
    """
    cached = model_cache.get(model_key)
    if cached is not None:
        return cached

    repo_id = MODEL_NAMES[model_key]
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    # Half precision on GPU to save memory; full precision on CPU, where
    # float16 is typically slow or unsupported.
    weights_dtype = torch.float16 if target_device == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        dtype=weights_dtype,
    ).to(target_device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model
def chat_with_model(message, model_choice):
    """Stream a single-turn reply to ``message`` from the selected model.

    Generator used as a Gradio event handler: it repeatedly yields the chat
    history as a one-exchange list ``[[user_message, partial_reply]]`` so the
    UI renders the response token by token.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device
    # Absolute zero modification - your text goes straight to the AI
    prompt = message
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # BUG FIX: the original passed do_sample=True together with
    # temperature=0.0, which makes transformers raise
    # "ValueError: `temperature` ... has to be a strictly positive float"
    # at generate() time. temperature=0.0 expresses the intent of
    # deterministic output, so use greedy decoding instead (and drop the
    # sampling-only knobs, which are ignored when do_sample=False).
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=False,
    )
    # generate() blocks, so run it on a worker thread and consume the
    # streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Returns exactly one exchange: [User message, AI response]
        yield [[message, partial_text]]
    # The streamer is exhausted only when generation ends; join to reap the
    # worker thread before the handler returns.
    thread.join()
def create_demo():
    """Build and return the Gradio Blocks UI for the chatbot."""
    # WhatsApp-inspired "Creeper" Dark Theme
    theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )

    with gr.Blocks(theme=theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")

        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B",
        )
        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            bubble_full_width=False,
        )
        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)
        clear = gr.Button("Clear Screen")

        # "No memory" design: every send ignores prior history, clears the
        # textbox, and seeds the chat view with just the current message.
        def show_user_turn(user_message):
            return "", [[user_message, None]]

        # Wire both the Enter key and the Send button to the same two-step
        # pipeline: echo the user's turn, then stream the model's reply.
        for trigger in (msg.submit, submit_btn.click):
            trigger(show_user_turn, [msg], [msg, chatbot]).then(
                chat_with_model, [msg, model_choice], chatbot
            )

        clear.click(lambda: None, None, chatbot, queue=False)

    return demo
if __name__ == "__main__":
    app = create_demo()
    # Queuing is required for streaming (generator) event handlers.
    app.queue()
    # Bind to all interfaces on the conventional Spaces port.
    app.launch(server_name="0.0.0.0", server_port=7860)