import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread
# Load model and tokenizer once at module import; `chat` below reads these
# module-level globals on every request.
model_id = "LiquidAI/LFM2-700M" # Balanced speed and quality
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# CPU-only deployment: float32 weights, no accelerator mapping.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float32,
    device_map="cpu"
)
print("Model loaded!")
def chat(message, history):
    """Stream a model response for a Gradio ``ChatInterface``.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Prior turns, either as ``{"role", "content"}`` dicts (Gradio
        "messages" format) or ``(user, assistant)`` pairs (tuples format).
        Both formats are normalized below.

    Yields
    ------
    str
        The accumulated assistant reply so far; Gradio re-renders the
        growing string on each yield, producing a streaming effect.
    """
    # Normalize both Gradio history formats into the chat-template shape.
    messages = []
    if history:
        for entry in history:
            if isinstance(entry, dict):
                messages.append(entry)
            elif isinstance(entry, (list, tuple)) and len(entry) >= 2:
                messages.append({"role": "user", "content": entry[0]})
                if entry[1]:
                    messages.append({"role": "assistant", "content": entry[1]})
    messages.append({"role": "user", "content": message})

    # Tokenize with the model's chat template; add_generation_prompt appends
    # the assistant header so generation starts a fresh reply.
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True
    )

    # The streamer yields decoded text fragments as generate() produces
    # tokens; skip_prompt keeps the input prompt out of the output.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_special_tokens=True,
        skip_prompt=True
    )
    generation_kwargs = {
        "inputs": inputs,
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,
        "streamer": streamer
    }

    # generate() blocks, so run it in a worker thread and consume the
    # streamer from this generator.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    try:
        partial_text = ""
        for new_text in streamer:
            partial_text += new_text
            yield partial_text
    finally:
        # Fix: the original never joined the worker, leaking one thread per
        # request. Join once the streamer is exhausted (or the generator is
        # closed early, e.g. client disconnect) so resources are released.
        thread.join()
# Create Gradio interface.
# ChatInterface wires the generator `chat` into a streaming chat UI: each
# value yielded by `chat` replaces the assistant message bubble in place.
demo = gr.ChatInterface(
    fn=chat,
    title="LFM2-700M Chatbot (Streaming)",
    description="Chat with Liquid AI's LFM2-700M - balanced speed and quality",
    examples=["Hello!", "Explain AI", "Write a Python function"]
)
# Launch the local web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()