import threading

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
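
# Load the tokenizer and model once at startup. device_map="auto" lets
# accelerate place the weights (GPU if available, CPU otherwise).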
model_id = "LiquidAI/LFM2.5-1.2B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", dtype=torch.bfloat16
)
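

# Gradio calls this generator once per user turn; yielding successively longer
# strings streams the reply into the chat UI.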
def chat(message, history):
    # Normalize history: depending on the Gradio version, items arrive either
    # as openai-style {"role": ..., "content": ...} dicts or as
    # (user, assistant) tuples.
    messages = []
    for item in history:
        if isinstance(item, dict):
            messages.append(item)
        else:
            user_msg, assistant_msg = item
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    encoded = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True,
        return_tensors="pt", tokenize=True
    )
    # apply_chat_template may return a plain tensor or a BatchEncoding
    # depending on settings and transformers version, so handle both.
    input_ids = (encoded.input_ids if hasattr(encoded, "input_ids") else encoded).to(model.device)
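
    # Run generation in a background thread; TextIteratorStreamer yields
    # decoded text chunks here as soon as the model produces them.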
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = threading.Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids,
        do_sample=True,
        temperature=0.1,
        top_k=50,
        repetition_penalty=1.05,
        max_new_tokens=512,
        streamer=streamer,
    ))
    thread.start()

    partial = ""
    for token in streamer:
        partial += token
        yield partial
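

# ChatInterface renders a chat UI around the generator; because `chat` yields,
# the assistant message updates incrementally as chunks arrive.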
demo = gr.ChatInterface(
    fn=chat,
    title="LFM2.5 Chat",
    description="Chat with the LiquidAI LFM2.5-1.2B-Instruct model",
)

if __name__ == "__main__":
    demo.launch()
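
# Run locally with `python app.py`; on a Hugging Face Space using the Gradio
# SDK, this file is launched automatically as app.py.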