File size: 1,647 Bytes
f9b70b8
58457ef
f9b70b8
58457ef
 
 
 
 
52f4788
58457ef
 
 
52f4788
c45e8d4
 
 
 
 
 
 
 
52f4788
58457ef
50ab95f
58457ef
 
50ab95f
 
58457ef
f9b70b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58457ef
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import threading
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch

# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "LiquidAI/LFM2.5-1.2B-Instruct"

# The tokenizer and the causal-LM weights are loaded once, at import time,
# and shared by every call to chat().
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.bfloat16,
    device_map="auto",
)

def chat(message, history):
    """Stream the model's reply to ``message`` given the prior conversation.

    Args:
        message: The latest user message.
        history: Earlier turns. Each item is either a chat-message dict
            (passed through unchanged) or a ``(user_msg, assistant_msg)``
            pair; a falsy ``assistant_msg`` is skipped.

    Yields:
        The reply text accumulated so far, once per chunk produced by the
        streamer.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    messages = []
    for item in history:
        if isinstance(item, dict):
            messages.append(item)
        else:
            user_msg, assistant_msg = item
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    encoded = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True,
        return_tensors="pt", tokenize=True
    )
    # The template call may hand back either a bare tensor of token ids or an
    # encoding object that exposes ``input_ids`` (and possibly a mask).
    if hasattr(encoded, "input_ids"):
        input_ids = encoded.input_ids
        attention_mask = getattr(encoded, "attention_mask", None)
    else:
        input_ids, attention_mask = encoded, None

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids.to(model.device),
        do_sample=True,
        temperature=0.1,
        top_k=50,
        repetition_penalty=1.05,
        max_new_tokens=512,
        streamer=streamer,
    )
    if attention_mask is not None:
        # Forward the mask when the tokenizer supplied one, so generate()
        # does not have to infer it from the token ids.
        generation_kwargs["attention_mask"] = attention_mask.to(model.device)

    errors = []

    def _generate():
        # Runs in the worker thread. If generate() fails, the streamer would
        # never receive its stop signal and the consumer loop below would
        # block forever, so record the error and end the stream explicitly.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the caller
            errors.append(exc)
            streamer.on_finalized_text("", stream_end=True)

    thread = threading.Thread(target=_generate)
    thread.start()

    partial = ""
    for token in streamer:
        partial += token
        yield partial

    # The stream is finished, so the worker is done (or about to be).
    thread.join()
    if errors:
        raise errors[0]

# Chat UI whose replies are produced (and streamed) by chat().
demo = gr.ChatInterface(
    description="Chat avec le modèle LiquidAI LFM2.5-1.2B-Instruct",
    title="LFM2.5 Chat",
    fn=chat,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()