clem HF Staff committed on
Commit
36e55b3
·
verified ·
1 Parent(s): 4278ae6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from threading import Thread
3
+ import gradio as gr
4
+ import torch
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
+
7
# Hub identifier of the fine-tuned checkpoint served by this app.
MODEL_ID = "clem/macron-style-qwen2.5-1.5B"

# `spaces` is an optional dependency: record whether it imported so the
# GPU wrapper can be applied to the generation function only when available.
try:
    import spaces
except ImportError:
    HAS_SPACES = False
else:
    HAS_SPACES = True

# Load weights in half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _load_dtype = torch.float16
else:
    _load_dtype = torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=_load_dtype,
    device_map="auto",
    use_cache=True,
)
22
+
23
+
24
def _generate(input_ids, max_new_tokens, temperature, top_p):
    """Stream a sampled completion for an already-tokenized prompt.

    Generation runs in a background thread while this generator consumes the
    streamer, so partial text can be yielded as soon as it is decoded.

    Args:
        input_ids: Prompt token ids as a tensor; moved to the model's device.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Yields:
        The cumulative decoded reply so far (prompt and special tokens
        excluded), one value per streamed chunk.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been drained.
    """
    streamer = TextIteratorStreamer(
        tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )
    input_ids = input_ids.to(model.device)
    generate_kwargs = dict(
        input_ids=input_ids,
        # The prompt passed in here carries no padding information, so every
        # position is treated as a real token; passing the mask explicitly
        # avoids relying on generate() inferring it.
        attention_mask=torch.ones_like(input_ids),
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        eos_token_id=tokenizer.eos_token_id,
    )
    worker_errors = []

    def _run_generation():
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:  # handed over to the consuming generator
            worker_errors.append(exc)
            # Close the stream so the consumer stops waiting immediately
            # instead of hitting the streamer timeout with an unrelated error.
            streamer.end()

    Thread(target=_run_generation, daemon=True).start()
    output = ""
    for chunk in streamer:
        output += chunk
        yield output
    if worker_errors:
        raise worker_errors[0]
42
+
43
+
44
# Wrap the generation function with the `spaces` GPU decorator only when the
# optional `spaces` package was importable; otherwise keep it unchanged.
_generate = spaces.GPU(_generate) if HAS_SPACES else _generate
46
+
47
+
48
def respond(
    message: str,
    chat_history: list[dict],
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
):
    """Build the chat prompt for the new user message and stream the reply.

    Args:
        message: The latest user message.
        chat_history: Earlier turns as role/content message dicts.
        system_prompt: Optional system instruction; skipped when blank.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        The cumulative reply text produced by ``_generate``.
    """
    conversation = []
    # Tolerate a missing prompt or history instead of failing on None.
    if (system_prompt or "").strip():
        conversation.append({"role": "system", "content": system_prompt})
    conversation.extend(chat_history or [])
    conversation.append({"role": "user", "content": message})

    # NOTE(review): the default return type of apply_chat_template appears to
    # differ between transformers releases (bare tensor vs. BatchEncoding).
    # Requesting the dict form explicitly and picking out "input_ids" keeps
    # the value handed to _generate a plain tensor either way.
    encoded = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    yield from _generate(encoded["input_ids"], max_new_tokens, temperature, top_p)
68
+
69
+
70
# Chat window shown by the interface.
_chat_window = gr.Chatbot(height=500, type="messages")

# Extra controls; their values are passed to `respond` after the message and
# history, in this order: system prompt, max new tokens, temperature, top-p.
_system_prompt_box = gr.Textbox(
    value="You are Emmanuel Macron, President of the French Republic. Respond in his characteristic style: eloquent, diplomatic yet direct, reformist, and deeply European.",
    label="System prompt",
    lines=3,
)
_sampling_sliders = [
    gr.Slider(64, 1024, value=256, step=64, label="Max new tokens"),
    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
]

# Sample prompts offered in the UI (not pre-computed: caching is disabled below).
_example_prompts = [
    ["What is your vision for Europe?"],
    ["Comment voyez-vous le rôle de l'IA dans la société ?"],
    ["How do you respond to critics of your reform agenda?"],
]

demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    chatbot=_chat_window,
    additional_inputs=[_system_prompt_box, *_sampling_sliders],
    examples=_example_prompts,
    cache_examples=False,
    title="💬 Macron-style Qwen2.5-1.5B",
    description="A Qwen2.5-1.5B fine-tuned to speak in the style of Emmanuel Macron. Trained on [clem/macron-style-conversations](https://hf.co/datasets/clem/macron-style-conversations).",
)

# Start the web UI only when executed as a script.
if __name__ == "__main__":
    demo.launch()