clem HF Staff committed on
Commit
abd52d9
·
verified ·
1 Parent(s): 9132043

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -67
app.py CHANGED
@@ -1,70 +1,29 @@
1
- import os
2
- from threading import Thread
3
  import gradio as gr
4
- import torch
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
 
7
- MODEL_ID = "clem/macron-style-qwen2.5-1.5B"
8
 
9
- try:
10
- import spaces
11
- HAS_SPACES = True
12
- except ImportError:
13
- HAS_SPACES = False
14
-
15
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
16
- model = AutoModelForCausalLM.from_pretrained(
17
- MODEL_ID,
18
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
19
- device_map="auto",
20
- use_cache=True,
21
- )
22
 
23
 
24
- def _generate(input_ids, max_new_tokens, temperature, top_p):
25
- streamer = TextIteratorStreamer(
26
- tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
27
- )
28
- generate_kwargs = dict(
29
- input_ids=input_ids.to(model.device),
30
- streamer=streamer,
31
- max_new_tokens=max_new_tokens,
32
- do_sample=True,
 
 
33
  temperature=temperature,
34
  top_p=top_p,
35
- eos_token_id=tokenizer.eos_token_id,
36
- )
37
- Thread(target=model.generate, kwargs=generate_kwargs, daemon=True).start()
38
- output = ""
39
- for chunk in streamer:
40
- output += chunk
41
- yield output
42
-
43
-
44
- if HAS_SPACES:
45
- _generate = spaces.GPU(_generate)
46
-
47
-
48
- def respond(
49
- message: str,
50
- chat_history: list[dict],
51
- system_prompt: str,
52
- max_new_tokens: int,
53
- temperature: float,
54
- top_p: float,
55
- ):
56
- conversation = []
57
- if system_prompt.strip():
58
- conversation.append({"role": "system", "content": system_prompt})
59
- conversation.extend(chat_history)
60
- conversation.append({"role": "user", "content": message})
61
-
62
- input_ids = tokenizer.apply_chat_template(
63
- conversation,
64
- add_generation_prompt=True,
65
- return_tensors="pt",
66
- )
67
- yield from _generate(input_ids, max_new_tokens, temperature, top_p)
68
 
69
 
70
  demo = gr.ChatInterface(
@@ -72,11 +31,7 @@ demo = gr.ChatInterface(
72
  type="messages",
73
  chatbot=gr.Chatbot(height=500, type="messages"),
74
  additional_inputs=[
75
- gr.Textbox(
76
- value="You are Emmanuel Macron, President of the French Republic. Respond in his characteristic style: eloquent, diplomatic yet direct, reformist, and deeply European.",
77
- label="System prompt",
78
- lines=3,
79
- ),
80
  gr.Slider(64, 1024, value=256, step=64, label="Max new tokens"),
81
  gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
82
  gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
@@ -91,5 +46,4 @@ demo = gr.ChatInterface(
91
  description="A Qwen2.5-1.5B fine-tuned to speak in the style of Emmanuel Macron. Trained on [clem/macron-style-conversations](https://hf.co/datasets/clem/macron-style-conversations).",
92
  )
93
 
94
- if __name__ == "__main__":
95
- demo.launch()
 
 
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
 
3
 
4
+ client = InferenceClient("clem/macron-style-qwen2.5-1.5B")
5
 
6
+ SYSTEM_PROMPT = "You are Emmanuel Macron, President of the French Republic. Respond in his characteristic style: eloquent, diplomatic yet direct, reformist, and deeply European."
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
def respond(message: str, chat_history: list[dict], system_prompt: str, max_tokens: int, temperature: float, top_p: float):
    """Stream a chat completion for *message*, yielding the growing reply.

    Builds an OpenAI-style message list from the optional system prompt, the
    prior Gradio chat history (``type="messages"`` dicts), and the new user
    message, then streams tokens from the Inference endpoint.

    Args:
        message: The latest user message.
        chat_history: Prior turns as ``{"role": ..., "content": ...}`` dicts.
        system_prompt: System instruction; omitted when blank/whitespace.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Nucleus-sampling cutoff forwarded to the endpoint.

    Yields:
        The accumulated assistant response text after each streamed chunk,
        which Gradio renders as an in-place updating message.
    """
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # Some providers emit keep-alive / metadata chunks with an empty
        # `choices` list; skip them instead of raising IndexError mid-stream.
        if not chunk.choices:
            continue
        # `delta.content` is None on role/finish chunks — coerce to "".
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  demo = gr.ChatInterface(
 
31
  type="messages",
32
  chatbot=gr.Chatbot(height=500, type="messages"),
33
  additional_inputs=[
34
+ gr.Textbox(value=SYSTEM_PROMPT, label="System prompt", lines=3),
 
 
 
 
35
  gr.Slider(64, 1024, value=256, step=64, label="Max new tokens"),
36
  gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
37
  gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
 
46
  description="A Qwen2.5-1.5B fine-tuned to speak in the style of Emmanuel Macron. Trained on [clem/macron-style-conversations](https://hf.co/datasets/clem/macron-style-conversations).",
47
  )
48
 
49
+ demo.launch()