Azzindani committed (verified)
Commit 40a19a5 · 1 Parent(s): 25ff041

Update app.py

Files changed (1):
  1. app.py (+24, -28)
app.py CHANGED
@@ -1,30 +1,31 @@
+# Ref: https://huggingface.co/spaces/ysharma/Chat_with_Meta_llama3_8b
+
 import gradio as gr
 import os
 import spaces
+import torch
 from transformers import GemmaTokenizer, AutoModelForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 
-# Set an environment variable
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-
 
 DESCRIPTION = '''
 <div>
-<h1 style="text-align: center;">deepseek-ai/DeepSeek-R1-Distill-Llama-8B</h1>
+<h1 style="text-align: center;">非公式Llama-3.1-Swallow-8B-Instruct-v0.1</h1>
+<p>tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1の非公式デモだよ。 <a href="https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1"><b>tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1</b></a>.</p>
 </div>
 '''
 
 LICENSE = """
 <p/>
-
 ---
+Built with Meta Llama 3.1
 """
 
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
-<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">DeepSeek-R1-Distill-Llama-8B</h1>
-<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
+<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta llama3.1</h1>
+<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">なんでもきいてね</p>
 </div>
 """
 
@@ -34,7 +35,6 @@ h1 {
   text-align: center;
   display: block;
 }
-
 #duplicate-button {
   margin: auto;
   color: white;
@@ -43,17 +43,11 @@ h1 {
 }
 """
 
-model_path = "Azzindani/Qwen2.5_1.5B_Instruct_ID_Legal"
-
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto") # to("cuda:0")
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids("<|eot_id|>")
-]
+tokenizer = AutoTokenizer.from_pretrained("Azzindani/Qwen2.5_1.5B_Instruct_ID_Legal")
+model = AutoModelForCausalLM.from_pretrained("Azzindani/Qwen2.5_1.5B_Instruct_ID_Legal", torch_dtype=torch.bfloat16, device_map="auto")
 
-@spaces.GPU(duration=120)
+@spaces.GPU()
 def chat_llama3_8b(message: str,
                    history: list,
                    temperature: float,
@@ -70,11 +64,12 @@ def chat_llama3_8b(message: str,
         str: The generated response.
     """
     conversation = []
+    conversation.append({"role": "system", "content": "あなたは誠実で優秀な日本人のアシスタントです。日本語で聞かれた場合、必ず日本語で返答してください。"})
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
 
-    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
+    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
 
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
 
@@ -84,7 +79,8 @@
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        eos_token_id=terminators,
+        top_p=0.9,
+        repetition_penalty=1.1,
     )
     # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
     if temperature == 0:
@@ -96,7 +92,7 @@
     outputs = []
     for text in streamer:
         outputs.append(text)
-        #print(outputs)
+        print(outputs)
         yield "".join(outputs)
 
 
@@ -106,6 +102,7 @@ chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterf
 with gr.Blocks(fill_height=True, css=css) as demo:
 
     gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     gr.ChatInterface(
         fn=chat_llama3_8b,
         chatbot=chatbot,
@@ -115,7 +112,7 @@ with gr.Blocks(fill_height=True, css=css) as demo:
            gr.Slider(minimum=0,
                      maximum=1,
                      step=0.1,
-                     value=0.5,
+                     value=0.6,
                      label="Temperature",
                      render=False),
            gr.Slider(minimum=128,
@@ -126,11 +123,11 @@ with gr.Blocks(fill_height=True, css=css) as demo:
                      render=False ),
        ],
        examples=[
-           ['How to setup a human base on Mars? Give short answer.'],
-           ['Explain theory of relativity to me like I’m 8 years old.'],
-           ['What is 9,000 * 9,000?'],
-           ['Write a pun-filled happy birthday message to my friend Alex.'],
-           ['Justify why a penguin might make a good king of the jungle.']
+           ['小学生にもわかるように相対性理論を教えてください。'],
+           ['宇宙の起源を知るための方法をステップ・バイ・ステップで教えてください。'],
+           ['1から100までの素数を求めるスクリプトをPythonで書いてください。'],
+           ['友達の陽葵にあげる誕生日プレゼントを考えてください。ただし、陽葵は中学生で、私は同じクラスの男性であることを考慮してください。'],
+           ['ペンギンがジャングルの王様であることを正当化するように説明してください。']
        ],
        cache_examples=False,
    )
@@ -138,5 +135,4 @@ with gr.Blocks(fill_height=True, css=css) as demo:
    gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.launch()
-
+    demo.launch()
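
For readers who don't parse Japanese: the new DESCRIPTION announces an unofficial demo of tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1, the placeholder says roughly "Ask me anything", the added system prompt instructs the model "You are a sincere and excellent Japanese assistant; when asked in Japanese, always answer in Japanese", and the new example prompts ask (among other things) for relativity explained to a grade-schooler, a Python script listing the primes from 1 to 100, and a justification of the penguin as king of the jungle. Note that the code itself still loads Azzindani/Qwen2.5_1.5B_Instruct_ID_Legal.

The heart of the updated app.py is the standard Transformers threaded-streaming pattern: model.generate() blocks until completion, so it runs on a worker thread while TextIteratorStreamer hands decoded text back to the UI loop. Below is a minimal, self-contained sketch of that pattern using the model id this commit loads; the prompt, max_new_tokens value, and the printing loop are illustrative, not part of the commit.

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Azzindani/Qwen2.5_1.5B_Instruct_ID_Legal"  # model loaded by this commit
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# Render the conversation with the model's chat template and tokenize it.
conversation = [{"role": "user", "content": "Hello!"}]  # illustrative prompt
input_ids = tokenizer.apply_chat_template(
    conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# skip_prompt=True keeps the echoed prompt out of the streamed output;
# timeout bounds how long the consuming loop waits for the next chunk.
streamer = TextIteratorStreamer(
    tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
)

# generate() blocks until done, so run it on a worker thread and drain
# decoded chunks from the streamer on the main thread as they arrive.
generate_kwargs = dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    repetition_penalty=1.1,
)
Thread(target=model.generate, kwargs=generate_kwargs).start()
for chunk in streamer:
    print(chunk, end="", flush=True)

As the comment in app.py notes, temperature == 0 combined with do_sample=True can crash sampling, which is why the handler flips to do_sample=False in that case before starting the thread.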