Ctaake committed
Commit 090abe0 · verified · 1 Parent(s): 561e94b

Update app.py

Files changed (1)
app.py +11 -16
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import random
-from transformers import AutoTokenizer,AutoModelForCausalLM
+from transformers import AutoTokenizer
 from mySystemPrompt import SYSTEM_PROMPT
 
 
@@ -10,7 +10,6 @@ checkpoint = "CohereForAI/c4ai-command-r-plus"
 # Inference client with the model (And HF-token if needed)
 client = InferenceClient(checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint)
 # Tokenizer chat template correction(Only works for mistral models)
 #chat_template = open("mistral-instruct.jinja").read()
 #chat_template = chat_template.replace(' ', '').replace('\n', '')
@@ -22,9 +21,9 @@ def format_prompt(message,chatbot,system_prompt):
         messages.append({"role": "user", "content":user_message})
         messages.append({"role": "assistant", "content":bot_message})
     messages.append({"role": "user", "content":message})
-    #newPrompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, return_tensors="pt")
-    #print(newPrompt)
-    newPrompt = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+    newPrompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, return_tensors="pt")
+    print(newPrompt)
+    #newPrompt = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
     return newPrompt
 
 def inference(message, history, systemPrompt=SYSTEM_PROMPT, temperature=0.9, maxTokens=512, topP=0.9, repPenalty=1.1):
@@ -41,18 +40,14 @@ def inference(message, history, systemPrompt=SYSTEM_PROMPT, temperature=0.9, max
         seed=random.randint(0, 999999999),
     )
     # Generating the response by passing the prompt in right format plus the client settings
-    #stream = client.text_generation(format_prompt(message, history, systemPrompt),
-    #                                **client_settings)
+    stream = client.text_generation(format_prompt(message, history, systemPrompt),
+                                    **client_settings)
     # Reading the stream
-    #partial_response = ""
-    #for stream_part in stream:
-    #    partial_response += stream_part.token.text
-    #    yield partial_response
-    gen_tokens = model.generate(
-        format_prompt(message,history,systemPrompt),
-        **client_settings)
-    output = tokenizer.decode(gen_tokens[0])
-    return output
+    partial_response = ""
+    for stream_part in stream:
+        partial_response += stream_part.token.text
+        yield partial_response
+
 
 
 
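For context, below is a minimal, self-contained sketch of the inference path this commit switches to: the prompt is built as a plain string with apply_chat_template(tokenize=False) and streamed through InferenceClient.text_generation. Everything not visible in the diff is an assumption — the contents of client_settings (only the seed entry appears above; stream=True and details=True are inferred from the stream_part.token.text loop), the body of format_prompt outside the changed hunk, and an inline placeholder standing in for the repo's SYSTEM_PROMPT.

import random

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

checkpoint = "CohereForAI/c4ai-command-r-plus"
client = InferenceClient(checkpoint)                   # remote text-generation endpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # only needed for its chat template

# Placeholder for the repo's mySystemPrompt.SYSTEM_PROMPT
SYSTEM_PROMPT = "You are a helpful assistant."

def format_prompt(message, chatbot, system_prompt):
    # Rebuild the conversation as chat messages (loop reconstructed; only the
    # appends and the template call are visible in the diff).
    messages = [{"role": "system", "content": system_prompt}]
    for user_message, bot_message in chatbot:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})
    # tokenize=False returns a plain string, which is what the HTTP client expects;
    # return_tensors only matters when tokenize=True, so it is dropped here.
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

def inference(message, history, systemPrompt=SYSTEM_PROMPT,
              temperature=0.9, maxTokens=512, topP=0.9, repPenalty=1.1):
    # Assumed settings: stream=True and details=True are required for the
    # stream_part.token.text loop; only the seed entry is shown in the diff.
    client_settings = dict(
        max_new_tokens=maxTokens,
        temperature=temperature,
        top_p=topP,
        repetition_penalty=repPenalty,
        stream=True,
        details=True,
        seed=random.randint(0, 999999999),
    )
    stream = client.text_generation(format_prompt(message, history, systemPrompt),
                                    **client_settings)
    # Yield the growing response so Gradio renders it token by token
    partial_response = ""
    for stream_part in stream:
        partial_response += stream_part.token.text
        yield partial_response

if __name__ == "__main__":
    gr.ChatInterface(inference).launch()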