Update app.py
app.py CHANGED
@@ -143,50 +143,23 @@ def talk(prompt, history):
         tokenizer.convert_tokens_to_ids("<|eot_id|>")  # converts a token string into a single integer id (or a sequence of ids) using the vocabulary
     ]
     # indicates the end of a sequence
-
-
-
-
-    #
+    import pprint
+    stream = model.create_chat_completion(messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=True)
+    # print(output['choices'][0]['message']['content'])
+    pprint.pprint(stream)
+    # for output in stream:
     #     text += output["choices"][0]["text"]
     #     yield text
 
-    # model_input = model.create_chat_completion(messages = messages)
-
-    # input_ids = tokenizer.apply_chat_template(
-    #     messages,
-    #     add_generation_prompt=True,
-    #     return_tensors="pt"
-    # )
     # preparing tokens for model input
     # the add_generation_prompt argument tells the template to add tokens that indicate the start of a bot response
-    # print(input_ids)
-    # print("check7")
-    # print(input_ids.dtype)
 
     # calling the model to generate a response based on the message/input
    # do_sample, if set to True, uses strategies to select the next token from the probability distribution over the entire vocabulary
     # temperature controls randomness: more randomness with higher temperature
     # only the tokens comprising the top_p probability mass are considered for responses
     # this output is a data structure containing all the information returned by generate(), but it can also be used as a tuple or dictionary
-
-
-    # print("check10")
-    # t = Thread(target=model.generate, kwargs=generate_kwargs)
-    # to process multiple instances
-    # t.start()
-    # print("check11")
-    # start a thread
-    # outputs = []
-    # outputs = model_input
-    # return outputs
-    # print(model.tokenize(messages))
-    # tokens = model.tokenize(messages)
-    # for token in model.generate(tokens):
-    #     print(model.detokenize([token]))
-    # input_ids = tokenizer(*messages)
-
-    # print(model.generate(tensor([[ 1, 529, 29989, 5205, 29989]])))
+
     # start = time.time()
     # NUM_TOKENS=0
     # print('-'*4+'Start Generation'+'-'*4)
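Note that the new code opens the stream but then only pprint-prints the generator object; the loop that would actually consume it is still commented out. A minimal sketch of that consumption with llama-cpp-python: when create_chat_completion is called with stream=True, each chunk exposes its text under choices[0]["delta"] rather than choices[0]["text"] (the key used by create_completion), and the first and last chunks may carry no "content" at all. The model path and SYS_PROMPT value below are placeholders; the diff does not specify them.

from llama_cpp import Llama

# model, SYS_PROMPT and formatted_prompt come from elsewhere in app.py;
# these stand-ins just make the sketch self-contained
model = Llama(model_path="model.gguf")
SYS_PROMPT = "You are an AI copilot for diabetes patients."

def talk(prompt, history):
    formatted_prompt = prompt  # app.py builds this from prompt + history earlier
    stream = model.create_chat_completion(
        messages=[
            {"role": "system", "content": SYS_PROMPT},
            {"role": "user", "content": formatted_prompt},
        ],
        max_tokens=1000,
        stop=["</s>"],
        stream=True,  # yields incremental chunks instead of one finished response
    )
    text = ""
    for output in stream:
        delta = output["choices"][0]["delta"]  # streamed chat chunks use "delta"
        text += delta.get("content", "")       # role-only chunks carry no "content"
        yield text                             # emit the accumulated reply so far

Yielding the accumulated text rather than each fragment is what lets a Gradio-style chat UI redraw the full reply as it grows.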
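The surviving comments about apply_chat_template, add_generation_prompt, do_sample, temperature, and top_p describe the transformers code path this commit deletes. For reference, a hedged sketch of what those comments refer to, reusing the <|eot_id|> terminator built at the top of the hunk; the checkpoint name and sampling values are illustrative, not taken from the diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"  # hypothetical; app.py does not name it
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# apply_chat_template turns the message list into input ids;
# add_generation_prompt appends the tokens that open the bot's turn
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# stop on either the regular EOS or the <|eot_id|> end-of-turn token
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = model.generate(
    input_ids,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,   # sample from the distribution instead of greedy decoding
    temperature=0.6,  # higher values give more randomness
    top_p=0.9,        # nucleus sampling: keep only the top_p probability mass
)
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))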
@@ -200,10 +173,7 @@ def talk(prompt, history):
     # print(f'Time for complete generation: {time_generate}s')
     # print(f'Tokens per second: {NUM_TOKENS/time_generate}')
     # print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
-
-
-
-
+
 
 TITLE = "AI Copilot for Diabetes Patients"
 
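The commented block around these prints measures throughput by counting generated tokens against wall-clock time. A minimal sketch of that measurement wrapped around the streaming loop, assuming a stream opened as above and treating each streamed chunk as roughly one token:

import time

start = time.time()
NUM_TOKENS = 0
print('-'*4 + 'Start Generation' + '-'*4)
for output in stream:                      # stream from create_chat_completion(stream=True)
    NUM_TOKENS += 1                        # roughly one token per chunk
    print(output["choices"][0]["delta"].get("content", ""), end="", flush=True)
time_generate = time.time() - start
print()
print(f'Time for complete generation: {time_generate}s')
print(f'Tokens per second: {NUM_TOKENS/time_generate}')
print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')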