changed the model.generate() params and replaced the dialogue template with a plain header prepended to the prompt
app.py
CHANGED
@@ -14,9 +14,7 @@ model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst", torch
 
 # Bloom LLM
 def xgen(input_text,
-         history,
-         tokenize: bool=True,
-         add_generation_prompt: bool=True):
+         history):
     """
     This will take an input text, encode with the tokenizer,
     generate with the input_ids into the Bloom LLM, than decode
@@ -26,19 +24,14 @@
     # # User's question
     # input_text = "How was jupiter created in the solar system."
 
-    # Prompt template for LLM
-
-
-    "
-
-
-    # Be sure the dialogue template is in string formate for the tokenizer
-    prompt = ""
-    for dialogue in dialogue_template:
-        prompt += dialogue["content"] + " "
-
+    # Prompt template for LLM "context"
+    header = (
+        "A chat between a curious human and an artificial intelligence assistant. "
+        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
+    )
+
     # token id's for prompt
-    input_ids = tokenizer(
+    input_ids = tokenizer(header + input_text, return_tensors='pt').to('cuda')
 
     # Bloom already comes in fp16
 
@@ -46,12 +39,16 @@
     with torch.no_grad():
         # Generate output from LLM
         outputs = model.generate(**input_ids,
-
+                                 max_new_tokens=256,
+                                 top_k=100,
+                                 eos_token_id=50256)
 
     # Decode the output tensors into string
     outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-
+    output_text = outputs_decoded.strip().replace("Assistant:", "")
+
+    return output_text
 
     torch.cuda.empty_cache()
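Taken together, the post-commit xgen() reads as follows. This is a minimal self-contained sketch, not the full app.py: the tokenizer/model setup is abridged from the truncated hunk header (the trust_remote_code and torch_dtype arguments are assumptions), the leftover "Bloom" comments are reworded to name XGen, and torch.cuda.empty_cache() is moved before the return, since in the committed file it sits after `return output_text` and never executes.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Setup abridged from the truncated hunk header; trust_remote_code and
# torch_dtype here are assumptions, not shown in the diff.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/xgen-7b-8k-inst",
                                          trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst",
                                             torch_dtype=torch.float16).to("cuda")

def xgen(input_text, history):
    """Encode input_text, generate with the XGen LLM, then decode and return the reply."""
    # Prompt template for the LLM "context"
    header = (
        "A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
    )

    # Token ids for the prompt
    input_ids = tokenizer(header + input_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        # Generate output from the LLM
        outputs = model.generate(**input_ids,
                                 max_new_tokens=256,
                                 top_k=100,
                                 eos_token_id=50256)

    # Decode the output tensors into a string and drop the role label
    outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    output_text = outputs_decoded.strip().replace("Assistant:", "")

    # Moved before the return: in the committed file this call comes after
    # `return output_text` and is unreachable.
    torch.cuda.empty_cache()

    return output_text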
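One caveat on the new generate() arguments: in transformers, top_k only shapes sampling, and generate() defaults to greedy decoding (do_sample=False), so as committed the top_k=100 setting is ignored. If sampled output is intended, do_sample must be switched on, e.g.:

outputs = model.generate(**input_ids,
                         max_new_tokens=256,
                         do_sample=True,   # required for top_k to take effect
                         top_k=100,
                         eos_token_id=50256)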
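Nothing in the diff shows how xgen() is invoked, but its new (input_text, history) signature matches the (message, history) callback that gradio's ChatInterface expects, so the Space is presumably wired up roughly like this hypothetical snippet:

import gradio as gr

# Hypothetical wiring, not shown in the diff: gr.ChatInterface calls
# fn(message, history), matching xgen's new two-argument signature.
demo = gr.ChatInterface(fn=xgen)
demo.launch()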