sandz7 committed
Commit b968003 · 1 Parent(s): f4c3e35

changed the model.generate() params and replaced the dialogue template with a plain header prepended to the prompt

Files changed (1): app.py +14 -17
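
In effect, the per-message dialogue template is gone: the prompt handed to the tokenizer is now just a fixed header followed by the raw user text. A minimal sketch of the resulting prompt, using the header and the example question that appear in app.py:

    header = (
        "A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
    )
    input_text = "How was jupiter created in the solar system."  # example question from app.py
    prompt = header + input_text  # no dialogue_template / role dicts anymore
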
app.py CHANGED
@@ -14,9 +14,7 @@ model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst", torch
 
 # Bloom LLM
 def xgen(input_text,
-         history,
-         tokenize: bool=True,
-         add_generation_prompt: bool=True):
+         history):
     """
     This will take an input text, encode with the tokenizer,
     generate with the input_ids into the Bloom LLM, than decode
@@ -26,19 +24,14 @@ def xgen(input_text,
     # # User's question
     # input_text = "How was jupiter created in the solar system."
 
-    # Prompt template for LLM
-    dialogue_template = [
-        {"role": "user",
-         "content": input_text}
-    ]
-
-    # Be sure the dialogue template is in string formate for the tokenizer
-    prompt = ""
-    for dialogue in dialogue_template:
-        prompt += dialogue["content"] + " "
-
+    # Prompt template for LLM "context"
+    header = (
+        "A chat between a curious human and an artificial intelligence assistant. "
+        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
+    )
+
     # token id's for prompt
-    input_ids = tokenizer(prompt, return_tensors='pt').to('cuda')
+    input_ids = tokenizer(header + input_text, return_tensors='pt').to('cuda')
 
     # Bloom already comes in fp16
 
@@ -46,12 +39,16 @@ def xgen(input_text,
     with torch.no_grad():
         # Generate output from LLM
         outputs = model.generate(**input_ids,
-                                 max_new_tokens=256)
+                                 max_new_tokens=256,
+                                 top_k=100,
+                                 eos_token_id=50256)
 
     # Decode the output tensors into string
     outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    return outputs_decoded
+    output_text = outputs_decoded.strip().replace("Assistant:", "")
+
+    return output_text
 
     torch.cuda.empty_cache()
 
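
The new xgen(input_text, history) signature is the (message, history) pair that a Gradio chat callback receives. The wiring below is only a sketch of how the Space might invoke it; gr.ChatInterface and the title are assumptions, since the Gradio setup is not part of this diff:

    import gradio as gr

    # Hypothetical wiring (not shown in this commit): gr.ChatInterface passes
    # (message, history) to its fn, which matches xgen's new signature.
    demo = gr.ChatInterface(fn=xgen, title="XGen-7B-8K-Inst")

    if __name__ == "__main__":
        demo.launch()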