changed the model.generate() params and replaced the dialogue template with a plain header prepended to the prompt
app.py
CHANGED
@@ -14,9 +14,7 @@ model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst", torch
 
 # Bloom LLM
 def xgen(input_text,
-         history,
-         tokenize: bool=True,
-         add_generation_prompt: bool=True):
+         history):
     """
     This will take an input text, encode with the tokenizer,
     generate with the input_ids into the Bloom LLM, than decode
@@ -26,19 +24,14 @@
     # # User's question
     # input_text = "How was jupiter created in the solar system."
 
-    # Prompt template for LLM
-
-
-    "
-
-
-    # Be sure the dialogue template is in string formate for the tokenizer
-    prompt = ""
-    for dialogue in dialogue_template:
-        prompt += dialogue["content"] + " "
-
+    # Prompt template for LLM "context"
+    header = (
+        "A chat between a curious human and an artificial intelligence assistant. "
+        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
+    )
+
     # token id's for prompt
-    input_ids = tokenizer(
+    input_ids = tokenizer(header + input_text, return_tensors='pt').to('cuda')
 
     # Bloom already comes in fp16
 
@@ -46,12 +39,16 @@
     with torch.no_grad():
         # Generate output from LLM
         outputs = model.generate(**input_ids,
-
+                                 max_new_tokens=256,
+                                 top_k=100,
+                                 eos_token_id=50256)
 
     # Decode the output tensors into string
     outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-
+    output_text = outputs_decoded.strip().replace("Assistant:", "")
+
+    return output_text
 
     torch.cuda.empty_cache()
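Taken together, the post-commit xgen() reads as follows. This is a minimal self-contained sketch, not the full app.py: the tokenizer/model setup is abridged from the truncated hunk header (the trust_remote_code and torch_dtype arguments are assumptions), the leftover "Bloom" comments are reworded to name XGen, and torch.cuda.empty_cache() is moved before the return, since in the committed file it sits after `return output_text` and never executes.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Setup abridged from the truncated hunk header; trust_remote_code and
# torch_dtype here are assumptions, not shown in the diff.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/xgen-7b-8k-inst",
                                          trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst",
                                             torch_dtype=torch.float16).to("cuda")

def xgen(input_text, history):
    """Encode input_text, generate with the XGen LLM, then decode and return the reply."""
    # Prompt template for the LLM "context"
    header = (
        "A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
    )

    # Token ids for the prompt
    input_ids = tokenizer(header + input_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        # Generate output from the LLM
        outputs = model.generate(**input_ids,
                                 max_new_tokens=256,
                                 top_k=100,
                                 eos_token_id=50256)

    # Decode the output tensors into a string and drop the role label
    outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    output_text = outputs_decoded.strip().replace("Assistant:", "")

    # Moved before the return: in the committed file this call comes after
    # `return output_text` and is unreachable.
    torch.cuda.empty_cache()

    return output_text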
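One caveat on the new generate() arguments: in transformers, top_k only shapes sampling, and generate() defaults to greedy decoding (do_sample=False), so as committed the top_k=100 setting is ignored. If sampled output is intended, do_sample must be switched on, e.g.:

outputs = model.generate(**input_ids,
                         max_new_tokens=256,
                         do_sample=True,   # required for top_k to take effect
                         top_k=100,
                         eos_token_id=50256)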
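Nothing in the diff shows how xgen() is invoked, but its new (input_text, history) signature matches the (message, history) callback that gradio's ChatInterface expects, so the Space is presumably wired up roughly like this hypothetical snippet:

import gradio as gr

# Hypothetical wiring, not shown in the diff: gr.ChatInterface calls
# fn(message, history), matching xgen's new two-argument signature.
demo = gr.ChatInterface(fn=xgen)
demo.launch()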