Spaces:

sandz7
/

bubble_bee

Paused

sandz7 committed on May 15, 2024

Commit

0ce2dc8

1 Parent(s): 2132820

Cleaned up the output text by removing the "Assistant:" prefix and the end-of-text token

Files changed (2) hide show

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import pandas as pd
 import numpy as np
 import random
 import torch
 # Clear existing cache
 torch.cuda.empty_cache()
@@ -45,9 +46,11 @@ def xgen(input_text,
     # Decode the output tensors into string
     outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Remove header from the output
     output_text = outputs_decoded.replace(header, "").strip()
     return output_text

 import numpy as np
 import random
 import torch
+import re
 # Clear existing cache
 torch.cuda.empty_cache()
     # Decode the output tensors into string
     outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # CLEAN UP TEXT
     output_text = outputs_decoded.replace(header, "").strip()
+    output_text = re.sub(r'^Assistant:\s*', '', output_text)
+    output_text = output_text.replace('<|endoftext\>', '').strip()
     return output_text

steps.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	- This will have the xgen-7b-8k-inst LLM


1	+ This will have the xgen-7b-8k-inst LLM
2	+
3	+ xgen lacks context about what the user said in the prompt; this can be addressed by fine-tuning or by adjusting the training data a bit