sandz7 committed on
Commit
0ce2dc8
·
1 Parent(s): 2132820

cleaned up the output text to replace assistant and endoftext

Browse files
Files changed (2) hide show
  1. app.py +5 -2
  2. steps.txt +3 -1
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  import numpy as np
5
  import random
6
  import torch
 
7
  # Clear existing cache
8
  torch.cuda.empty_cache()
9
 
@@ -45,9 +46,11 @@ def xgen(input_text,
45
 
46
  # Decode the output tensors into string
47
  outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
-
49
- # Remove header from the output
50
  output_text = outputs_decoded.replace(header, "").strip()
 
 
51
 
52
  return output_text
53
 
 
4
  import numpy as np
5
  import random
6
  import torch
7
+ import re
8
  # Clear existing cache
9
  torch.cuda.empty_cache()
10
 
 
46
 
47
  # Decode the output tensors into string
48
  outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
+
50
+ # CLEAN UP TEXT
51
  output_text = outputs_decoded.replace(header, "").strip()
52
+ output_text = re.sub(r'^Assistant:\s*', '', output_text)
53
+ output_text = output_text.replace('<|endoftext\>', '').strip()
54
 
55
  return output_text
56
 
steps.txt CHANGED
@@ -1 +1,3 @@
1
- This will have the xgen-7b-8k-inst LLM
 
 
 
1
+ This will have the xgen-7b-8k-inst LLM
2
+
3
+ xgen lacks context about what the user said in the prompt; that can be fine-tuned or addressed by changing the training data a bit