Spaces:
Paused
Paused
cleaned up the output text to replace assistant and endoftext
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import pandas as pd
|
|
| 4 |
import numpy as np
|
| 5 |
import random
|
| 6 |
import torch
|
|
|
|
| 7 |
# Clear existing cache
|
| 8 |
torch.cuda.empty_cache()
|
| 9 |
|
|
@@ -45,9 +46,11 @@ def xgen(input_text,
|
|
| 45 |
|
| 46 |
# Decode the output tensors into string
|
| 47 |
outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 48 |
-
|
| 49 |
-
#
|
| 50 |
output_text = outputs_decoded.replace(header, "").strip()
|
|
|
|
|
|
|
| 51 |
|
| 52 |
return output_text
|
| 53 |
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import random
|
| 6 |
import torch
|
| 7 |
+
import re
|
| 8 |
# Clear existing cache
|
| 9 |
torch.cuda.empty_cache()
|
| 10 |
|
|
|
|
| 46 |
|
| 47 |
# Decode the output tensors into string
|
| 48 |
outputs_decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 49 |
+
|
| 50 |
+
# CLEAN UP TEXT
|
| 51 |
output_text = outputs_decoded.replace(header, "").strip()
|
| 52 |
+
output_text = re.sub(r'^Assistant:\s*', '', output_text)
|
| 53 |
+
output_text = output_text.replace('<|endoftext|>', '').strip()
|
| 54 |
|
| 55 |
return output_text
|
| 56 |
|
steps.txt
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
This will have the xgen-7b-8k-inst LLM
|
|
|
|
|
|
|
|
|
| 1 |
+
This will have the xgen-7b-8k-inst LLM
|
| 2 |
+
|
| 3 |
+
xgen lacks context about what the user said in the prompt; this can be improved by fine-tuning or by adjusting the training data a bit
|