Update README.md
Browse files
README.md
CHANGED
|
@@ -17,7 +17,7 @@ The model uses special tokens to structure its output:
|
|
| 17 |
|
| 18 |
### Input Format
|
| 19 |
The model expects input in the following format:
|
| 20 |
-
<|title|>{ticket_title}
|
| 21 |
|
| 22 |
|
| 23 |
### Output Parsing
|
|
@@ -71,12 +71,37 @@ tokenizer = T5Tokenizer.from_pretrained(model_path)
|
|
| 71 |
|
| 72 |
model.half()
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
def predict_ticket_summary(model:T5ForConditionalGeneration, title, description, max_length=128):
|
| 75 |
"""
|
| 76 |
Generate system and topic predictions for a ticket using quantized model
|
| 77 |
"""
|
| 78 |
# Format input text as used during training
|
| 79 |
-
input_text =
|
| 80 |
|
| 81 |
# Clear any previous tokenizer state and create fresh inputs
|
| 82 |
tokenizer.pad_token = tokenizer.eos_token # Ensure pad token is set
|
|
|
|
| 17 |
|
| 18 |
### Input Format
|
| 19 |
The model expects input in the following format:
|
| 20 |
+
<|title|>{ticket_title}</|title|><|desc|>{ticket_description}</|desc|>
|
| 21 |
|
| 22 |
|
| 23 |
### Output Parsing
|
|
|
|
| 71 |
|
| 72 |
model.half()
|
| 73 |
|
| 74 |
+
def create_model_input(short_description: str, description: str, max_chars=1024) -> str:
    """Build the tagged input string for one ticket.

    Every non-blank line of ``description`` is treated as a sentence: a line
    that does not already end in ".", "!" or "?" gets a trailing period, and
    the lines are then joined with single spaces. The joined text is split
    with ``sent_tokenize`` and whole sentences are kept, in order, for as long
    as their cumulative length stays below ``max_chars``.

    Args:
        short_description: Ticket title, placed between the title tags.
        description: Ticket body; may contain newlines and blank lines.
        max_chars: Exclusive upper bound on the summed length of the kept
            sentences (the spaces used to join them are not counted).

    Returns:
        ``<|title|>{title}</|title|><|desc|>{description}</|desc|>`` where the
        description part is empty if there is no text or if the first
        sentence alone reaches ``max_chars``.
    """
    terminators = (".", "!", "?")

    # Convert the newlines to sentences. A new list is built because
    # rebinding the loop variable would leave the original lines untouched.
    terminated_lines = []
    for raw_line in description.splitlines():
        line = raw_line.strip()
        if not line:
            # Blank lines carry no content and have no last character to test.
            continue
        terminated_lines.append(line if line.endswith(terminators) else line + ".")
    text = " ".join(terminated_lines)

    # Constrain the description to the specified length, keeping only
    # complete sentences.
    sentences = []
    total_chars = 0
    if text:
        for sentence in sent_tokenize(text):
            total_chars += len(sentence)
            if total_chars >= max_chars:
                break
            sentences.append(sentence)

    return (
        "<|title|>" + short_description + "</|title|><|desc|>"
        + " ".join(sentences) + "</|desc|>"
    )
|
| 98 |
+
|
| 99 |
def predict_ticket_summary(model:T5ForConditionalGeneration, title, description, max_length=128):
|
| 100 |
"""
|
| 101 |
Generate system and topic predictions for a ticket using quantized model
|
| 102 |
"""
|
| 103 |
# Format input text as used during training
|
| 104 |
+
input_text = create_model_input(title, description, max_length)
|
| 105 |
|
| 106 |
# Clear any previous tokenizer state and create fresh inputs
|
| 107 |
tokenizer.pad_token = tokenizer.eos_token # Ensure pad token is set
|