KameronB committed on
Commit
c7035fa
·
verified ·
1 Parent(s): b26d6c0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +27 -2
README.md CHANGED
@@ -17,7 +17,7 @@ The model uses special tokens to structure its output:
17
 
18
  ### Input Format
19
  The model expects input in the following format:
20
- <|title|>{ticket_title}<|title|><|description|>{ticket_description}<|description|>
21
 
22
 
23
  ### Output Parsing
@@ -71,12 +71,37 @@ tokenizer = T5Tokenizer.from_pretrained(model_path)
71
 
72
  model.half()
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  def predict_ticket_summary(model:T5ForConditionalGeneration, title, description, max_length=128):
75
  """
76
  Generate system and topic predictions for a ticket using quantized model
77
  """
78
  # Format input text as used during training
79
- input_text = f"<|title|>{title}<|title|><|description|>{description}<|description|>"
80
 
81
  # Clear any previous tokenizer state and create fresh inputs
82
  tokenizer.pad_token = tokenizer.eos_token # Ensure pad token is set
 
17
 
18
  ### Input Format
19
  The model expects input in the following format:
20
+ <|title|>{ticket_title}</|title|><|desc|>{ticket_description}</|desc|>
21
 
22
 
23
  ### Output Parsing
 
71
 
72
  model.half()
73
 
74
def create_model_input(short_description: str, description: str, max_chars=1024) -> str:
    """Build the tagged prompt string the model expects for one ticket.

    Each non-blank line of ``description`` is treated as a sentence: it is
    stripped of surrounding whitespace and given a terminating period unless
    it already ends with ``.``, ``!`` or ``?``. The resulting text is split
    into sentences with ``sent_tokenize`` and whole sentences are kept, in
    order, while the running total of their lengths stays below ``max_chars``.

    Args:
        short_description: Ticket title, inserted verbatim.
        description: Ticket body; may contain newlines and blank lines.
        max_chars: Character budget for the description. Only the characters
            of the kept sentences are counted, not the single spaces used to
            join them.

    Returns:
        ``<|title|>{title}</|title|><|desc|>{description}</|desc|>`` with the
        normalised, length-limited description.
    """
    # Convert the newlines to sentences. Build a new list rather than
    # reassigning the loop variable, which would leave the lines unchanged.
    sentence_lines = []
    for raw_line in description.split("\n"):
        line = raw_line.strip()
        if not line:
            # Blank lines carry no content (and have no last character).
            continue
        if not line.endswith((".", "!", "?")):
            line += "."
        sentence_lines.append(line)
    description = " ".join(sentence_lines)

    # Constrain the description to the specified length, never cutting a
    # sentence in half: stop at the first sentence that reaches the budget.
    total_chars = 0
    sentences = []
    for sentence in sent_tokenize(description):
        total_chars += len(sentence)
        if total_chars >= max_chars:
            break
        sentences.append(sentence)

    description = " ".join(sentences)

    return "<|title|>" + short_description + "</|title|><|desc|>" + description + "</|desc|>"
98
+
99
  def predict_ticket_summary(model:T5ForConditionalGeneration, title, description, max_length=128):
100
  """
101
  Generate system and topic predictions for a ticket using quantized model
102
  """
103
  # Format input text as used during training
104
+ input_text = create_model_input(title, description, max_length)
105
 
106
  # Clear any previous tokenizer state and create fresh inputs
107
  tokenizer.pad_token = tokenizer.eos_token # Ensure pad token is set