Upload handler.py
Browse files- handler.py +21 -0
handler.py
CHANGED
|
@@ -135,6 +135,27 @@ class _SimpleTokenizer:
|
|
| 135 |
tokens.append(self.eos_token_id)
|
| 136 |
return tokens
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
def _summarise_intent(psi: Sequence[float], top_k: int = 4) -> list[str]:
|
| 140 |
"""Convert strongest symbolic dimensions into descriptors."""
|
|
|
|
| 135 |
tokens.append(self.eos_token_id)
|
| 136 |
return tokens
|
| 137 |
|
| 138 |
+
def decode(self, token_ids: Sequence[int]) -> str:
    """Convert token IDs back into a text string.

    Decoding stops at the first end-of-sequence ID. Padding and
    beginning-of-sequence IDs are skipped without producing output. Any
    other ID that falls outside the vocabulary, or that maps to one of the
    special marker tokens, is rendered as ``"?"``.

    Args:
        token_ids: Token IDs to decode, in order.

    Returns:
        The decoded vocabulary entries joined into a single string.
    """
    # Loop-invariant lookups: build them once instead of once per token.
    skipped_ids = {self.pad_token_id, self.bos_token_id}
    special_tokens = {"<pad>", "<bos>", "<eos>", "<unk>"}
    vocab = self._tokens
    eos_id = self.eos_token_id

    pieces: list[str] = []
    for idx in token_ids:
        if idx == eos_id:
            break
        if idx in skipped_ids:
            continue
        # Explicit bounds check: a negative ID must not wrap around to the
        # end of the vocabulary through Python's negative indexing.
        if 0 <= idx < len(vocab) and vocab[idx] not in special_tokens:
            pieces.append(vocab[idx])
        else:
            pieces.append("?")
    return "".join(pieces)
|
| 158 |
+
|
| 159 |
|
| 160 |
def _summarise_intent(psi: Sequence[float], top_k: int = 4) -> list[str]:
|
| 161 |
"""Convert strongest symbolic dimensions into descriptors."""
|