tagパース解除
Browse files
- package/ai.py +1 -5
package/ai.py
CHANGED
|
@@ -163,14 +163,10 @@ class AI:
|
|
| 163 |
]
|
| 164 |
|
| 165 |
def _clean_text(text: str) -> str:
|
| 166 |
-
"""制御文字・不可視文字・置換文字
|
| 167 |
if not text:
|
| 168 |
return ""
|
| 169 |
|
| 170 |
-
# Llama 3.2の特殊トークンを除去
|
| 171 |
-
for special_token in LLAMA_SPECIAL_TOKENS:
|
| 172 |
-
text = text.replace(special_token, "")
|
| 173 |
-
|
| 174 |
# 制御文字(0x00-0x1F、0x7F-0x9F)を除去
|
| 175 |
# ただし、改行・タブ・復帰は許可
|
| 176 |
cleaned = []
|
|
|
|
| 163 |
]
|
| 164 |
|
| 165 |
def _clean_text(text: str) -> str:
|
| 166 |
+
"""制御文字・不可視文字・置換文字を厳密に取り除く(正規タグは保持)"""
|
| 167 |
if not text:
|
| 168 |
return ""
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
# 制御文字(0x00-0x1F、0x7F-0x9F)を除去
|
| 171 |
# ただし、改行・タブ・復帰は許可
|
| 172 |
cleaned = []
|