Update app.py
Browse files
app.py
CHANGED
|
@@ -62,6 +62,28 @@ llm_model = None
|
|
| 62 |
|
| 63 |
def trans(text):
    """Tokenize *text* for Japanese translation (debug stage).

    Prepends the "<2ja>" target-language tag expected by the model,
    encodes to UTF-8 bytes, tokenizes with the module-level ``llm``
    model, and prints the resulting token ids for inspection.

    Parameters
    ----------
    text : str
        Source text to translate.
    """
    # BUG FIX: the original unconditionally did `text = "こんにちは"`,
    # clobbering the caller's argument (debug leftover) — removed so the
    # function actually processes its input.
    # Attach the target-language tag; the tokenizer expects raw bytes.
    input_text = f"<2ja>{text}".encode('utf-8')
    tokens = llm.tokenize(input_text)
    print("Tokens:", tokens)
|
|
|
| 62 |
|
| 63 |
def trans(text):
    """Translate *text* to Japanese with the loaded llama.cpp model.

    Prepends the "<2ja>" target-language tag, tokenizes the tagged
    input, then generates greedily (temperature 0.0) from the BOS token
    until EOS, returning the decoded output string.

    Assumes the module-level ``llm`` model has been initialized.

    Parameters
    ----------
    text : str
        Source text to translate.

    Returns
    -------
    str
        The generated (translated) text.
    """
    # BUG FIX: the original unconditionally did `text = "こんにちは"`,
    # clobbering the caller's argument (debug leftover) — removed.

    # Attach the target-language tag and convert to the byte string the
    # tokenizer expects.
    input_text = f"<2ja>{text}".encode('utf-8')

    # Tokenize (printed for debugging visibility).
    tokens = llm.tokenize(input_text)
    print("Tokens:", tokens)

    # Start generation from the BOS token (decoder-only model assumed).
    # NOTE(review): the input `tokens` computed above are never fed to
    # generate() — confirm whether initial_tokens should include them.
    initial_tokens = [llm.token_bos()]

    # Greedy generation. BUG FIX: accumulate raw bytes and decode ONCE at
    # the end — per-token decode with errors='ignore' silently drops
    # multi-byte UTF-8 characters that span token boundaries. Also stop
    # BEFORE appending the EOS token to the output.
    out_bytes = b""
    for token in llm.generate(initial_tokens, top_p=0.95, temperature=0.0, repetition_penalty=1.0):
        if token == llm.token_eos():
            break
        out_bytes += llm.detokenize([token])

    return out_bytes.decode('utf-8', errors='ignore')