Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ model_id = "yuna1126/Tema_Q-R-0.4B-GGUF"
|
|
| 21 |
model_file = "Tema_Q-R-0.4B-f16.gguf"
|
| 22 |
|
| 23 |
# 入力制限文字数
|
| 24 |
-
MAX_INPUT_CHARS =
|
| 25 |
|
| 26 |
print("Downloading model...")
|
| 27 |
model_path = hf_hub_download(repo_id=model_id, filename=model_file)
|
|
@@ -37,19 +37,24 @@ llm = Llama(
|
|
| 37 |
print("Model loaded.")
|
| 38 |
|
| 39 |
def chat_response(message, history):
|
| 40 |
-
#
|
| 41 |
if len(message) > MAX_INPUT_CHARS:
|
| 42 |
-
yield f"入力が長すぎます。{MAX_INPUT_CHARS}文字以内で入力してください。
|
| 43 |
return
|
| 44 |
-
# ----------------------
|
| 45 |
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
try:
|
| 49 |
output = llm(
|
| 50 |
prompt,
|
| 51 |
max_tokens=512,
|
| 52 |
-
|
|
|
|
| 53 |
stream=True
|
| 54 |
)
|
| 55 |
|
|
|
|
| 21 |
model_file = "Tema_Q-R-0.4B-f16.gguf"
|
| 22 |
|
| 23 |
# 入力制限文字数
|
| 24 |
+
MAX_INPUT_CHARS = 700
|
| 25 |
|
| 26 |
print("Downloading model...")
|
| 27 |
model_path = hf_hub_download(repo_id=model_id, filename=model_file)
|
|
|
|
| 37 |
print("Model loaded.")
|
| 38 |
|
| 39 |
def chat_response(message, history):
|
| 40 |
+
# 文字数制限
|
| 41 |
if len(message) > MAX_INPUT_CHARS:
|
| 42 |
+
yield f"入力が長すぎます。{MAX_INPUT_CHARS}文字以内で入力してください。"
|
| 43 |
return
|
|
|
|
| 44 |
|
| 45 |
+
# テンプレートに合わせたプロンプト形式に変更
|
| 46 |
+
# <|im_start|>user
|
| 47 |
+
# メッセージ
|
| 48 |
+
# <|im_end|>
|
| 49 |
+
# <|im_start|>assistant
|
| 50 |
+
prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
|
| 51 |
|
| 52 |
try:
|
| 53 |
output = llm(
|
| 54 |
prompt,
|
| 55 |
max_tokens=512,
|
| 56 |
+
# 停止トークンもテンプレートに合わせて変更
|
| 57 |
+
stop=["<|im_end|>", "<|im_start|>"],
|
| 58 |
stream=True
|
| 59 |
)
|
| 60 |
|