Update app.py
Browse files
app.py
CHANGED
|
@@ -137,27 +137,6 @@ def generate_chat_response(messages, max_tokens=512, temperature=0.7):
|
|
| 137 |
eos_token_id=tokenizer.eos_token_id,
|
| 138 |
)
|
| 139 |
|
| 140 |
-
generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
|
| 141 |
-
response = tokenizer.decode(generated_ids, skip_special_tokens=True)
|
| 142 |
-
response = response.split("<|im_end|>")[0].strip()
|
| 143 |
-
|
| 144 |
-
# 如果生成内容过短,添加保底文字(方便判断是否真的没生成)
|
| 145 |
-
if len(response) < 10:
|
| 146 |
-
response = "(模型输出内容较短,以下是生成的回复) " + response
|
| 147 |
-
|
| 148 |
-
# 记录生成结果,方便调试
|
| 149 |
-
logger.info(f"生成内容长度: {len(response)} 字符,前50字: {response[:50]}")
|
| 150 |
-
|
| 151 |
-
del inputs, outputs
|
| 152 |
-
gc.collect()
|
| 153 |
-
|
| 154 |
-
# 临时强制返回固定内容,用于测试客户端是否能显示任何回复
|
| 155 |
-
response = "测试成功!这是来自模型的回复。\n当前时间:" + time.strftime("%Y-%m-%d %H:%M:%S")
|
| 156 |
-
|
| 157 |
-
# 如果你想保留模型生成的内容,可以注释上面一行,保留下面这行
|
| 158 |
-
# response = "【测试】\n" + response[:200] + "\n(已截断,仅用于测试)"
|
| 159 |
-
|
| 160 |
-
logger.info(f"生成内容长度: {len(response)} 字符,前50字: {response[:50]}")
|
| 161 |
|
| 162 |
return {"text": response}
|
| 163 |
|
|
|
|
| 137 |
eos_token_id=tokenizer.eos_token_id,
|
| 138 |
)
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
return {"text": response}
|
| 142 |
|