Spaces:
Sleeping
Sleeping
asemxin committed on
Commit ·
f25e14c
1
Parent(s): 1d06241
feat: 发图先问需求再按需 Vision 分析
Browse files- image_daemon.py +76 -20
image_daemon.py
CHANGED
|
@@ -24,6 +24,9 @@ _soul_prompt = "" # SOUL.md 内容
|
|
| 24 |
_chat_history = {} # {chat_id: [{role, content}, ...]}
|
| 25 |
MAX_HISTORY = 10 # 保留最近 N 轮
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
# ---------- 日志 ----------
|
| 28 |
def log(msg):
|
| 29 |
ts = time.strftime("%H:%M:%S")
|
|
@@ -197,6 +200,38 @@ def analyze_image_with_vision(img_data):
|
|
| 197 |
log(f"⚠️ Vision 异常: {e}")
|
| 198 |
return None
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
# ---------- 处理图片消息 ----------
|
| 201 |
def handle_image_message(message_id, chat_id, image_key):
|
| 202 |
"""下载 → Vision分析 → 上传 → 发送"""
|
|
@@ -212,22 +247,37 @@ def handle_image_message(message_id, chat_id, image_key):
|
|
| 212 |
if not img_data:
|
| 213 |
return
|
| 214 |
|
| 215 |
-
log(f"📥 {len(img_data)} bytes,
|
| 216 |
|
| 217 |
-
#
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
{"role": "assistant", "content": vision}
|
| 227 |
-
]
|
| 228 |
-
_chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
|
| 229 |
else:
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
# ---------- SOUL.md 加载 ----------
|
| 233 |
def load_soul():
|
|
@@ -309,22 +359,28 @@ def chat_with_llm(user_text, history=None):
|
|
| 309 |
|
| 310 |
# ---------- 处理文本消息 ----------
|
| 311 |
def handle_text_message(message_id, chat_id, text):
|
| 312 |
-
"""LLM (带历史) -> 发送"""
|
| 313 |
token = get_token()
|
| 314 |
if not token:
|
| 315 |
return
|
| 316 |
|
| 317 |
-
#
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
-
reply = chat_with_llm(text, history)
|
| 321 |
if reply:
|
| 322 |
-
|
| 323 |
history = history + [
|
| 324 |
{"role": "user", "content": text},
|
| 325 |
{"role": "assistant", "content": reply}
|
| 326 |
]
|
| 327 |
-
# 只保留最近 N 轮(每轮 2 条记录)
|
| 328 |
_chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
|
| 329 |
send_text(token, chat_id, reply)
|
| 330 |
|
|
|
|
| 24 |
_chat_history = {} # {chat_id: [{role, content}, ...]}
|
| 25 |
MAX_HISTORY = 10 # 保留最近 N 轮
|
| 26 |
|
| 27 |
+
# 待处理图片 (per chat_id) —— 用户发图后先问需求再分析
|
| 28 |
+
_pending_images = {} # {chat_id: base64_string}
|
| 29 |
+
|
| 30 |
# ---------- 日志 ----------
|
| 31 |
def log(msg):
|
| 32 |
ts = time.strftime("%H:%M:%S")
|
|
|
|
| 200 |
log(f"⚠️ Vision 异常: {e}")
|
| 201 |
return None
|
| 202 |
|
| 203 |
+
def analyze_image_with_request(b64, user_request):
    """Analyze a cached image according to what the user asked for.

    Sends one multimodal chat-completions request containing the persona
    prompt, the user's request, and the image as a base64 data URL.

    Args:
        b64: Base64-encoded image data. It is always labelled as
            ``image/jpeg`` in the data URL, whatever the real format is.
        user_request: The user's free-text description of what to do
            with the image.

    Returns:
        The model's reply text on an HTTP 200 response. ``None`` when no
        API key is configured, the response status is not 200, or any
        exception is raised while sending or parsing the request.
    """
    if not API_KEY:
        return None

    try:
        # Fall back to a generic system-style preamble when no persona is loaded.
        persona = _soul_prompt or "You are a helpful assistant."
        instruction = f"这位信徒发来了一张图片,并希望你能:{user_request}\n请以你的风格,基于这张图片的内容作出答复。"

        # Persona and instruction travel in the same user message as the image.
        message = {
            "role": "user",
            "content": [
                {"type": "text", "text": persona + "\n\n---\n\n" + instruction},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            ],
        }
        resp = requests.post(
            f"{API_BASE_URL}/chat/completions",
            headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
            json={"model": MODEL_NAME, "messages": [message], "stream": False},
            timeout=30,
        )

        if resp.status_code != 200:
            log(f"⚠️ 针对 Vision 失败 ({resp.status_code})")
            return None

        reply = resp.json()["choices"][0]["message"]["content"]
        # Only the first 60 characters are logged to keep log lines short.
        log(f"📸 针对分析完成: {reply[:60]}...")
        return reply
    except Exception as e:
        # Best-effort call: any failure is logged and reported as "no reply".
        log(f"⚠️ 针对 Vision 异常: {e}")
    return None
|
| 234 |
+
|
| 235 |
# ---------- 处理图片消息 ----------
|
| 236 |
def handle_image_message(message_id, chat_id, image_key):
|
| 237 |
"""下载 → Vision分析 → 上传 → 发送"""
|
|
|
|
| 247 |
if not img_data:
|
| 248 |
return
|
| 249 |
|
| 250 |
+
log(f"📥 {len(img_data)} bytes, 存储并问询需求...")
|
| 251 |
|
| 252 |
+
# 存储 base64 到待处理缓存
|
| 253 |
+
b64 = base64.b64encode(img_data).decode("utf-8")
|
| 254 |
+
_pending_images[chat_id] = b64
|
| 255 |
+
|
| 256 |
+
# 以人设风格问用户需求
|
| 257 |
+
soul = _soul_prompt or ""
|
| 258 |
+
ask_prompt = f"{soul}\n\n---\n\n这位信徒初次发来一张图片。请用一句话、中文回复,问对方希望你就这张图片做什么(例如:描述内容、翻译文字、分析意义等)。不要自行分析图片。"
|
| 259 |
+
if not API_KEY:
|
| 260 |
+
question = "幸会!你发来了一张图片。你希望老讲为你做什么呢?"
|
|
|
|
|
|
|
|
|
|
| 261 |
else:
|
| 262 |
+
try:
|
| 263 |
+
resp = requests.post(
|
| 264 |
+
f"{API_BASE_URL}/chat/completions",
|
| 265 |
+
headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
|
| 266 |
+
json={"model": MODEL_NAME, "messages": [{"role": "user", "content": ask_prompt}], "stream": False},
|
| 267 |
+
timeout=20
|
| 268 |
+
)
|
| 269 |
+
question = resp.json()["choices"][0]["message"]["content"] if resp.status_code == 200 else "你希望老讲就这张图进行什么分析呢?"
|
| 270 |
+
except Exception:
|
| 271 |
+
question = "你希望老讲就这张图进行什么分析呢?"
|
| 272 |
+
log(f"💬 问询用户需求: {question[:60]}")
|
| 273 |
+
result = send_text(token, chat_id, question)
|
| 274 |
+
log(f"📤 问询已发 (code={result.get('code', '?')})")
|
| 275 |
+
# 将问询写入历史
|
| 276 |
+
history = _chat_history.get(chat_id, [])
|
| 277 |
+
_chat_history[chat_id] = (history + [
|
| 278 |
+
{"role": "user", "content": "[user sent an image]"},
|
| 279 |
+
{"role": "assistant", "content": question}
|
| 280 |
+
])[-(MAX_HISTORY * 2):]
|
| 281 |
|
| 282 |
# ---------- SOUL.md 加载 ----------
|
| 283 |
def load_soul():
|
|
|
|
| 359 |
|
| 360 |
# ---------- 处理文本消息 ----------
|
| 361 |
def handle_text_message(message_id, chat_id, text):
    """Answer a text message, using a pending image for the chat if one exists.

    If an image is waiting for this chat, the text is treated as the
    user's request about that image and a targeted vision analysis is
    tried first. Otherwise, or when that analysis yields nothing, the
    text goes to the regular LLM chat with the stored history.

    Args:
        message_id: Identifier of the incoming message (not used in
            this function body).
        chat_id: Key for the per-chat history and pending-image cache.
        text: The user's message text.

    Returns:
        None. When a reply is produced it is appended to the chat
        history and sent to the chat; with no token or no reply the
        function returns without sending anything.
    """
    token = get_token()
    if not token:
        return

    # The pending image is removed up front, so it is consumed by this
    # message even if the vision analysis below fails.
    pending_b64 = _pending_images.pop(chat_id, None)

    reply = None
    if pending_b64:
        log(f"📸 检测到待处理图片,按需求分析: {text[:40]}")
        reply = analyze_image_with_request(pending_b64, text)

    # Plain chat handles both "no pending image" and "vision returned nothing".
    if not reply:
        reply = chat_with_llm(text, _chat_history.get(chat_id, []))

    if not reply:
        return

    turns = _chat_history.get(chat_id, []) + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": reply},
    ]
    # Keep only the most recent MAX_HISTORY rounds (two entries per round).
    _chat_history[chat_id] = turns[-(MAX_HISTORY * 2):]
    send_text(token, chat_id, reply)
|
| 386 |
|