asemxin committed on
Commit
f25e14c
·
1 Parent(s): 1d06241

feat: 发图先问需求再按需 Vision 分析

Browse files
Files changed (1) hide show
  1. image_daemon.py +76 -20
image_daemon.py CHANGED
@@ -24,6 +24,9 @@ _soul_prompt = "" # SOUL.md 内容
24
  _chat_history = {} # {chat_id: [{role, content}, ...]}
25
  MAX_HISTORY = 10 # 保留最近 N 轮
26
 
 
 
 
27
  # ---------- 日志 ----------
28
  def log(msg):
29
  ts = time.strftime("%H:%M:%S")
@@ -197,6 +200,38 @@ def analyze_image_with_vision(img_data):
197
  log(f"⚠️ Vision 异常: {e}")
198
  return None
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  # ---------- 处理图片消息 ----------
201
  def handle_image_message(message_id, chat_id, image_key):
202
  """下载 → Vision分析 → 上传 → 发送"""
@@ -212,22 +247,37 @@ def handle_image_message(message_id, chat_id, image_key):
212
  if not img_data:
213
  return
214
 
215
- log(f"📥 {len(img_data)} bytes, Vision 分析中...")
216
 
217
- # 调用 Vision 分析
218
- vision = analyze_image_with_vision(img_data)
219
- if vision:
220
- result = send_text(token, chat_id, vision)
221
- log(f"📤 已发送感悟 (code={result.get('code', '?')})")
222
- # 将图片对话写入历史,以便用户后续文字追问时能引用
223
- history = _chat_history.get(chat_id, [])
224
- history = history + [
225
- {"role": "user", "content": "[用户了一张图片]"},
226
- {"role": "assistant", "content": vision}
227
- ]
228
- _chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
229
  else:
230
- log("⚠️ Vision 分析失败,无回复")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  # ---------- SOUL.md 加载 ----------
233
  def load_soul():
@@ -309,22 +359,28 @@ def chat_with_llm(user_text, history=None):
309
 
310
  # ---------- 处理文本消息 ----------
311
  def handle_text_message(message_id, chat_id, text):
312
- """LLM (带历史) -> 发送"""
313
  token = get_token()
314
  if not token:
315
  return
316
 
317
- # 获取该用户历史
318
- history = _chat_history.get(chat_id, [])
 
 
 
 
 
 
 
 
319
 
320
- reply = chat_with_llm(text, history)
321
  if reply:
322
- # 更新历史
323
  history = history + [
324
  {"role": "user", "content": text},
325
  {"role": "assistant", "content": reply}
326
  ]
327
- # 只保留最近 N 轮(每轮 2 条记录)
328
  _chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
329
  send_text(token, chat_id, reply)
330
 
 
24
  _chat_history = {} # {chat_id: [{role, content}, ...]}
25
  MAX_HISTORY = 10 # 保留最近 N 轮
26
 
27
+ # 待处理图片 (per chat_id) —— 用户发图后先问需求再分析
28
+ _pending_images = {} # {chat_id: base64_string}
29
+
30
  # ---------- 日志 ----------
31
  def log(msg):
32
  ts = time.strftime("%H:%M:%S")
 
200
  log(f"⚠️ Vision 异常: {e}")
201
  return None
202
 
203
def analyze_image_with_request(b64, user_request):
    """Answer a user's specific request about a previously cached image.

    Sends one chat-completions request whose single user message carries
    the persona text (``_soul_prompt``, or a generic fallback), the user's
    request, and the image embedded as a ``data:image/jpeg;base64`` URL.

    Args:
        b64: Base64-encoded image data, interpolated into the data URL.
        user_request: The user's text describing what to do with the image.

    Returns:
        The reply text when the API answers with HTTP 200; ``None`` when
        ``API_KEY`` is unset, the status is not 200, or any exception is
        raised (failures are logged, never propagated).
    """
    if not API_KEY:
        return None

    try:
        persona = _soul_prompt or "You are a helpful assistant."
        instruction = f"这位信徒发来了一张图片,并希望你能:{user_request}\n请以你的风格,基于这张图片的内容作出答复。"

        endpoint = f"{API_BASE_URL}/chat/completions"
        request_headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
        # Persona and instruction travel together in the text part; the
        # image follows as a second content part of the same user message.
        text_part = {"type": "text", "text": persona + "\n\n---\n\n" + instruction}
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
        }
        payload = {
            "model": MODEL_NAME,
            "messages": [{"role": "user", "content": [text_part, image_part]}],
            "stream": False,
        }

        resp = requests.post(endpoint, headers=request_headers, json=payload, timeout=30)
        if resp.status_code != 200:
            log(f"⚠️ 针对 Vision 失败 ({resp.status_code})")
            return None

        answer = resp.json()["choices"][0]["message"]["content"]
        log(f"📸 针对分析完成: {answer[:60]}...")
        return answer
    except Exception as err:
        log(f"⚠️ 针对 Vision 异常: {err}")
    return None
234
+
235
  # ---------- 处理图片消息 ----------
236
  def handle_image_message(message_id, chat_id, image_key):
237
  """下载 → Vision分析 → 上传 → 发送"""
 
247
  if not img_data:
248
  return
249
 
250
+ log(f"📥 {len(img_data)} bytes, 存储并问询需求...")
251
 
252
+ # 存储 base64 到待处理缓存
253
+ b64 = base64.b64encode(img_data).decode("utf-8")
254
+ _pending_images[chat_id] = b64
255
+
256
+ # 以人设风格问用户需求
257
+ soul = _soul_prompt or ""
258
+ ask_prompt = f"{soul}\n\n---\n\n这位信徒初次发来一张图片。请用一句话、中文回复,问对方希望你就这张图片做什么(例如:描述内容、翻译文字、分析意义等)。不要自行分析图片。"
259
+ if not API_KEY:
260
+ question = "幸会!你了一张图片。你希望老讲为你做什么呢?"
 
 
 
261
  else:
262
+ try:
263
+ resp = requests.post(
264
+ f"{API_BASE_URL}/chat/completions",
265
+ headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
266
+ json={"model": MODEL_NAME, "messages": [{"role": "user", "content": ask_prompt}], "stream": False},
267
+ timeout=20
268
+ )
269
+ question = resp.json()["choices"][0]["message"]["content"] if resp.status_code == 200 else "你希望老讲就这张图进行什么分析呢?"
270
+ except Exception:
271
+ question = "你希望老讲就这张图进行什么分析呢?"
272
+ log(f"💬 问询用户需求: {question[:60]}")
273
+ result = send_text(token, chat_id, question)
274
+ log(f"📤 问询已发 (code={result.get('code', '?')})")
275
+ # 将问询写入历史
276
+ history = _chat_history.get(chat_id, [])
277
+ _chat_history[chat_id] = (history + [
278
+ {"role": "user", "content": "[user sent an image]"},
279
+ {"role": "assistant", "content": question}
280
+ ])[-(MAX_HISTORY * 2):]
281
 
282
  # ---------- SOUL.md 加载 ----------
283
  def load_soul():
 
359
 
360
  # ---------- 处理文本消息 ----------
361
  def handle_text_message(message_id, chat_id, text):
362
+ """LLM (带历史) -> 发送,如有待处理图片则做针对 Vision"""
363
  token = get_token()
364
  if not token:
365
  return
366
 
367
+ # 检查是否有待分析图片
368
+ b64 = _pending_images.pop(chat_id, None)
369
+ if b64:
370
+ log(f"📸 检测到待处理图片,按需求分析: {text[:40]}")
371
+ reply = analyze_image_with_request(b64, text)
372
+ if not reply:
373
+ reply = chat_with_llm(text, _chat_history.get(chat_id, []))
374
+ else:
375
+ history = _chat_history.get(chat_id, [])
376
+ reply = chat_with_llm(text, history)
377
 
 
378
  if reply:
379
+ history = _chat_history.get(chat_id, [])
380
  history = history + [
381
  {"role": "user", "content": text},
382
  {"role": "assistant", "content": reply}
383
  ]
 
384
  _chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
385
  send_text(token, chat_id, reply)
386