Spaces:

ASEM12345
/

MoltBotMKX

Sleeping

App Files Files Community

asemxin committed 29 days ago

Commit

f25e14c

1 Parent(s): 1d06241

feat: 发图先问需求再按需 Vision 分析

Browse files

Files changed (1) hide show

image_daemon.py +76 -20

image_daemon.py CHANGED Viewed

@@ -24,6 +24,9 @@ _soul_prompt = ""     # SOUL.md 内容
 _chat_history = {}    # {chat_id: [{role, content}, ...]}
 MAX_HISTORY = 10      # 保留最近 N 轮
 # ---------- 日志 ----------
 def log(msg):
     ts = time.strftime("%H:%M:%S")
@@ -197,6 +200,38 @@ def analyze_image_with_vision(img_data):
         log(f"⚠️ Vision 异常: {e}")
     return None
 # ---------- 处理图片消息 ----------
 def handle_image_message(message_id, chat_id, image_key):
     """下载 → Vision分析 → 上传 → 发送"""
@@ -212,22 +247,37 @@ def handle_image_message(message_id, chat_id, image_key):
     if not img_data:
         return
-    log(f"📥 {len(img_data)} bytes, Vision 分析中...")
-    # 调用 Vision 分析
-    vision = analyze_image_with_vision(img_data)
-    if vision:
-        result = send_text(token, chat_id, vision)
-        log(f"📤 已发送感悟 (code={result.get('code', '?')})")
-        # 将图片对话写入历史，以便用户后续文字追问时能引用
-        history = _chat_history.get(chat_id, [])
-        history = history + [
-            {"role": "user", "content": "[用户发送了一张图片]"},
-            {"role": "assistant", "content": vision}
-        ]
-        _chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
     else:
-        log("⚠️ Vision 分析失败，无回复")
 # ---------- SOUL.md 加载 ----------
 def load_soul():
@@ -309,22 +359,28 @@ def chat_with_llm(user_text, history=None):
 # ---------- 处理文本消息 ----------
 def handle_text_message(message_id, chat_id, text):
-    """LLM (带历史) -> 发送"""
     token = get_token()
     if not token:
         return
-    # 获取该用户的历史
-    history = _chat_history.get(chat_id, [])
-    reply = chat_with_llm(text, history)
     if reply:
-        # 更新历史
         history = history + [
             {"role": "user", "content": text},
             {"role": "assistant", "content": reply}
         ]
-        # 只保留最近 N 轮（每轮 2 条记录）
         _chat_history[chat_id] = history[-(MAX_HISTORY * 2):]
         send_text(token, chat_id, reply)

 _chat_history = {}    # {chat_id: [{role, content}, ...]}
 MAX_HISTORY = 10      # keep the most recent N rounds
+# Pending image per chat_id: after a user sends an image, ask what they want before analysing it
+_pending_images = {}  # {chat_id: base64_string}
 # ---------- 日志 ----------
 def log(msg):
     ts = time.strftime("%H:%M:%S")
         log(f"⚠️ Vision 异常: {e}")
     return None
+def analyze_image_with_request(b64, user_request):  # b64: base64 image text; user_request: text interpolated into the prompt
+    """根据用户需求对缓存的图片做针对性分析"""  # Run a request-specific analysis of the cached image; returns the reply text, or None on any failure.
+    if not API_KEY:  # without an API key no request is attempted
+        return None
+    try:
+        soul = _soul_prompt or "You are a helpful assistant."  # generic fallback when no persona text is loaded
+        prompt = f"这位信徒发来了一张图片，并希望你能：{user_request}\n请以你的风格，基于这张图片的内容作出答复。"
+        resp = requests.post(
+            f"{API_BASE_URL}/chat/completions",
+            headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
+            json={
+                "model": MODEL_NAME,
+                "messages": [{
+                    "role": "user",  # persona, instruction and image all travel in this single user message
+                    "content": [
+                        {"type": "text", "text": soul + "\n\n---\n\n" + prompt},  # persona and instruction separated by a '---' rule
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}  # NOTE(review): always labelled image/jpeg, whatever the real format is
+                    ]
+                }],
+                "stream": False
+            },
+            timeout=30  # seconds
+        )
+        if resp.status_code == 200:
+            reply = resp.json()["choices"][0]["message"]["content"]  # text of the first choice
+            log(f"📸 针对分析完成: {reply[:60]}...")  # log only the first 60 characters
+            return reply
+        log(f"⚠️ 针对 Vision 失败 ({resp.status_code})")  # non-200: log the status and fall through to return None
+    except Exception as e:  # any error (request, JSON shape, ...) is logged rather than raised
+        log(f"⚠️ 针对 Vision 异常: {e}")
+    return None  # reached on non-200 responses and on exceptions
 # ---------- 处理图片消息 ----------
 def handle_image_message(message_id, chat_id, image_key):
     """下载 → Vision分析 → 上传 → 发送"""
     if not img_data:
         return
+    log(f"📥 {len(img_data)} bytes, 存储并问询需求...")
+    # 存储 base64 到待处理缓存
+    b64 = base64.b64encode(img_data).decode("utf-8")
+    _pending_images[chat_id] = b64
+    # 以人设风格问用户需求
+    soul = _soul_prompt or ""
+    ask_prompt = f"{soul}\n\n---\n\n这位信徒初次发来一张图片。请用一句话、中文回复，问对方希望你就这张图片做什么（例如：描述内容、翻译文字、分析意义等）。不要自行分析图片。"
+    if not API_KEY:
+        question = "幸会！你发来了一张图片。你希望老讲为你做什么呢？"
     else:
+        try:
+            resp = requests.post(
+                f"{API_BASE_URL}/chat/completions",
+                headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
+                json={"model": MODEL_NAME, "messages": [{"role": "user", "content": ask_prompt}], "stream": False},
+                timeout=20
+            )
+            question = resp.json()["choices"][0]["message"]["content"] if resp.status_code == 200 else "你希望老讲就这张图进行什么分析呢？"
+        except Exception:
+            question = "你希望老讲就这张图进行什么分析呢？"
+    log(f"💬 问询用户需求: {question[:60]}")
+    result = send_text(token, chat_id, question)
+    log(f"📤 问询已发 (code={result.get('code', '?')})")
+    # 将问询写入历史
+    history = _chat_history.get(chat_id, [])
+    _chat_history[chat_id] = (history + [
+        {"role": "user", "content": "[user sent an image]"},
+        {"role": "assistant", "content": question}
+    ])[-(MAX_HISTORY * 2):]
 # ---------- SOUL.md 加载 ----------
 def load_soul():
 # ---------- 处理文本消息 ----------
 def handle_text_message(message_id, chat_id, text):  # message_id is not used in the lines shown here
+    """LLM (带历史) -> 发送，如有待处理图片则做针对 Vision"""  # Reply via the LLM with history; if an image is pending for this chat, run the request-specific vision analysis first.
     token = get_token()
     if not token:  # no token: return without replying
         return
+    # Check whether an image is waiting to be analysed
+    b64 = _pending_images.pop(chat_id, None)  # pop removes the entry, so the image is consumed even if the analysis below fails
+    if b64:
+        log(f"📸 检测到待处理图片，按需求分析: {text[:40]}")  # the text is used as the analysis request; only 40 chars are logged
+        reply = analyze_image_with_request(b64, text)
+        if not reply:  # analysis produced nothing: fall back to a plain chat reply
+            reply = chat_with_llm(text, _chat_history.get(chat_id, []))
+    else:
+        history = _chat_history.get(chat_id, [])
+        reply = chat_with_llm(text, history)
     if reply:  # with no reply, nothing is recorded or sent
+        history = _chat_history.get(chat_id, [])  # fetched here because the pending-image branch does not bind history
         history = history + [
             {"role": "user", "content": text},
             {"role": "assistant", "content": reply}
         ]
         _chat_history[chat_id] = history[-(MAX_HISTORY * 2):]  # keep only the last MAX_HISTORY * 2 entries
         send_text(token, chat_id, reply)