daVinci-MagiHuman

Runtime error

App Files Files Community

jiadisu committed on Mar 24

Commit

ed1bb3b

1 Parent(s): 5149b6a

update prompt

Browse files

Files changed (1) hide show

app.py +31 -5

app.py CHANGED Viewed

@@ -32,16 +32,42 @@ with open(_PROMPT_FILE, "r") as f:
     print(f"[Rewrite] Loaded system prompt from {os.path.basename(_PROMPT_FILE)}, length={len(_REWRITE_SYSTEM_PROMPT)} chars")
     print(f"[Rewrite] System prompt preview: {_REWRITE_SYSTEM_PROMPT[:200]}...")
-def rewrite_prompt(user_prompt: str) -> str:
-    """Rewrite user prompt into the model's required format via LLM API."""
-    print(f"[Rewrite] Starting rewrite, input length={len(user_prompt)} chars")
     print(f"[Rewrite] User prompt: {user_prompt[:100]}...")
     try:
         resp = _rewrite_client.chat.completions.create(
             model=_REWRITE_MODEL,
             messages=[
                 {"role": "system", "content": _REWRITE_SYSTEM_PROMPT},
-                {"role": "user", "content": user_prompt},
             ],
             temperature=0.1,
             max_tokens=4096,
@@ -71,7 +97,7 @@ def run_generation(image, prompt, seed, seconds):
     # Step 1: Rewrite prompt via LLM
     print(f"[Generate] Request received: seed={seed} seconds={seconds} prompt={prompt[:50]!r}")
-    rewritten = rewrite_prompt(prompt.strip())
     # Yield immediately so the user sees the rewritten prompt + "Generating..."
     yield None, rewritten, "Generating video, please wait ..."

     print(f"[Rewrite] Loaded system prompt from {os.path.basename(_PROMPT_FILE)}, length={len(_REWRITE_SYSTEM_PROMPT)} chars")
     print(f"[Rewrite] System prompt preview: {_REWRITE_SYSTEM_PROMPT[:200]}...")
+def _pil_to_base64_url(image) -> str:  # `image` must expose .save(fp, format=...); the docstring says it is a PIL Image
+    """Convert a PIL Image to a base64 data URL for the vision API."""
+    import base64  # function-scope imports: only needed by this helper
+    import io
+    buf = io.BytesIO()  # in-memory byte buffer; nothing is written to disk
+    image.save(buf, format="PNG")  # always serialised as PNG, whatever the input's source format
+    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")  # b64encode returns bytes; decode to str for the f-string below
+    return f"data:image/png;base64,{b64}"  # MIME type in the URL matches the PNG encoding used above
+def rewrite_prompt(user_prompt: str, image=None) -> str:
+    """Rewrite user prompt into the model's required format via LLM API.
+    Sends both the text prompt and the reference image to the LLM
+    so it can describe the character and scene accurately.
+    """
+    print(f"[Rewrite] Starting rewrite, input length={len(user_prompt)} chars, has_image={image is not None}")
     print(f"[Rewrite] User prompt: {user_prompt[:100]}...")
     try:
+        # Build user message with text + image
+        user_content = []
+        if image is not None:
+            user_content.append({
+                "type": "image_url",
+                "image_url": {"url": _pil_to_base64_url(image)},
+            })
+        user_content.append({
+            "type": "text",
+            "text": user_prompt,
+        })
         resp = _rewrite_client.chat.completions.create(
             model=_REWRITE_MODEL,
             messages=[
                 {"role": "system", "content": _REWRITE_SYSTEM_PROMPT},
+                {"role": "user", "content": user_content},
             ],
             temperature=0.1,
             max_tokens=4096,
     # Step 1: Rewrite prompt via LLM
     print(f"[Generate] Request received: seed={seed} seconds={seconds} prompt={prompt[:50]!r}")
+    rewritten = rewrite_prompt(prompt.strip(), image=image)
     # Yield immediately so the user sees the rewritten prompt + "Generating..."
     yield None, rewritten, "Generating video, please wait ..."