jiadisu committed on
Commit
ed1bb3b
·
1 Parent(s): 5149b6a

update prompt

Browse files
Files changed (1) hide show
  1. app.py +31 -5
app.py CHANGED
@@ -32,16 +32,42 @@ with open(_PROMPT_FILE, "r") as f:
32
  print(f"[Rewrite] Loaded system prompt from {os.path.basename(_PROMPT_FILE)}, length={len(_REWRITE_SYSTEM_PROMPT)} chars")
33
  print(f"[Rewrite] System prompt preview: {_REWRITE_SYSTEM_PROMPT[:200]}...")
34
 
35
- def rewrite_prompt(user_prompt: str) -> str:
36
- """Rewrite user prompt into the model's required format via LLM API."""
37
- print(f"[Rewrite] Starting rewrite, input length={len(user_prompt)} chars")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  print(f"[Rewrite] User prompt: {user_prompt[:100]}...")
39
  try:
 
 
 
 
 
 
 
 
 
 
 
 
40
  resp = _rewrite_client.chat.completions.create(
41
  model=_REWRITE_MODEL,
42
  messages=[
43
  {"role": "system", "content": _REWRITE_SYSTEM_PROMPT},
44
- {"role": "user", "content": user_prompt},
45
  ],
46
  temperature=0.1,
47
  max_tokens=4096,
@@ -71,7 +97,7 @@ def run_generation(image, prompt, seed, seconds):
71
 
72
  # Step 1: Rewrite prompt via LLM
73
  print(f"[Generate] Request received: seed={seed} seconds={seconds} prompt={prompt[:50]!r}")
74
- rewritten = rewrite_prompt(prompt.strip())
75
 
76
  # Yield immediately so the user sees the rewritten prompt + "Generating..."
77
  yield None, rewritten, "Generating video, please wait ..."
 
32
  print(f"[Rewrite] Loaded system prompt from {os.path.basename(_PROMPT_FILE)}, length={len(_REWRITE_SYSTEM_PROMPT)} chars")
33
  print(f"[Rewrite] System prompt preview: {_REWRITE_SYSTEM_PROMPT[:200]}...")
34
 
35
+ def _pil_to_base64_url(image) -> str:
36
+ """Convert a PIL Image to a base64 data URL for the vision API."""
37
+ import base64
38
+ import io
39
+ buf = io.BytesIO()
40
+ image.save(buf, format="PNG")
41
+ b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
42
+ return f"data:image/png;base64,{b64}"
43
+
44
+
45
+ def rewrite_prompt(user_prompt: str, image=None) -> str:
46
+ """Rewrite user prompt into the model's required format via LLM API.
47
+
48
+ Sends both the text prompt and the reference image to the LLM
49
+ so it can describe the character and scene accurately.
50
+ """
51
+ print(f"[Rewrite] Starting rewrite, input length={len(user_prompt)} chars, has_image={image is not None}")
52
  print(f"[Rewrite] User prompt: {user_prompt[:100]}...")
53
  try:
54
+ # Build user message with text + image
55
+ user_content = []
56
+ if image is not None:
57
+ user_content.append({
58
+ "type": "image_url",
59
+ "image_url": {"url": _pil_to_base64_url(image)},
60
+ })
61
+ user_content.append({
62
+ "type": "text",
63
+ "text": user_prompt,
64
+ })
65
+
66
  resp = _rewrite_client.chat.completions.create(
67
  model=_REWRITE_MODEL,
68
  messages=[
69
  {"role": "system", "content": _REWRITE_SYSTEM_PROMPT},
70
+ {"role": "user", "content": user_content},
71
  ],
72
  temperature=0.1,
73
  max_tokens=4096,
 
97
 
98
  # Step 1: Rewrite prompt via LLM
99
  print(f"[Generate] Request received: seed={seed} seconds={seconds} prompt={prompt[:50]!r}")
100
+ rewritten = rewrite_prompt(prompt.strip(), image=image)
101
 
102
  # Yield immediately so the user sees the rewritten prompt + "Generating..."
103
  yield None, rewritten, "Generating video, please wait ..."