Spaces:
Running
Running
update system prompt
Browse files
app.py
CHANGED
|
@@ -421,19 +421,22 @@ def detect_objects(image, dino_proc, dino_mod, threshold=0.3) -> tuple:
|
|
| 421 |
def fuse_captions(cap1: str, cap2: str, objects: str, qwen_tok, qwen_mod) -> str:
|
| 422 |
|
| 423 |
system_prompt = (
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
|
|
|
|
|
|
|
|
|
| 437 |
|
| 438 |
try:
|
| 439 |
messages = [
|
|
|
|
| 421 |
def fuse_captions(cap1: str, cap2: str, objects: str, qwen_tok, qwen_mod) -> str:
|
| 422 |
|
| 423 |
system_prompt = (
|
| 424 |
+
"You write image captions. "
|
| 425 |
+
"Look at the two captions and detected objects provided. "
|
| 426 |
+
"Write ONE caption that covers: who is in the image, what they are doing, "
|
| 427 |
+
"what objects are around them, and where the scene is taking place. "
|
| 428 |
+
"Use simple, everyday words. Write 2 to 3 sentences. "
|
| 429 |
+
"Only describe what is clearly visible. "
|
| 430 |
+
"Do not guess, invent, or add dramatic language. "
|
| 431 |
+
"Return ONLY the caption, nothing else."
|
| 432 |
+
)
|
| 433 |
+
|
| 434 |
+
user_prompt = (
|
| 435 |
+
f"Caption A: {cap1}\n"
|
| 436 |
+
f"Caption B: {cap2}\n"
|
| 437 |
+
f"{objects}\n\n"
|
| 438 |
+
"Write a clear, natural caption covering the person, action, objects and setting:"
|
| 439 |
+
)
|
| 440 |
|
| 441 |
try:
|
| 442 |
messages = [
|