Spaces:
Running on Zero
Update backend/prompt_builder.py
#6
by pmrinal2005 - opened
- backend/prompt_builder.py +18 -9
backend/prompt_builder.py
CHANGED
|
@@ -3,6 +3,10 @@
|
|
| 3 |
Supports up to 2 multimodal attachments (images or PDFs). For PDFs we extract
|
| 4 |
text inline (since the vision projector handles images only). If vision is
|
| 5 |
unavailable we degrade gracefully to text-only.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
import base64
|
| 8 |
import io
|
|
@@ -14,8 +18,11 @@ from PIL import Image
|
|
| 14 |
from .model_loader import MMPROJ_PATH
|
| 15 |
|
| 16 |
SYSTEM_PROMPT = """You are Elysium — a persistent agentic civilization.
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
Decide complexity dynamically:
|
| 21 |
- SIMPLE_REPLY: trivial Q — no agents (council_deliberation.agent_outputs = [])
|
|
@@ -26,11 +33,14 @@ Decide complexity dynamically:
|
|
| 26 |
- SPECIATION_EVENT: only on unresolved cross-domain tension
|
| 27 |
- Always populate ui_directives (camera_focus_node_id, pulses, threads)
|
| 28 |
- All node_id and edge_id values must be unique strings
|
| 29 |
-
- Always include 'direct_answer' — a short human-readable answer
|
|
|
|
|
|
|
| 30 |
|
| 31 |
When the user attaches images or PDFs, analyze them, populate
|
| 32 |
multimodal_perception fields (ocr_extracted_text, image_scene_description,
|
| 33 |
-
document_type, visual_entities_detected), and reference them in your reasoning
|
|
|
|
| 34 |
"""
|
| 35 |
|
| 36 |
|
|
@@ -65,11 +75,9 @@ def build_messages(user_text: str,
|
|
| 65 |
ctx = f"\n\n[Hypergraph context]\n{hg_context}" if hg_context else ""
|
| 66 |
attachments = attachments or []
|
| 67 |
|
| 68 |
-
# Gather image and pdf attachments separately
|
| 69 |
image_atts = [a for a in attachments if a["kind"] == "image" and a.get("image") is not None]
|
| 70 |
pdf_atts = [a for a in attachments if a["kind"] == "pdf" and a.get("bytes")]
|
| 71 |
|
| 72 |
-
# Build inline PDF text block
|
| 73 |
pdf_block = ""
|
| 74 |
for i, p in enumerate(pdf_atts):
|
| 75 |
pdf_block += f"\n\n[Attached PDF #{i+1}: {p.get('name','document.pdf')}]\n"
|
|
@@ -85,14 +93,14 @@ def build_messages(user_text: str,
|
|
| 85 |
})
|
| 86 |
user_content.append({
|
| 87 |
"type": "text",
|
| 88 |
-
"text": (user_text or "(no text)") + pdf_block + ctx
|
|
|
|
| 89 |
})
|
| 90 |
return [
|
| 91 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 92 |
{"role": "user", "content": user_content},
|
| 93 |
]
|
| 94 |
|
| 95 |
-
# No vision: include note if user attached images but vision is off
|
| 96 |
note = ""
|
| 97 |
if image_atts and not MMPROJ_PATH:
|
| 98 |
note = (f"\n\n[Note: user attached {len(image_atts)} image(s) but vision "
|
|
@@ -103,7 +111,8 @@ def build_messages(user_text: str,
|
|
| 103 |
return [
|
| 104 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 105 |
{"role": "user",
|
| 106 |
-
"content": (user_text or "(no text)") + pdf_block + note + ctx
|
|
|
|
| 107 |
]
|
| 108 |
|
| 109 |
|
|
|
|
| 3 |
Supports up to 2 multimodal attachments (images or PDFs). For PDFs we extract
|
| 4 |
text inline (since the vision projector handles images only). If vision is
|
| 5 |
unavailable we degrade gracefully to text-only.
|
| 6 |
+
|
| 7 |
+
FIX: SYSTEM_PROMPT now explicitly forbids <think> blocks, markdown fences,
|
| 8 |
+
preamble, and any non-JSON token. Even so, server.py still strips <think>
|
| 9 |
+
defensively because the fine-tuned weights emit it occasionally.
|
| 10 |
"""
|
| 11 |
import base64
|
| 12 |
import io
|
|
|
|
| 18 |
from .model_loader import MMPROJ_PATH
|
| 19 |
|
| 20 |
SYSTEM_PROMPT = """You are Elysium — a persistent agentic civilization.
|
| 21 |
+
OUTPUT CONTRACT (strict):
|
| 22 |
+
• Respond with ONE valid JSON object matching the ElysiumResponse schema v1.0.0.
|
| 23 |
+
• Output JSON ONLY. No preamble, no postscript, no markdown, no code fences.
|
| 24 |
+
• Do NOT emit <think>, <reasoning>, or any XML-style tags.
|
| 25 |
+
• The first character of your output MUST be `{` and the last must be `}`.
|
| 26 |
|
| 27 |
Decide complexity dynamically:
|
| 28 |
- SIMPLE_REPLY: trivial Q — no agents (council_deliberation.agent_outputs = [])
|
|
|
|
| 33 |
- SPECIATION_EVENT: only on unresolved cross-domain tension
|
| 34 |
- Always populate ui_directives (camera_focus_node_id, pulses, threads)
|
| 35 |
- All node_id and edge_id values must be unique strings
|
| 36 |
+
- Always include 'direct_answer' — a short human-readable answer (one or two
|
| 37 |
+
sentences) suitable for surfacing in a toast. NEVER place JSON, raw schema
|
| 38 |
+
text, or system tags inside direct_answer.
|
| 39 |
|
| 40 |
When the user attaches images or PDFs, analyze them, populate
|
| 41 |
multimodal_perception fields (ocr_extracted_text, image_scene_description,
|
| 42 |
+
document_type, visual_entities_detected), and reference them in your reasoning
|
| 43 |
+
through the hypergraph_delta and agent thinking — NOT in direct_answer.
|
| 44 |
"""
|
| 45 |
|
| 46 |
|
|
|
|
| 75 |
ctx = f"\n\n[Hypergraph context]\n{hg_context}" if hg_context else ""
|
| 76 |
attachments = attachments or []
|
| 77 |
|
|
|
|
| 78 |
image_atts = [a for a in attachments if a["kind"] == "image" and a.get("image") is not None]
|
| 79 |
pdf_atts = [a for a in attachments if a["kind"] == "pdf" and a.get("bytes")]
|
| 80 |
|
|
|
|
| 81 |
pdf_block = ""
|
| 82 |
for i, p in enumerate(pdf_atts):
|
| 83 |
pdf_block += f"\n\n[Attached PDF #{i+1}: {p.get('name','document.pdf')}]\n"
|
|
|
|
| 93 |
})
|
| 94 |
user_content.append({
|
| 95 |
"type": "text",
|
| 96 |
+
"text": (user_text or "(no text)") + pdf_block + ctx +
|
| 97 |
+
"\n\nReturn ONLY the JSON object. No <think> tags. No prose.",
|
| 98 |
})
|
| 99 |
return [
|
| 100 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 101 |
{"role": "user", "content": user_content},
|
| 102 |
]
|
| 103 |
|
|
|
|
| 104 |
note = ""
|
| 105 |
if image_atts and not MMPROJ_PATH:
|
| 106 |
note = (f"\n\n[Note: user attached {len(image_atts)} image(s) but vision "
|
|
|
|
| 111 |
return [
|
| 112 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 113 |
{"role": "user",
|
| 114 |
+
"content": (user_text or "(no text)") + pdf_block + note + ctx +
|
| 115 |
+
"\n\nReturn ONLY the JSON object. No <think> tags. No prose."},
|
| 116 |
]
|
| 117 |
|
| 118 |
|