Jofthomas pandora-s committed on
Commit
8bd9cd3
·
verified ·
1 Parent(s): 2b5656d

System Prompt and User Instruction (#1)

Browse files

- System Prompt and User Instruction (076d2f1e0739c7a648469749b74e7d93664ac6fb)
- fix markdown sys prompt (7f86bf5e23f801df9c36caf15c7d14c67da38ffe)


Co-authored-by: pandora <pandora-s@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +16 -7
app.py CHANGED
@@ -85,11 +85,20 @@ model = Mistral3ForConditionalGeneration.from_pretrained(
85
  ).eval()
86
 
87
 
88
- SYSTEM_PROMPT_TEXT = (
89
- "You are a world-class geolocation expert. Given a street-view style image, "
90
- "think step by step about visual clues and infer approximate coordinates. "
91
- "When you conclude, output your answer inside [ANSWER]lat,lng[/ANSWER]."
92
- )
 
 
 
 
 
 
 
 
 
93
  @spaces.GPU(duration=120)
94
  def llm_decode_image_return_text(image_bytes: bytes) -> str:
95
  print(f"[llm] decode start. image_bytes={len(image_bytes)} bytes")
@@ -101,7 +110,7 @@ def llm_decode_image_return_text(image_bytes: bytes) -> str:
101
  messages = [
102
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
103
  {"role": "user", "content": [
104
- {"type": "text", "text": "Please analyze this image and provide coordinates."},
105
  {"type": "image_url", "image_url": {"url": data_url}},
106
  ]},
107
  ]
@@ -140,7 +149,7 @@ def llm_stream_image_text(image_bytes: bytes):
140
  messages = [
141
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
142
  {"role": "user", "content": [
143
- {"type": "text", "text": "Please analyze this image and provide coordinates."},
144
  {"type": "image_url", "image_url": {"url": data_url}},
145
  ]},
146
  ]
 
85
  ).eval()
86
 
87
 
88
+ # SYSTEM_PROMPT_TEXT = (
89
+ # "You are a world-class geolocation expert. Given a street-view style image, "
90
+ # "think step by step about visual clues and infer approximate coordinates. "
91
+ # "When you conclude, output your answer inside [ANSWER]lat,lng[/ANSWER]."
92
+ # )
93
+ SYSTEM_PROMPT_TEXT = """First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.
94
+
95
+ Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response."""
96
+
97
+ USER_INSTRUCTION = """You are a world-class geolocation expert. Given a street-view style image, think step by step about visual clues and infer approximate coordinates.
98
+ When you conclude, output your final answer inside [ANSWER]lat,lng[/ANSWER].
99
+
100
+ Please analyze this image and provide coordinates in the required format."""
101
+
102
  @spaces.GPU(duration=120)
103
  def llm_decode_image_return_text(image_bytes: bytes) -> str:
104
  print(f"[llm] decode start. image_bytes={len(image_bytes)} bytes")
 
110
  messages = [
111
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
112
  {"role": "user", "content": [
113
+ {"type": "text", "text": USER_INSTRUCTION},
114
  {"type": "image_url", "image_url": {"url": data_url}},
115
  ]},
116
  ]
 
149
  messages = [
150
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
151
  {"role": "user", "content": [
152
+ {"type": "text", "text": USER_INSTRUCTION},
153
  {"type": "image_url", "image_url": {"url": data_url}},
154
  ]},
155
  ]