pandora-s committed on
Commit
076d2f1
·
verified ·
1 Parent(s): 2b5656d

System Prompt and User Instruction

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -85,11 +85,21 @@ model = Mistral3ForConditionalGeneration.from_pretrained(
85
  ).eval()
86
 
87
 
88
- SYSTEM_PROMPT_TEXT = (
89
- "You are a world-class geolocation expert. Given a street-view style image, "
90
- "think step by step about visual clues and infer approximate coordinates. "
91
- "When you conclude, output your answer inside [ANSWER]lat,lng[/ANSWER]."
92
- )
 
 
 
 
 
 
 
 
 
 
93
  @spaces.GPU(duration=120)
94
  def llm_decode_image_return_text(image_bytes: bytes) -> str:
95
  print(f"[llm] decode start. image_bytes={len(image_bytes)} bytes")
@@ -101,7 +111,7 @@ def llm_decode_image_return_text(image_bytes: bytes) -> str:
101
  messages = [
102
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
103
  {"role": "user", "content": [
104
- {"type": "text", "text": "Please analyze this image and provide coordinates."},
105
  {"type": "image_url", "image_url": {"url": data_url}},
106
  ]},
107
  ]
@@ -140,7 +150,7 @@ def llm_stream_image_text(image_bytes: bytes):
140
  messages = [
141
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
142
  {"role": "user", "content": [
143
- {"type": "text", "text": "Please analyze this image and provide coordinates."},
144
  {"type": "image_url", "image_url": {"url": data_url}},
145
  ]},
146
  ]
 
85
  ).eval()
86
 
87
 
88
+ # SYSTEM_PROMPT_TEXT = (
89
+ # "You are a world-class geolocation expert. Given a street-view style image, "
90
+ # "think step by step about visual clues and infer approximate coordinates. "
91
+ # "When you conclude, output your answer inside [ANSWER]lat,lng[/ANSWER]."
92
+ # )
93
+ SYSTEM_PROMPT_TEXT = """# HOW YOU SHOULD THINK AND ANSWER
94
+ First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.
95
+
96
+ Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response."""
97
+
98
+ USER_INSTRUCTION = """You are a world-class geolocation expert. Given a street-view style image, think step by step about visual clues and infer approximate coordinates.
99
+ When you conclude, output your final answer inside [ANSWER]lat,lng[/ANSWER].
100
+
101
+ Please analyze this image and provide coordinates in the required format."""
102
+
103
  @spaces.GPU(duration=120)
104
  def llm_decode_image_return_text(image_bytes: bytes) -> str:
105
  print(f"[llm] decode start. image_bytes={len(image_bytes)} bytes")
 
111
  messages = [
112
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
113
  {"role": "user", "content": [
114
+ {"type": "text", "text": USER_INSTRUCTION},
115
  {"type": "image_url", "image_url": {"url": data_url}},
116
  ]},
117
  ]
 
150
  messages = [
151
  {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT_TEXT}]},
152
  {"role": "user", "content": [
153
+ {"type": "text", "text": USER_INSTRUCTION},
154
  {"type": "image_url", "image_url": {"url": data_url}},
155
  ]},
156
  ]