nickdigger committed on
Commit
e1b17e8
·
verified ·
1 Parent(s): 21dc1e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +412 -142
app.py CHANGED
@@ -1,6 +1,3 @@
1
- # app.py – Minimal Dark Edition for ZeroGPU
2
- # πŸ’„ UI simplification only – all logic unchanged
3
-
4
  try:
5
  import spaces
6
  if not hasattr(spaces, 'GPU'):
@@ -26,14 +23,17 @@ from datetime import datetime
26
  from typing import Optional
27
  from urllib.parse import urlparse
28
 
29
- # ===== Utility functions (unchanged) =====
30
  def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
 
31
  if not raw_url_or_path:
32
  return raw_url_or_path
 
33
  try:
34
  parsed = urlparse(raw_url_or_path)
35
  except Exception:
36
  parsed = None
 
37
  if parsed and parsed.scheme and parsed.netloc:
38
  full = raw_url_or_path
39
  if "/file=" in full and "/gradio_api/file=" not in full:
@@ -41,6 +41,7 @@ def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
41
  if "file=" in full and "/gradio_api/file=" not in full and "/gradio_api" not in full:
42
  full = full.replace("file=", "gradio_api/file=")
43
  return full
 
44
  if raw_url_or_path.startswith("/tmp/") or raw_url_or_path.startswith("tmp/"):
45
  if not host:
46
  return raw_url_or_path
@@ -51,12 +52,18 @@ def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
51
  if p.startswith("/"):
52
  p = p[1:]
53
  return f"{host}/gradio_api/file=/{p}"
 
54
  return raw_url_or_path
55
 
56
  def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
 
57
  if not caption or not isinstance(caption, str):
58
  return caption or ""
 
 
59
  result = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', caption.strip(), flags=re.IGNORECASE)
 
 
60
  if max_chars and len(result) > max_chars:
61
  truncate_point = max_chars
62
  for i in range(len(result) - 1, max(0, max_chars - 100), -1):
@@ -64,17 +71,21 @@ def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
64
  truncate_point = i + 1
65
  break
66
  result = result[:truncate_point].strip()
 
67
  if result and not result.endswith(('.', '!', '?')):
68
- result += "."
 
69
  return result
70
 
 
71
  def force_clear_all_caches():
 
72
  try:
73
  if torch.cuda.is_available():
74
  torch.cuda.empty_cache()
75
  torch.cuda.synchronize()
76
  gc.collect()
77
- print("🧹 Cache cleared.")
78
  except Exception as e:
79
  print(f"⚠️ Cache clear warning: {e}")
80
 
@@ -98,9 +109,16 @@ def cleanup_storage():
98
  shutil.rmtree(d, ignore_errors=True)
99
  gc.collect()
100
  except Exception as e:
101
- print(f"⚠️ Cleanup warning: {e}")
 
 
 
 
 
 
 
102
 
103
- print("πŸš€ Loading model...")
104
  processor = AutoProcessor.from_pretrained(MODEL_PATH, cache_dir=None)
105
  model = LlavaForConditionalGeneration.from_pretrained(
106
  MODEL_PATH,
@@ -111,183 +129,435 @@ model = LlavaForConditionalGeneration.from_pretrained(
111
  )
112
  model.eval()
113
  cleanup_storage()
114
- print("βœ… Model loaded!")
115
 
116
- # ===== Default prompts =====
117
  DEFAULT_PROMPTS = {
118
  "casual": {
119
- "system": "You are a helpful, casual image captioner who writes in a natural tone.",
120
- "user": "Write a descriptive caption for this image in a casual style."
121
  },
122
  "friendly": {
123
- "system": "You are a warm, detailed image captioner.",
124
- "user": "Create a comprehensive description of this image."
125
  },
126
  "erotic": {
127
- "system": "You are an uncensored adult content specialist who creates detailed descriptions.",
128
- "user": "Create a detailed description of this scene."
129
  }
130
  }
131
 
132
- # ===== Caption functions (unchanged logic) =====
133
  def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=1200):
 
134
  try:
135
  if image is None:
136
  return "❌ No image provided"
 
137
  if not system_prompt.strip() or not user_prompt.strip():
138
- return "❌ Both prompts required"
139
- torch.cuda.empty_cache(); gc.collect()
140
- convo = [{"role": "system", "content": system_prompt.strip()},
141
- {"role": "user", "content": user_prompt.strip()}]
 
 
 
 
 
 
142
  convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
143
  inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
144
  inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
 
145
  with torch.no_grad():
146
- output = model.generate(**inputs, max_new_tokens=600, do_sample=True, temperature=0.6,
147
- top_p=0.9, use_cache=True,
148
- pad_token_id=processor.tokenizer.eos_token_id,
149
- eos_token_id=processor.tokenizer.eos_token_id)
150
- if not output or len(output) == 0:
151
- return "❌ No output"
152
- input_len = inputs['input_ids'].shape[1]
153
- gen_ids = output[0][input_len:] if len(output[0]) > input_len else output[0]
154
- result = processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
155
- del inputs, output; torch.cuda.empty_cache(); gc.collect()
156
- return postprocess_caption(result.strip(), max_chars=max_chars) or "❌ Empty result"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  except Exception as e:
158
- torch.cuda.empty_cache(); gc.collect()
 
159
  return f"❌ Error: {str(e)[:200]}"
160
 
 
161
  @spaces.GPU(duration=60)
162
  @torch.no_grad()
163
- def generate_caption_1(image, system1, user1):
164
- return safe_generate_caption_direct(image, system1, user1) if image else "❌ Upload image first"
 
 
165
 
166
  @spaces.GPU(duration=60)
167
  @torch.no_grad()
168
- def generate_caption_2(image, system2, user2):
169
- return safe_generate_caption_direct(image, system2, user2) if image else "❌ Upload image first"
 
 
170
 
171
  @spaces.GPU(duration=60)
172
  @torch.no_grad()
173
- def generate_caption_3(image, system3, user3):
174
- return safe_generate_caption_direct(image, system3, user3) if image else "❌ Upload image first"
 
 
175
 
176
- # ====== Minimal Dark UI ======
177
- TITLE = """
178
- <div style="text-align:center;margin:20px 0;">
179
- <h1 style="color:#ffffff;font-weight:600;">JoyCaption Advanced Prompting System (v6.0)</h1>
180
- <p style="color:#9ca3af;">Custom prompts β€’ Template helpers β€’ Professional control</p>
181
- <hr style="border-color:#374151;margin-top:10px;">
182
- </div>
183
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- dark_css = """
186
- body {
187
- background: #0f172a;
188
- color: #e5e7eb;
189
- }
190
- .gradio-container {
191
- max-width: 1200px !important;
192
- margin: auto !important;
193
- }
194
- input, textarea {
195
- background-color: #1e293b !important;
196
- color: #f1f5f9 !important;
197
- border: 1px solid #334155 !important;
198
- border-radius: 6px !important;
199
- font-size: 13px !important;
200
- }
201
- input:focus, textarea:focus {
202
- border-color: #60a5fa !important;
203
- outline: none !important;
204
- }
205
- button {
206
- background-color: #2563eb !important;
207
- color: #ffffff !important;
208
- border: none !important;
209
- border-radius: 6px !important;
210
- padding: 8px 14px !important;
211
- font-weight: 500 !important;
212
- cursor: pointer !important;
213
- }
214
- button:hover {
215
- background-color: #1d4ed8 !important;
216
- }
217
- .caption-section {
218
- background: #1e293b !important;
219
- border: 1px solid #334155 !important;
220
- border-radius: 8px !important;
221
- padding: 12px !important;
222
- margin: 6px 0 !important;
223
- }
224
- """
225
 
226
- # ===== Build Interface =====
227
- with gr.Blocks(title="JoyCaption", theme=None, css=dark_css) as demo:
228
- gr.HTML(TITLE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  with gr.Row():
 
231
  with gr.Column(scale=1):
232
- image_input = gr.Image(type="pil", label="", height=400)
233
- keywords_input = gr.Textbox(placeholder="Enter keywords")
234
- custom_instruction_input = gr.Textbox(placeholder="Custom instructions")
235
- avoid_input = gr.Textbox(placeholder="Things to avoid mentioning")
236
- question_input = gr.Textbox(placeholder="Ask a question about the image")
237
- ask_btn = gr.Button("Ask Question")
238
- qa_output = gr.Textbox(lines=4, show_copy_button=True, placeholder="Answer will appear here")
239
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  with gr.Column(scale=1):
241
- with gr.Group(elem_classes=["caption-section"]):
242
- system1 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["casual"]["system"], placeholder="System prompt")
243
- user1 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["casual"]["user"], placeholder="User prompt")
244
- gen1_btn = gr.Button("Generate Casual Caption")
245
- out1 = gr.Textbox(lines=5, show_copy_button=True, placeholder="Casual caption output")
246
-
247
- with gr.Group(elem_classes=["caption-section"]):
248
- system2 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["friendly"]["system"], placeholder="System prompt")
249
- user2 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["friendly"]["user"], placeholder="User prompt")
250
- gen2_btn = gr.Button("Generate Friendly Caption")
251
- out2 = gr.Textbox(lines=5, show_copy_button=True, placeholder="Friendly caption output")
252
-
253
- with gr.Group(elem_classes=["caption-section"]):
254
- system3 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["erotic"]["system"], placeholder="System prompt")
255
- user3 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["erotic"]["user"], placeholder="User prompt")
256
- gen3_btn = gr.Button("Generate Erotic Caption")
257
- out3 = gr.Textbox(lines=5, show_copy_button=True, placeholder="Erotic caption output")
258
-
259
- export_btn = gr.Button("Export All Data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  export_out = gr.Textbox(visible=False)
261
  export_file = gr.File(visible=False)
262
-
263
- # ===== Link Buttons =====
264
  gen1_btn.click(generate_caption_1, [image_input, system1, user1], out1)
265
  gen2_btn.click(generate_caption_2, [image_input, system2, user2], out2)
266
  gen3_btn.click(generate_caption_3, [image_input, system3, user3], out3)
267
- ask_btn.click(lambda img, q: generate_caption_1(img, "You are a helpful assistant.", q), [image_input, question_input], qa_output)
268
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  def handle_export(k, c, a, q, c1, c2, c3, qa, img):
270
- data = {
271
- "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
272
- "data": {
273
- "keywords": k.strip(),
274
- "instructions": c.strip(),
275
- "avoid": a.strip(),
276
- "question": q.strip(),
277
- "caption1": c1.strip(),
278
- "caption2": c2.strip(),
279
- "caption3": c3.strip(),
280
- "qa": qa.strip()
281
- }
282
- }
283
- js = json.dumps(data, indent=2)
284
- fn = f"joycaption_{time.strftime('%Y%m%d_%H%M%S')}.json"
285
- path = os.path.join(tempfile.gettempdir(), fn)
286
- with open(path, "w", encoding="utf-8") as f:
287
- f.write(js)
288
- return gr.update(value="βœ… Exported successfully", visible=True), gr.update(value=path, visible=True)
289
-
290
  export_btn.click(handle_export, [keywords_input, custom_instruction_input, avoid_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  if __name__ == "__main__":
293
- demo.launch()
 
 
 
 
1
  try:
2
  import spaces
3
  if not hasattr(spaces, 'GPU'):
 
23
  from typing import Optional
24
  from urllib.parse import urlparse
25
 
26
+ # ===== BUILT-IN UTILITY FUNCTIONS =====
27
  def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
28
+ """Convert local image paths to URLs for export"""
29
  if not raw_url_or_path:
30
  return raw_url_or_path
31
+
32
  try:
33
  parsed = urlparse(raw_url_or_path)
34
  except Exception:
35
  parsed = None
36
+
37
  if parsed and parsed.scheme and parsed.netloc:
38
  full = raw_url_or_path
39
  if "/file=" in full and "/gradio_api/file=" not in full:
 
41
  if "file=" in full and "/gradio_api/file=" not in full and "/gradio_api" not in full:
42
  full = full.replace("file=", "gradio_api/file=")
43
  return full
44
+
45
  if raw_url_or_path.startswith("/tmp/") or raw_url_or_path.startswith("tmp/"):
46
  if not host:
47
  return raw_url_or_path
 
52
  if p.startswith("/"):
53
  p = p[1:]
54
  return f"{host}/gradio_api/file=/{p}"
55
+
56
  return raw_url_or_path
57
 
58
  def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
59
+ """Minimal caption post-processing - just basic cleanup"""
60
  if not caption or not isinstance(caption, str):
61
  return caption or ""
62
+
63
+ # Only remove leading "a photo of" phrases
64
  result = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', caption.strip(), flags=re.IGNORECASE)
65
+
66
+ # Only truncate if extremely long
67
  if max_chars and len(result) > max_chars:
68
  truncate_point = max_chars
69
  for i in range(len(result) - 1, max(0, max_chars - 100), -1):
 
71
  truncate_point = i + 1
72
  break
73
  result = result[:truncate_point].strip()
74
+
75
  if result and not result.endswith(('.', '!', '?')):
76
+ result = result + "."
77
+
78
  return result
79
 
80
+ # ===== CACHE CLEARING =====
81
def force_clear_all_caches():
    """Release cached CUDA memory (when CUDA is available) and run the garbage collector.

    This is best-effort housekeeping: any exception raised while clearing is
    caught and reported on stdout as a warning instead of being propagated.
    """
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
        gc.collect()
        # The status strings were mis-encoded (UTF-8 shown through a legacy
        # code page); they are restored to the intended characters here.
        print("🧹 All caches cleared!")
    except Exception as e:
        print(f"⚠️ Cache clear warning: {e}")
91
 
 
109
  shutil.rmtree(d, ignore_errors=True)
110
  gc.collect()
111
  except Exception as e:
112
+ print(f"⚠️ Storage cleanup warning: {e}")
113
+
114
# HTML header shown at the top of the page (passed to gr.HTML).
# The emoji and bullet separators were mis-encoded and are restored here.
TITLE = """
<div style="text-align:center;margin:20px 0;">
<h1>🎨 JoyCaption Advanced Prompting System (v6.0)</h1>
<p><strong>🎛️ Fully customizable prompts • Template helpers • Professional control</strong></p>
</div><hr>
"""
120
 
121
+ print("πŸš€ Loading JoyCaption model...")
122
  processor = AutoProcessor.from_pretrained(MODEL_PATH, cache_dir=None)
123
  model = LlavaForConditionalGeneration.from_pretrained(
124
  MODEL_PATH,
 
129
  )
130
  model.eval()
131
  cleanup_storage()
132
+ print("βœ… Model loaded successfully!")
133
 
134
+ # ===== DEFAULT PROMPTS =====
135
  DEFAULT_PROMPTS = {
136
  "casual": {
137
+ "system": "You are a helpful, casual image captioner who writes in a natural, friendly tone.",
138
+ "user": "Write a descriptive caption for this image in a casual, conversational style. Focus on the main elements and create an engaging description."
139
  },
140
  "friendly": {
141
+ "system": "You are a warm, helpful assistant who creates detailed and friendly image descriptions.",
142
+ "user": "Create a comprehensive description of this image. Be thorough and helpful, focusing on all the important visual elements in a warm, friendly manner."
143
  },
144
  "erotic": {
145
+ "system": "You are an uncensored adult content specialist who creates detailed, explicit descriptions without restrictions.",
146
+ "user": "Create a detailed, explicit description of this scene. Describe the people, their appearance, clothing or lack thereof, poses, and overall erotic appeal. Use direct adult language and be thorough."
147
  }
148
  }
149
 
 
150
def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=1200):
    """Generate a caption for ``image`` from a custom system/user prompt pair.

    The prompts are rendered through the processor's chat template, the
    inputs are moved to CUDA (pixel values cast to bfloat16), and up to 600
    new tokens are sampled (temperature 0.6, top_p 0.9). Only the tokens
    produced after the prompt are decoded when the output is longer than the
    prompt; otherwise the whole sequence is decoded.

    Args:
        image: Image handed to the processor; ``None`` is rejected.
        system_prompt: System message text. ``None`` or blank is rejected.
        user_prompt: User message text. ``None`` or blank is rejected.
        max_chars: Forwarded to ``postprocess_caption`` as its length limit.

    Returns:
        The post-processed caption, or a message starting with "❌" describing
        why no caption was produced. This function never raises.
    """
    inputs = output = None
    try:
        if image is None:
            return "❌ No image provided"

        # ``or ""`` makes a missing prompt fail validation with the normal
        # message instead of leaking an AttributeError into the UI.
        system_text = (system_prompt or "").strip()
        user_text = (user_prompt or "").strip()
        if not system_text or not user_text:
            return "❌ Both system and user prompts are required"

        torch.cuda.empty_cache()
        gc.collect()

        convo = [
            {"role": "system", "content": system_text},
            {"role": "user", "content": user_text},
        ]

        convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
        inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
        inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=600,
                do_sample=True,
                temperature=0.6,
                top_p=0.9,
                top_k=None,
                use_cache=True,
                pad_token_id=processor.tokenizer.eos_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        if output is None or len(output) == 0:
            return "❌ No output generated"

        # Drop the echoed prompt tokens when the sequence is longer than the
        # prompt; otherwise decode the full sequence.
        token_ids = output[0]
        if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
            input_length = inputs['input_ids'].shape[1]
            if len(token_ids) > input_length:
                token_ids = token_ids[input_length:]

        result = processor.tokenizer.decode(
            token_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        ).strip()

        final_result = postprocess_caption(result, max_chars=max_chars)
        return final_result if final_result else "❌ Empty result"

    except Exception as e:
        return f"❌ Error: {str(e)[:200]}"
    finally:
        # Drop tensor references on every exit path (including errors) before
        # asking the allocator to release cached blocks.
        inputs = output = None
        torch.cuda.empty_cache()
        gc.collect()
210
 
211
+ # Individual caption generation functions
212
@spaces.GPU(duration=60)
@torch.no_grad()
def generate_caption_1(image, system1, user1):
    """Generate the caption for slot 1 (wired to the "Casual" button).

    Args:
        image: Uploaded image, or ``None`` when nothing has been uploaded.
        system1: System prompt text for this slot.
        user1: User prompt text for this slot.

    Returns:
        The caption string from ``safe_generate_caption_direct``, or an
        "❌" message when no image is present.
    """
    # Identity check rather than truthiness: an image object should not be
    # asked for its boolean value, and this matches the check performed in
    # safe_generate_caption_direct.
    if image is None:
        return "❌ Upload image first"
    return safe_generate_caption_direct(image, system1, user1)
218
 
219
@spaces.GPU(duration=60)
@torch.no_grad()
def generate_caption_2(image, system2, user2):
    """Generate the caption for slot 2 (wired to the "Friendly" button).

    Args:
        image: Uploaded image, or ``None`` when nothing has been uploaded.
        system2: System prompt text for this slot.
        user2: User prompt text for this slot.

    Returns:
        The caption string from ``safe_generate_caption_direct``, or an
        "❌" message when no image is present.
    """
    # Identity check rather than truthiness: an image object should not be
    # asked for its boolean value, and this matches the check performed in
    # safe_generate_caption_direct.
    if image is None:
        return "❌ Upload image first"
    return safe_generate_caption_direct(image, system2, user2)
225
 
226
@spaces.GPU(duration=60)
@torch.no_grad()
def generate_caption_3(image, system3, user3):
    """Generate the caption for slot 3 (wired to the third caption button).

    Args:
        image: Uploaded image, or ``None`` when nothing has been uploaded.
        system3: System prompt text for this slot.
        user3: User prompt text for this slot.

    Returns:
        The caption string from ``safe_generate_caption_direct``, or an
        "❌" message when no image is present.
    """
    # Identity check rather than truthiness: an image object should not be
    # asked for its boolean value, and this matches the check performed in
    # safe_generate_caption_direct.
    if image is None:
        return "❌ Upload image first"
    return safe_generate_caption_direct(image, system3, user3)
232
 
233
@spaces.GPU(duration=40)
@torch.no_grad()
def answer_question(image, question):
    """Answer a free-form question about ``image``.

    The question is sent as the user message next to a fixed system prompt,
    up to 300 new tokens are sampled (temperature 0.6, top_p 0.9), and the
    decoded text is passed through ``postprocess_caption`` with a 500
    character limit.

    Args:
        image: Uploaded image, or ``None`` when nothing has been uploaded.
        question: Question text; ``None`` or blank is rejected.

    Returns:
        The post-processed answer, or a message starting with "❌" when the
        inputs are missing, nothing was generated, or generation failed.
    """
    if image is None:
        return "❌ Upload image first"
    if not question or not question.strip():
        return "❌ Please ask a question"

    inputs = output = None
    try:
        torch.cuda.empty_cache()
        gc.collect()

        convo = [
            {"role": "system", "content": "You are a helpful image captioner."},
            {"role": "user", "content": question.strip()},
        ]

        convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
        inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
        inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=True,
                temperature=0.6,
                top_p=0.9,
                top_k=None,
                use_cache=True,
                pad_token_id=processor.tokenizer.eos_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # Same guard the caption path uses; without it an empty result would
        # surface as an opaque indexing error.
        if output is None or len(output) == 0:
            return "❌ No answer generated"

        # Drop the echoed prompt tokens when the sequence is longer than the
        # prompt; otherwise decode the full sequence.
        token_ids = output[0]
        if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
            input_length = inputs['input_ids'].shape[1]
            if len(token_ids) > input_length:
                token_ids = token_ids[input_length:]

        result = processor.tokenizer.decode(
            token_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        ).strip()

        final_result = postprocess_caption(result, max_chars=500)
        return final_result if final_result else "❌ No answer generated"

    except Exception as e:
        return f"❌ Q&A Error: {str(e)[:200]}"
    finally:
        # Drop tensor references on every exit path (including errors) before
        # asking the allocator to release cached blocks.
        inputs = output = None
        torch.cuda.empty_cache()
        gc.collect()
291
 
292
+ # Helper functions for template insertion
293
def insert_template(current_text, template_text, field_content):
    """Append a formatted template sentence to a prompt, at most once.

    Args:
        current_text: Existing prompt text; ``None`` is treated as empty.
        template_text: ``str.format`` template containing a ``{content}``
            placeholder.
        field_content: Value substituted for ``{content}``. When it is
            ``None`` or blank, the prompt is returned unchanged.

    Returns:
        ``current_text`` unchanged when there is nothing to insert or the
        formatted sentence is already present; otherwise the prompt with its
        trailing whitespace removed, one space, and the formatted sentence
        (or just the sentence when the prompt is blank).
    """
    # Text fields may be cleared to None; normalize so neither .strip() nor
    # the substring test below can raise.
    current_text = current_text or ""
    content = (field_content or "").strip()
    if not content:
        return current_text

    formatted_template = template_text.format(content=content)

    # Prevent duplicates when the same button is clicked repeatedly.
    if formatted_template in current_text:
        return current_text

    if current_text.strip():
        return current_text.rstrip() + " " + formatted_template
    return formatted_template
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
def create_template_functions():
    """Build the four inserters behind the key / que / use / not buttons.

    Every returned callable takes ``(system_text, user_text, content)`` and
    returns a ``(system, user)`` tuple in which the inserter's sentence,
    formatted with ``content``, has been added to both prompts through
    ``insert_template``.

    Returns:
        A 4-tuple ``(insert_key, insert_que, insert_use, insert_not)``.
    """

    def _make_inserter(template):
        # Bind one sentence template into a three-argument inserter.
        def _insert(system_text, user_text, content):
            return (
                insert_template(system_text, template, content),
                insert_template(user_text, template, content),
            )

        return _insert

    insert_key = _make_inserter("Pay attention to these keywords: {content}.")
    insert_que = _make_inserter("Answer this question: {content}.")
    insert_use = _make_inserter("Make sure that you mention: {content}.")
    insert_not = _make_inserter("Do NOT mention: {content}.")

    return insert_key, insert_que, insert_use, insert_not
342
 
343
+ # Export function
344
def export_joycaption_data(keywords, custom_instructions, avoid, question, cap1, cap2, cap3, qa_answer, image_path=""):
    """Serialize the non-empty inputs and outputs of a session to JSON text.

    Args:
        keywords, custom_instructions, avoid, question: Input field texts.
        cap1, cap2, cap3: Caption outputs (stored as ``caption_casual``,
            ``caption_friendly`` and ``caption_erotic``).
        qa_answer: Q&A output text.
        image_path: Optional local path or URL of the image. Anything that is
            not a non-blank string (for example an in-memory image object or
            ``None``) is ignored.

    Returns:
        ``(message, (json_text, filename))`` on success, or
        ``(message, None)`` when there is nothing to export or an error
        occurred. Blank fields are omitted from the JSON.
    """

    def _clean(value):
        # Non-string values (None, image objects) count as empty.
        return value.strip() if isinstance(value, str) else ""

    try:
        data = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "source": "JoyCaption", "data": {}}
        fields = data["data"]

        for key, value in (
            ("keywords", keywords),
            ("custom_instructions", custom_instructions),
            ("avoid", avoid),
            ("question", question),
        ):
            text = _clean(value)
            if text:
                fields[key] = text

        # The UI passes the value of a gr.Image(type="pil") component here,
        # which is an image object rather than a path; calling .strip() on it
        # used to abort the whole export.
        if _clean(image_path):
            fields["image_local_path"] = image_path
            image_url = fix_image_url(image_path, host=(SPACE_HOST or ""))
            if image_url:
                fields["image_url"] = image_url

        for key, value in (
            ("caption_casual", cap1),
            ("caption_friendly", cap2),
            ("caption_erotic", cap3),
            ("qa_answer", qa_answer),
        ):
            text = _clean(value)
            if text:
                fields[key] = text

        if not fields:
            return "❌ No data to export", None

        js = json.dumps(data, indent=2, ensure_ascii=False)
        fn = f"joycaption_{time.strftime('%Y%m%d_%H%M%S')}.json"
        return f"✅ Exported {len(fields)} fields", (js, fn)
    except Exception as e:
        return f"❌ Export failed: {str(e)}", None
366
+
367
+ # Create the Gradio interface
368
def _caption_panel(heading, tone, generate_label, user_placeholder):
    """Render one caption panel and return its components.

    Must be called inside an active ``gr.Blocks`` context. Creates, in order:
    a heading, the system and user prompt boxes pre-filled from
    ``DEFAULT_PROMPTS[tone]``, a row with the four template buttons and the
    generate button, and the output box.

    Returns:
        ``(system_box, user_box, template_buttons, generate_btn, output_box)``
        where ``template_buttons`` is ordered key, que, use, not.
    """
    gr.HTML(f"<h4 style='margin: 15px 0 10px 0; color: #374151;'>{heading}</h4>")

    system_box = gr.Textbox(
        label="System Prompt",
        lines=2,
        value=DEFAULT_PROMPTS[tone]["system"],
        placeholder="How should the AI behave?",
    )
    user_box = gr.Textbox(
        label="User Prompt",
        lines=2,
        value=DEFAULT_PROMPTS[tone]["user"],
        placeholder=user_placeholder,
    )

    # NOTE(review): the original indentation was lost; the generate button is
    # assumed to sit in the same row as the template buttons - confirm.
    with gr.Row():
        template_buttons = [gr.Button(tag, size="sm") for tag in ("key", "que", "use", "not")]
        generate_btn = gr.Button(generate_label, variant="primary")

    output_box = gr.Textbox(lines=5, show_copy_button=True)
    return system_box, user_box, template_buttons, generate_btn, output_box


# The stray line `""") as demo:` that followed this header opened an
# unterminated string literal and made the whole module a SyntaxError; it is
# removed here. Mis-encoded emoji in labels are restored as well.
with gr.Blocks(title="JoyCaption Advanced Prompting System", theme=gr.themes.Soft()) as demo:

    gr.HTML(TITLE)

    # Get template functions
    insert_key, insert_que, insert_use, insert_not = create_template_functions()

    with gr.Row():
        # Left Column - Input Fields
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="📸 Upload Image", height=400)

            keywords_input = gr.Textbox(
                label="🏷️ Keywords",
                lines=2,
                placeholder="Enter keywords (available as 'key' template)",
                info="Use 'key' button to insert into prompts",
            )

            custom_instruction_input = gr.Textbox(
                label="🎯 Custom Instruction",
                lines=2,
                placeholder="Enter custom instructions (available as 'use' template)",
                info="Use 'use' button to insert into prompts",
            )

            avoid_input = gr.Textbox(
                label="🚫 Avoid",
                lines=2,
                placeholder="Things to avoid mentioning (available as 'not' template)",
                info="Use 'not' button to insert into prompts",
            )

            question_input = gr.Textbox(
                label="❓ Question",
                lines=2,
                placeholder="Ask a question about the image (available as 'que' template)",
                info="Use 'que' button to insert into prompts",
            )

            ask_btn = gr.Button("❓ Ask Question", variant="secondary")
            qa_output = gr.Textbox(label="Q&A Answer", lines=4, show_copy_button=True)

        # Right Column - Caption Generation
        with gr.Column(scale=1):
            # Caption 1 - Casual
            system1, user1, template_btns1, gen1_btn, out1 = _caption_panel(
                "📝 Casual Caption",
                "casual",
                "📝 Generate Casual Caption",
                "What should the AI do with this image?",
            )

            # Caption 2 - Friendly
            system2, user2, template_btns2, gen2_btn, out2 = _caption_panel(
                "🤝 Friendly Caption",
                "friendly",
                "🤝 Generate Friendly Caption",
                "What kind of description do you want?",
            )

            # Caption 3 - Erotic
            system3, user3, template_btns3, gen3_btn, out3 = _caption_panel(
                "🔥 Erotic Caption",
                "erotic",
                "🔥 Generate Erotic Caption",
                "What kind of explicit description do you want?",
            )

            # Export section
            # NOTE(review): placement inside the right column is inferred from
            # statement order only - confirm against the intended layout.
            gr.HTML("<h4 style='margin: 20px 0 10px 0; color: #374151;'>📅 Export</h4>")
            export_btn = gr.Button("📅 Export All Data", variant="secondary")
            export_out = gr.Textbox(visible=False)
            export_file = gr.File(visible=False)

    # Connect generation buttons
    gen1_btn.click(generate_caption_1, [image_input, system1, user1], out1)
    gen2_btn.click(generate_caption_2, [image_input, system2, user2], out2)
    gen3_btn.click(generate_caption_3, [image_input, system3, user3], out3)
    ask_btn.click(answer_question, [image_input, question_input], qa_output)

    # Template insertion buttons: each panel's key/que/use/not button rewrites
    # that panel's system and user prompts from the matching input field.
    template_sources = (
        (insert_key, keywords_input),
        (insert_que, question_input),
        (insert_use, custom_instruction_input),
        (insert_not, avoid_input),
    )
    for system_box, user_box, buttons in (
        (system1, user1, template_btns1),
        (system2, user2, template_btns2),
        (system3, user3, template_btns3),
    ):
        for button, (inserter, source_box) in zip(buttons, template_sources):
            button.click(inserter, [system_box, user_box, source_box], [system_box, user_box])

    # Export functionality
    def handle_export(k, c, a, q, c1, c2, c3, qa, img):
        """Write the export JSON to the temp directory and reveal the download.

        Returns a pair of ``gr.update`` objects for the status textbox and the
        file component; the file stays hidden when nothing was exported.
        """
        # image_input is declared with type="pil", so ``img`` is an image
        # object (or None), not a path. Only forward real path strings.
        image_ref = img if isinstance(img, str) else ""
        msg, fd = export_joycaption_data(k, c, a, q, c1, c2, c3, qa, image_ref)
        if fd:
            js, fn = fd
            p = os.path.join(tempfile.gettempdir(), fn)
            with open(p, "w", encoding="utf-8") as f:
                f.write(js)
            return gr.update(value=msg, visible=True), gr.update(value=p, visible=True)
        return gr.update(value=msg, visible=True), gr.update(visible=False)

    export_btn.click(handle_export, [keywords_input, custom_instruction_input, avoid_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])

    # Instructions
    gr.HTML("<hr>")
    gr.Markdown("""
    ## **🎛️ Advanced Prompting System Guide**

    ### **📝 How to Use:**
    1. **Upload image** and fill in the input fields (Keywords, Custom Instruction, Avoid, Question)
    2. **Edit prompts** in the System/User prompt textboxes for each caption type
    3. **Use template buttons** to insert formatted text:
       - **`key`** → "Pay attention to these keywords: [your keywords]"
       - **`use`** → "Make sure that you mention: [your custom instruction]"
       - **`not`** → "Do NOT mention: [things to avoid]"
       - **`que`** → "Answer this question: [your question]"
    4. **Generate captions** with fully customized prompts
    5. **Export all results** as JSON

    ### **✨ Features:**
    - 🎨 **3 Caption Tones**: Casual, Friendly, Erotic with custom defaults
    - 🎛️ **Full Prompt Control**: Edit both system and user prompts
    - 🔧 **Template Helpers**: One-click insertion of formatted instructions
    - 🚫 **Duplicate Prevention**: Each template can only be added once per field
    - 📥 **Complete Export**: All prompts, inputs, and outputs saved

    **Pro Tip**: Start with the default prompts and enhance them using the template buttons!
    """)
561
 
562
  if __name__ == "__main__":
563
+ demo.launch()