nickdigger commited on
Commit
989cc3a
Β·
verified Β·
1 Parent(s): 6fe2bf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -206
app.py CHANGED
@@ -15,18 +15,40 @@ except Exception:
15
 
16
  import gradio as gr
17
  import torch
18
- from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor
19
  from PIL import Image
20
  import tempfile, gc, os, shutil, json, time
21
  from pathlib import Path
22
- from threading import Thread
23
- from typing import Generator
24
  from hf_space_utils import fix_image_url, postprocess_caption
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # ===== Storage cleanup setup =====
27
  _tmpdir = tempfile.gettempdir()
28
  os.environ["HF_HOME"] = os.path.join(_tmpdir, "hf_cache")
29
- os.environ["TRANSFORMERS_CACHE"] = os.path.join(_tmpdir, "transformers_cache")
30
  os.environ["HF_DATASETS_CACHE"] = os.path.join(_tmpdir, "datasets_cache")
31
  os.environ["TORCH_HOME"] = os.path.join(_tmpdir, "torch_cache")
32
 
@@ -46,189 +68,200 @@ def cleanup_storage():
46
 
47
  TITLE = """
48
  <div style="text-align:center;margin:20px 0;">
49
- <h1>🎨 JoyCaption Three-Tone + Q&A (v4.0 ZeroGPU)</h1>
50
- <p><strong>βœ… Restored working prompts β€’ Natural model behavior β€’ Proven parameters</strong></p>
51
  </div><hr>
52
  """
53
 
54
- print("πŸš€ Loading JoyCaption model...")
55
- processor = AutoProcessor.from_pretrained(MODEL_PATH)
56
- model = LlavaForConditionalGeneration.from_pretrained(MODEL_PATH, torch_dtype="bfloat16", device_map=0)
 
 
 
 
 
 
 
 
 
57
  model.eval()
58
  cleanup_storage()
59
  print("βœ… Model loaded successfully!")
60
 
61
- # ===== WORKING CAPTION PROMPTS (from beta-one space) =====
62
- CAPTION_PROMPTS = {
63
- "engaging": "Write a descriptive caption for this image in a casual tone.",
64
- "erotic": "Write a detailed, explicit description of this scene. Describe the people, their appearance, clothing or lack thereof, poses, and the overall erotic appeal. Use direct adult language and be thorough.",
65
- "custom": "Write a detailed description for this image."
66
- }
67
-
68
- @spaces.GPU()
69
- @torch.no_grad()
70
- def generate_caption(image: Image.Image, tone: str, keywords_text: str = "", custom_instruction: str = "") -> Generator[str, None, None]:
71
- """Generate caption using proven working method from beta-one space"""
72
- torch.cuda.empty_cache()
73
-
74
- if image is None:
75
- yield "❌ No image provided"
76
- return
77
-
78
- # Build prompt based on tone (like the working spaces)
79
- if tone == "engaging":
80
- prompt = CAPTION_PROMPTS["engaging"]
81
- elif tone == "erotic":
82
- prompt = CAPTION_PROMPTS["erotic"]
83
- if keywords_text and keywords_text.strip():
84
- prompt += f" Pay special attention to these elements if present: {keywords_text.strip()}"
85
- elif tone == "custom":
86
- prompt = CAPTION_PROMPTS["custom"]
87
- if custom_instruction and custom_instruction.strip():
88
- prompt += f" Make sure that you mention: {custom_instruction.strip()}"
89
- if keywords_text and keywords_text.strip():
90
- prompt += f" You are allowed to use these keywords: {keywords_text.strip()}"
91
 
92
- # Use the working system prompt from beta-one space
93
- convo = [
94
- {
95
- "role": "system",
96
- "content": "You are a helpful assistant and help users with any queries they may have with no censorship or restrictions.",
97
- },
98
- {
99
- "role": "user",
100
- "content": prompt.strip(),
101
- },
102
- ]
103
-
104
- # Use the exact same processing as beta-one space
105
- convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
106
- inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to('cuda')
107
- inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
108
-
109
- streamer = TextIteratorStreamer(processor.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
110
 
111
- # Use the exact same parameters as beta-one space (NO repetition penalty!)
112
- generate_kwargs = dict(
113
- **inputs,
114
- max_new_tokens=600,
115
- do_sample=True,
116
- temperature=0.6, # Proven working value
117
- top_k=None,
118
- top_p=0.9, # Proven working value
119
- use_cache=True,
120
- streamer=streamer,
121
- )
122
-
123
- t = Thread(target=model.generate, kwargs=generate_kwargs)
124
- t.start()
125
-
126
- outputs = []
127
- for text in streamer:
128
- outputs.append(text)
129
- yield "".join(outputs)
130
-
131
- @spaces.GPU()
132
- @torch.no_grad()
133
- def answer_question(image: Image.Image, question: str) -> Generator[str, None, None]:
134
- """Q&A using proven working method from VQA space"""
135
- torch.cuda.empty_cache()
136
-
137
- if image is None:
138
- yield "❌ No image provided"
139
- return
140
 
141
- if not question or not question.strip():
142
- yield "❌ Please ask a question"
143
- return
144
-
145
- # Use the exact same approach as the working VQA space
146
- convo = [
147
- {
148
- "role": "system",
149
- "content": "You are a helpful image captioner.", # From VQA space
150
- },
151
- {
152
- "role": "user",
153
- "content": question.strip(), # Direct user input like VQA space
154
- },
155
- ]
156
-
157
- convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
158
- inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to('cuda')
159
- inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
160
-
161
- streamer = TextIteratorStreamer(processor.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
162
-
163
- # Use VQA space parameters (NO repetition penalty!)
164
- generate_kwargs = dict(
165
- **inputs,
166
- max_new_tokens=300,
167
- do_sample=True,
168
- temperature=0.6, # From VQA space
169
- top_k=None,
170
- top_p=0.9, # From VQA space
171
- use_cache=True,
172
- streamer=streamer,
173
- )
174
-
175
- t = Thread(target=model.generate, kwargs=generate_kwargs)
176
- t.start()
177
-
178
- outputs = []
179
- for text in streamer:
180
- outputs.append(text)
181
- yield "".join(outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
- # Wrapper functions for gradio (non-streaming for simplicity)
184
- def generate_engaging_caption(image, custom_instruction=""):
185
- if not image:
186
- return "❌ Upload image first"
187
-
188
- result = ""
189
- for chunk in generate_caption(image, "engaging", custom_instruction=custom_instruction):
190
- result = chunk
191
-
192
- # Apply the same postprocessing
193
- final_result = postprocess_caption(result, max_chars=1000)
194
- return final_result if final_result else "❌ No result generated"
195
 
196
- def generate_erotic_caption(image, keywords_text="", custom_instruction=""):
197
- if not image:
198
- return "❌ Upload image first"
199
-
200
- result = ""
201
- for chunk in generate_caption(image, "erotic", keywords_text=keywords_text, custom_instruction=custom_instruction):
202
- result = chunk
203
-
204
- final_result = postprocess_caption(result, max_chars=1000)
205
- return final_result if final_result else "❌ No result generated"
206
 
207
- def generate_custom_caption(image, keywords_text="", custom_instruction=""):
208
- if not image:
209
- return "❌ Upload image first"
210
-
211
- result = ""
212
- for chunk in generate_caption(image, "custom", keywords_text=keywords_text, custom_instruction=custom_instruction):
213
- result = chunk
214
-
215
- final_result = postprocess_caption(result, max_chars=1000)
216
- return final_result if final_result else "❌ No result generated"
217
 
218
- def ask_question(image, question):
 
 
 
219
  if not image:
220
  return "❌ Upload image first"
221
  if not question or not question.strip():
222
  return "❌ Please ask a question"
223
-
224
- result = ""
225
- for chunk in answer_question(image, question):
226
- result = chunk
227
-
228
- final_result = postprocess_caption(result, max_chars=400)
229
- return final_result if final_result else "❌ No answer generated"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
- # ===== Export function =====
232
  def export_joycaption_data(keywords, custom_instructions, question, engaging_caption, erotic_caption, custom_caption, qa_answer, image_path=""):
233
  try:
234
  data = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "source":"JoyCaption","data":{}}
@@ -257,31 +290,26 @@ with gr.Blocks(title="JoyCaption Three-Tone + Q&A", theme=gr.themes.Soft()) as d
257
  with gr.Row():
258
  with gr.Column(scale=1):
259
  image_input = gr.Image(type="pil", label="πŸ“Έ Upload Image", height=400)
260
- keywords_input = gr.Textbox(label="🏷️ Keywords", lines=2, placeholder="Optional: Keywords for erotic/custom captions")
261
- custom_instruction_input = gr.Textbox(label="🎯 Custom Instruction", lines=2, placeholder="Optional: Custom instruction for third caption")
262
- question_input = gr.Textbox(label="❓ Ask Question", lines=2, placeholder="Ask anything about the image")
263
  ask_btn = gr.Button("❓ Ask", variant="secondary")
264
- qa_output = gr.Textbox(label="Q&A Answer", lines=4, show_copy_button=True)
265
-
266
  with gr.Column(scale=1):
267
- g1 = gr.Button("πŸ“ Casual Descriptive", variant="primary", size="lg")
268
- out1 = gr.Textbox(label="Casual Caption", lines=6, show_copy_button=True)
269
-
270
- g2 = gr.Button("πŸ”₯ Erotic", variant="secondary", size="lg")
271
- out2 = gr.Textbox(label="Erotic Caption", lines=6, show_copy_button=True)
272
-
273
- g3 = gr.Button("🎯 Custom Instruction", variant="secondary", size="lg")
274
- out3 = gr.Textbox(label="Custom Caption", lines=6, show_copy_button=True)
275
-
276
- export_btn = gr.Button("πŸ“₯ Export All Data", variant="secondary")
277
  export_out = gr.Textbox(visible=False)
278
  export_file = gr.File(visible=False)
279
 
280
- # Connect buttons
281
- g1.click(generate_engaging_caption, [image_input, custom_instruction_input], out1)
282
- g2.click(generate_erotic_caption, [image_input, keywords_input, custom_instruction_input], out2)
283
- g3.click(generate_custom_caption, [image_input, keywords_input, custom_instruction_input], out3)
284
- ask_btn.click(ask_question, [image_input, question_input], qa_output)
285
 
286
  def handle_export(k, c, q, e1, e2, e3, qa, img):
287
  msg, fd = export_joycaption_data(k,c,q,e1,e2,e3,qa,img)
@@ -293,26 +321,18 @@ with gr.Blocks(title="JoyCaption Three-Tone + Q&A", theme=gr.themes.Soft()) as d
293
  return gr.update(value=msg,visible=True), gr.update(visible=False)
294
 
295
  export_btn.click(handle_export, [keywords_input, custom_instruction_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])
296
-
297
- gr.HTML("<hr><h2>πŸ“‹ Usage Instructions</h2>")
298
  gr.Markdown("""
299
- ### **How to Use:**
300
- 1. **πŸ“Έ Upload an image** in the left panel
301
- 2. **🎯 Optional**: Add keywords or custom instructions
302
- 3. **Click caption buttons** to generate different styles
303
- 4. **❓ Ask questions** about the image using natural language
304
- 5. **πŸ“₯ Export** all results as JSON
305
-
306
- ### **Caption Types:**
307
- - **πŸ“ Casual Descriptive**: Natural, conversational descriptions
308
- - **πŸ”₯ Erotic**: Explicit adult content descriptions (uses keywords)
309
- - **🎯 Custom Instruction**: Follows your specific instructions (uses both keywords and custom instruction)
310
 
311
- ### **✨ Key Improvements:**
312
- - Uses **proven working prompts** from original JoyCaption spaces
313
- - **Natural model behavior** without over-engineering
314
- - **No repetition penalties** that caused glitches
315
- - **Same parameters** as working reference spaces (temp 0.6, top-p 0.9)
 
316
  """)
317
 
318
  if __name__ == "__main__":
 
15
 
16
  import gradio as gr
17
  import torch
18
+ from transformers import LlavaForConditionalGeneration, AutoProcessor
19
  from PIL import Image
20
  import tempfile, gc, os, shutil, json, time
21
  from pathlib import Path
 
 
22
  from hf_space_utils import fix_image_url, postprocess_caption
23
 
24
+ # ===== AGGRESSIVE CACHE CLEARING =====
25
+ def force_clear_all_caches():
26
+ """Force clear all possible caches"""
27
+ try:
28
+ # Clear CUDA cache
29
+ if torch.cuda.is_available():
30
+ torch.cuda.empty_cache()
31
+ torch.cuda.synchronize()
32
+
33
+ # Clear Python cache
34
+ gc.collect()
35
+
36
+ # Clear transformers cache
37
+ from transformers.utils import TRANSFORMERS_CACHE
38
+ if os.path.exists(TRANSFORMERS_CACHE):
39
+ shutil.rmtree(TRANSFORMERS_CACHE, ignore_errors=True)
40
+
41
+ print("🧹 All caches cleared!")
42
+ except Exception as e:
43
+ print(f"⚠️ Cache clear warning: {e}")
44
+
45
+ # Force clear at startup
46
+ force_clear_all_caches()
47
+
48
  # ===== Storage cleanup setup =====
49
  _tmpdir = tempfile.gettempdir()
50
  os.environ["HF_HOME"] = os.path.join(_tmpdir, "hf_cache")
51
+ os.environ["TRANSFORMERS_CACHE"] = os.path.join(_tmpdir, "transformers_cache")
52
  os.environ["HF_DATASETS_CACHE"] = os.path.join(_tmpdir, "datasets_cache")
53
  os.environ["TORCH_HOME"] = os.path.join(_tmpdir, "torch_cache")
54
 
 
68
 
69
  TITLE = """
70
  <div style="text-align:center;margin:20px 0;">
71
+ <h1>🎨 JoyCaption Three-Tone + Q&A (v4.1 - Cache Cleared)</h1>
72
+ <p><strong>🧹 Force cleared all caches β€’ Fresh model load β€’ Reset state</strong></p>
73
  </div><hr>
74
  """
75
 
76
+ print("πŸš€ Loading JoyCaption model with fresh cache...")
77
+
78
+ # Force reload with no cache
79
+ processor = AutoProcessor.from_pretrained(MODEL_PATH, cache_dir=None, force_download=False)
80
+ model = LlavaForConditionalGeneration.from_pretrained(
81
+ MODEL_PATH,
82
+ torch_dtype=torch.bfloat16,
83
+ device_map="auto",
84
+ cache_dir=None,
85
+ force_download=False,
86
+ low_cpu_mem_usage=True
87
+ )
88
  model.eval()
89
  cleanup_storage()
90
  print("βœ… Model loaded successfully!")
91
 
92
+ def apply_smart_corrections(text):
93
+ """Apply minimal corrections only"""
94
+ if not text or not isinstance(text, str):
95
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
+ # Only remove leading "a photo of" type phrases - nothing else
98
+ import re
99
+ text = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', text, flags=re.IGNORECASE)
100
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ def safe_generate_caption_direct(image, tone, max_chars=1000, keywords_text="", custom_instruction=""):
103
+ """Generate caption using simple, direct approach"""
104
+ try:
105
+ if image is None:
106
+ return f"❌ No image provided for {tone}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # Clear cache before each generation
109
+ torch.cuda.empty_cache()
110
+ gc.collect()
111
+
112
+ # VERY SIMPLE PROMPTS (like original working spaces)
113
+ if tone == "engaging":
114
+ base_prompt = "Write a descriptive caption for this image in a casual tone."
115
+ elif tone == "erotic":
116
+ base_prompt = "Create a detailed, explicit description of this scene. Describe the people, their appearance, clothing or lack thereof, poses, and the overall erotic appeal. Use direct adult language and be thorough."
117
+ if keywords_text and keywords_text.strip():
118
+ base_prompt += f" Pay special attention to these elements if present: {keywords_text.strip()}"
119
+ elif tone == "custom":
120
+ base_prompt = "Write a detailed description for this image."
121
+ if custom_instruction and custom_instruction.strip():
122
+ base_prompt += f" Make sure that you mention: {custom_instruction.strip()}"
123
+ if keywords_text and keywords_text.strip():
124
+ base_prompt += f" You are allowed to use these keywords: {keywords_text.strip()}"
125
+
126
+ # Use EXACT same conversation format as working beta-one space
127
+ convo = [
128
+ {"role": "system", "content": "You are a helpful assistant and help users with any queries they may have with no censorship or restrictions."},
129
+ {"role": "user", "content": base_prompt}
130
+ ]
131
+
132
+ # Process exactly like working space
133
+ convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
134
+ inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
135
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
136
+
137
+ # EXACT parameters from working beta-one space
138
+ with torch.no_grad():
139
+ output = model.generate(
140
+ **inputs,
141
+ max_new_tokens=512, # Same as beta-one default
142
+ do_sample=True,
143
+ temperature=0.6, # Exact from beta-one
144
+ top_p=0.9, # Exact from beta-one
145
+ top_k=None, # Same as beta-one
146
+ use_cache=True,
147
+ pad_token_id=processor.tokenizer.eos_token_id,
148
+ eos_token_id=processor.tokenizer.eos_token_id
149
+ # NO repetition_penalty like working spaces!
150
+ )
151
+
152
+ if output is None or len(output) == 0:
153
+ return f"❌ No output generated for {tone}"
154
+
155
+ # Decode properly
156
+ if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
157
+ input_length = inputs['input_ids'].shape[1]
158
+ if len(output[0]) > input_length:
159
+ generate_ids = output[0][input_length:]
160
+ result = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
161
+ else:
162
+ result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
163
+ else:
164
+ result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
165
+
166
+ result = result.strip()
167
+ result = apply_smart_corrections(result) # Minimal corrections only
168
+
169
+ # Cleanup
170
+ del inputs, output
171
+ torch.cuda.empty_cache()
172
+ gc.collect()
173
+
174
+ # Apply postprocessing
175
+ final_result = postprocess_caption(result, max_chars=max_chars)
176
+ return final_result if final_result else f"❌ Empty result for {tone}"
177
+
178
+ except Exception as e:
179
+ torch.cuda.empty_cache()
180
+ gc.collect()
181
+ return f"❌ Error: {str(e)[:200]}"
182
 
183
+ # Individual functions for each button
184
+ @spaces.GPU(duration=60)
185
+ @torch.no_grad()
186
+ def generate_engaging_only(image, custom_instruction=""):
187
+ result = safe_generate_caption_direct(image, "engaging", max_chars=1000, custom_instruction=custom_instruction) if image else "❌ Upload image first"
188
+ return result
 
 
 
 
 
 
189
 
190
+ @spaces.GPU(duration=60)
191
+ @torch.no_grad()
192
+ def generate_erotic_only(image, keywords_text="", custom_instruction=""):
193
+ result = safe_generate_caption_direct(image, "erotic", max_chars=1000, keywords_text=keywords_text, custom_instruction=custom_instruction) if image else "❌ Upload image first"
194
+ return result
 
 
 
 
 
195
 
196
+ @spaces.GPU(duration=60)
197
+ @torch.no_grad()
198
+ def generate_custom_only(image, keywords_text="", custom_instruction=""):
199
+ result = safe_generate_caption_direct(image, "custom", max_chars=1000, keywords_text=keywords_text, custom_instruction=custom_instruction) if image else "❌ Upload image first"
200
+ return result
 
 
 
 
 
201
 
202
+ @spaces.GPU(duration=40)
203
+ @torch.no_grad()
204
+ def answer_question(image, question):
205
+ """Q&A with simple approach like VQA space"""
206
  if not image:
207
  return "❌ Upload image first"
208
  if not question or not question.strip():
209
  return "❌ Please ask a question"
210
+
211
+ try:
212
+ torch.cuda.empty_cache()
213
+ gc.collect()
214
+
215
+ # Simple Q&A like VQA space
216
+ convo = [
217
+ {"role": "system", "content": "You are a helpful image captioner."},
218
+ {"role": "user", "content": question.strip()}
219
+ ]
220
+
221
+ convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
222
+ inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
223
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
224
+
225
+ with torch.no_grad():
226
+ output = model.generate(
227
+ **inputs,
228
+ max_new_tokens=200,
229
+ do_sample=True,
230
+ temperature=0.6, # Same as VQA space
231
+ top_p=0.9, # Same as VQA space
232
+ top_k=None,
233
+ use_cache=True,
234
+ pad_token_id=processor.tokenizer.eos_token_id,
235
+ eos_token_id=processor.tokenizer.eos_token_id
236
+ )
237
+
238
+ # Decode result
239
+ if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
240
+ input_length = inputs['input_ids'].shape[1]
241
+ if len(output[0]) > input_length:
242
+ generate_ids = output[0][input_length:]
243
+ result = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
244
+ else:
245
+ result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
246
+ else:
247
+ result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
248
+
249
+ result = result.strip()
250
+
251
+ # Cleanup
252
+ del inputs, output
253
+ torch.cuda.empty_cache()
254
+ gc.collect()
255
+
256
+ final_result = postprocess_caption(result, max_chars=300)
257
+ return final_result if final_result else "❌ No answer generated"
258
+
259
+ except Exception as e:
260
+ torch.cuda.empty_cache()
261
+ gc.collect()
262
+ return f"❌ Q&A Error: {str(e)[:200]}"
263
 
264
+ # ===== Export =====
265
  def export_joycaption_data(keywords, custom_instructions, question, engaging_caption, erotic_caption, custom_caption, qa_answer, image_path=""):
266
  try:
267
  data = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "source":"JoyCaption","data":{}}
 
290
  with gr.Row():
291
  with gr.Column(scale=1):
292
  image_input = gr.Image(type="pil", label="πŸ“Έ Upload Image", height=400)
293
+ keywords_input = gr.Textbox(label="🏷️ Keywords", lines=2)
294
+ custom_instruction_input = gr.Textbox(label="🎯 Custom Instruction", lines=2)
295
+ question_input = gr.Textbox(label="❓ Ask Question", lines=2)
296
  ask_btn = gr.Button("❓ Ask", variant="secondary")
297
+ qa_output = gr.Textbox(label="Q&A", lines=4, show_copy_button=True)
 
298
  with gr.Column(scale=1):
299
+ g1 = gr.Button("πŸ“ Casual Descriptive", variant="primary")
300
+ out1 = gr.Textbox(lines=7, show_copy_button=True)
301
+ g2 = gr.Button("πŸ”₯ Erotic", variant="secondary")
302
+ out2 = gr.Textbox(lines=7, show_copy_button=True)
303
+ g3 = gr.Button("🎯 Custom Instruction", variant="secondary")
304
+ out3 = gr.Textbox(lines=7, show_copy_button=True)
305
+ export_btn = gr.Button("πŸ“₯ Export All Data")
 
 
 
306
  export_out = gr.Textbox(visible=False)
307
  export_file = gr.File(visible=False)
308
 
309
+ g1.click(generate_engaging_only, [image_input, custom_instruction_input], out1)
310
+ g2.click(generate_erotic_only, [image_input, keywords_input, custom_instruction_input], out2)
311
+ g3.click(generate_custom_only, [image_input, keywords_input, custom_instruction_input], out3)
312
+ ask_btn.click(answer_question, [image_input, question_input], qa_output)
 
313
 
314
  def handle_export(k, c, q, e1, e2, e3, qa, img):
315
  msg, fd = export_joycaption_data(k,c,q,e1,e2,e3,qa,img)
 
321
  return gr.update(value=msg,visible=True), gr.update(visible=False)
322
 
323
  export_btn.click(handle_export, [keywords_input, custom_instruction_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])
324
+
325
+ gr.HTML("<hr>")
326
  gr.Markdown("""
327
+ ### **🧹 Cache Cleared Version**
328
+ This version aggressively clears all caches and forces fresh model loading to eliminate any persistent issues from previous versions.
 
 
 
 
 
 
 
 
 
329
 
330
+ **What's different:**
331
+ - 🧹 All caches cleared at startup
332
+ - πŸ”„ Fresh model load with no cached weights
333
+ - πŸ’Ύ Cache cleared before each generation
334
+ - 🎯 Exact parameters from working reference spaces
335
+ - πŸ“ Max tokens: 512 (same as beta-one space default)
336
  """)
337
 
338
  if __name__ == "__main__":