Spaces:

nickdigger
/

joy-caption-enhanced

Running on Zero

App Files Files Community

nickdigger committed on Oct 24, 2025

Commit

d0aa398

verified ·

1 Parent(s): c31d3ec

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -286

app.py CHANGED Viewed

@@ -18,22 +18,17 @@ import torch
 from transformers import LlavaForConditionalGeneration, AutoProcessor
 from PIL import Image
 import tempfile, gc, os, shutil, json, time, re
-from pathlib import Path
-from datetime import datetime
-from typing import Optional
 from urllib.parse import urlparse
 # ===== BUILT-IN UTILITY FUNCTIONS =====
 def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
-    """Convert local image paths to URLs for export"""
     if not raw_url_or_path:
         return raw_url_or_path
     try:
         parsed = urlparse(raw_url_or_path)
     except Exception:
         parsed = None
     if parsed and parsed.scheme and parsed.netloc:
         full = raw_url_or_path
         if "/file=" in full and "/gradio_api/file=" not in full:
@@ -41,7 +36,6 @@ def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
         if "file=" in full and "/gradio_api/file=" not in full and "/gradio_api" not in full:
             full = full.replace("file=", "gradio_api/file=")
         return full
     if raw_url_or_path.startswith("/tmp/") or raw_url_or_path.startswith("tmp/"):
         if not host:
             return raw_url_or_path
@@ -52,18 +46,12 @@ def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
         if p.startswith("/"):
             p = p[1:]
         return f"{host}/gradio_api/file=/{p}"
     return raw_url_or_path
 def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
-    """Minimal caption post-processing - just basic cleanup"""
     if not caption or not isinstance(caption, str):
         return caption or ""
-    # Only remove leading "a photo of" phrases
     result = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', caption.strip(), flags=re.IGNORECASE)
-    # Only truncate if extremely long
     if max_chars and len(result) > max_chars:
         truncate_point = max_chars
         for i in range(len(result) - 1, max(0, max_chars - 100), -1):
@@ -71,27 +59,22 @@ def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
                 truncate_point = i + 1
                 break
         result = result[:truncate_point].strip()
     if result and not result.endswith(('.', '!', '?')):
-        result = result + "."
     return result
-# ===== CACHE CLEARING =====
 def force_clear_all_caches():
-    """Force clear all possible caches"""
     try:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
         gc.collect()
-        print("🧹 All caches cleared!")
     except Exception as e:
         print(f"⚠️ Cache clear warning: {e}")
 force_clear_all_caches()
-# ===== Setup =====
 _tmpdir = tempfile.gettempdir()
 os.environ["HF_HOME"] = os.path.join(_tmpdir, "hf_cache")
 os.environ["TRANSFORMERS_CACHE"] = os.path.join(_tmpdir, "transformers_cache")
@@ -148,26 +131,20 @@ DEFAULT_PROMPTS = {
 }
 def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=1200):
-    """Generate caption using custom prompts"""
     try:
         if image is None:
             return "❌ No image provided"
         if not system_prompt.strip() or not user_prompt.strip():
             return "❌ Both system and user prompts are required"
         torch.cuda.empty_cache()
         gc.collect()
         convo = [
             {"role": "system", "content": system_prompt.strip()},
             {"role": "user", "content": user_prompt.strip()}
         ]
         convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
         inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
         inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
         with torch.no_grad():
             output = model.generate(
                 **inputs,
@@ -180,167 +157,90 @@ def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=12
                 pad_token_id=processor.tokenizer.eos_token_id,
                 eos_token_id=processor.tokenizer.eos_token_id
             )
-        if output is None or len(output) == 0:
             return "❌ No output generated"
-        if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
-            input_length = inputs['input_ids'].shape[1]
-            if len(output[0]) > input_length:
-                generate_ids = output[0][input_length:]
-                result = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
-            else:
-                result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        else:
-            result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        result = result.strip()
         del inputs, output
         torch.cuda.empty_cache()
         gc.collect()
-        final_result = postprocess_caption(result, max_chars=max_chars)
-        return final_result if final_result else "❌ Empty result"
     except Exception as e:
         torch.cuda.empty_cache()
         gc.collect()
         return f"❌ Error: {str(e)[:200]}"
-# Individual caption generation functions
 @spaces.GPU(duration=60)
 @torch.no_grad()
 def generate_caption_1(image, system1, user1):
-    if not image:
-        return "❌ Upload image first"
     return safe_generate_caption_direct(image, system1, user1)
 @spaces.GPU(duration=60)
 @torch.no_grad()
 def generate_caption_2(image, system2, user2):
-    if not image:
-        return "❌ Upload image first"
     return safe_generate_caption_direct(image, system2, user2)
 @spaces.GPU(duration=60)
 @torch.no_grad()
 def generate_caption_3(image, system3, user3):
-    if not image:
-        return "❌ Upload image first"
     return safe_generate_caption_direct(image, system3, user3)
 @spaces.GPU(duration=40)
 @torch.no_grad()
 def answer_question(image, question):
-    """Q&A function"""
-    if not image:
-        return "❌ Upload image first"
-    if not question or not question.strip():
-        return "❌ Please ask a question"
     try:
         torch.cuda.empty_cache()
         gc.collect()
-        convo = [
-            {"role": "system", "content": "You are a helpful image captioner."},
-            {"role": "user", "content": question.strip()}
-        ]
         convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
         inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
         inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
-        with torch.no_grad():
-            output = model.generate(
-                **inputs,
-                max_new_tokens=300,
-                do_sample=True,
-                temperature=0.6,
-                top_p=0.9,
-                top_k=None,
-                use_cache=True,
-                pad_token_id=processor.tokenizer.eos_token_id,
-                eos_token_id=processor.tokenizer.eos_token_id
-            )
-        if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
-            input_length = inputs['input_ids'].shape[1]
-            if len(output[0]) > input_length:
-                generate_ids = output[0][input_length:]
-                result = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
-            else:
-                result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        else:
-            result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        result = result.strip()
         del inputs, output
         torch.cuda.empty_cache()
         gc.collect()
-        final_result = postprocess_caption(result, max_chars=500)
-        return final_result if final_result else "❌ No answer generated"
     except Exception as e:
         torch.cuda.empty_cache()
         gc.collect()
         return f"❌ Q&A Error: {str(e)[:200]}"
-# Helper functions for template insertion
 def insert_template(current_text, template_text, field_content):
-    """Insert template at the end of current text if not already present"""
-    if not field_content.strip():
-        return current_text
     formatted_template = template_text.format(content=field_content.strip())
-    # Check if this template is already in the text (prevent duplicates)
-    if formatted_template in current_text:
-        return current_text
-    # Add template at the end with proper spacing
-    if current_text.strip():
-        return current_text.rstrip() + " " + formatted_template
-    else:
-        return formatted_template
 def create_template_functions():
-    """Create template insertion functions for each button type"""
     def insert_key(system_text, user_text, keywords_content):
         template = "Pay attention to these keywords: {content}."
-        return (
-            insert_template(system_text, template, keywords_content),
-            insert_template(user_text, template, keywords_content)
-        )
     def insert_que(system_text, user_text, question_content):
         template = "Answer this question: {content}."
-        return (
-            insert_template(system_text, template, question_content),
-            insert_template(user_text, template, question_content)
-        )
     def insert_use(system_text, user_text, custom_content):
         template = "Make sure that you mention: {content}."
-        return (
-            insert_template(system_text, template, custom_content),
-            insert_template(user_text, template, custom_content)
-        )
     def insert_not(system_text, user_text, avoid_content):
         template = "Do NOT mention: {content}."
-        return (
-            insert_template(system_text, template, avoid_content),
-            insert_template(user_text, template, avoid_content)
-        )
     return insert_key, insert_que, insert_use, insert_not
-# Export function
 def export_joycaption_data(keywords, custom_instructions, avoid, question, cap1, cap2, cap3, qa_answer, image_path=""):
     try:
         data = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "source":"JoyCaption","data":{}}
@@ -356,184 +256,84 @@ def export_joycaption_data(keywords, custom_instructions, avoid, question, cap1,
         if cap2.strip(): data["data"]["caption_friendly"]=cap2.strip()
         if cap3.strip(): data["data"]["caption_erotic"]=cap3.strip()
         if qa_answer.strip(): data["data"]["qa_answer"]=qa_answer.strip()
-        if not data["data"]:
-            return "❌ No data to export", None
         js = json.dumps(data, indent=2, ensure_ascii=False)
         fn = f"joycaption_{time.strftime('%Y%m%d_%H%M%S')}.json"
         return f"✅ Exported {len(data['data'])} fields", (js, fn)
     except Exception as e:
         return f"❌ Export failed: {str(e)}", None
-# Create the Gradio interface
 with gr.Blocks(title="JoyCaption Advanced Prompting System", theme=gr.themes.Soft()) as demo:
     gr.HTML(TITLE)
-    # Get template functions
     insert_key, insert_que, insert_use, insert_not = create_template_functions()
     with gr.Row():
-        # Left Column - Input Fields
         with gr.Column(scale=1):
-            image_input = gr.Image(type="pil", label="📸 Upload Image", height=400)
-            keywords_input = gr.Textbox(
-                label="🏷️ Keywords",
-                lines=2,
-                placeholder="Enter keywords (available as 'key' template)",
-                info="Use 'key' button to insert into prompts"
-            )
-            custom_instruction_input = gr.Textbox(
-                label="🎯 Custom Instruction",
-                lines=2,
-                placeholder="Enter custom instructions (available as 'use' template)",
-                info="Use 'use' button to insert into prompts"
-            )
-            avoid_input = gr.Textbox(
-                label="🚫 Avoid",
-                lines=2,
-                placeholder="Things to avoid mentioning (available as 'not' template)",
-                info="Use 'not' button to insert into prompts"
-            )
-            question_input = gr.Textbox(
-                label="❓ Question",
-                lines=2,
-                placeholder="Ask a question about the image (available as 'que' template)",
-                info="Use 'que' button to insert into prompts"
-            )
-            ask_btn = gr.Button("❓ Ask Question", variant="secondary")
-            qa_output = gr.Textbox(label="Q&A Answer", lines=4, show_copy_button=True)
-        # Right Column - Caption Generation
-        with gr.Column(scale=1):
-            # Caption 1 - Casual
-            gr.HTML("<h4 style='margin: 15px 0 10px 0; color: #374151;'>📝 Casual Caption</h4>")
-            system1 = gr.Textbox(
-                label="System Prompt",
-                lines=2,
-                value=DEFAULT_PROMPTS["casual"]["system"],
-                placeholder="How should the AI behave?"
-            )
-            user1 = gr.Textbox(
-                label="User Prompt",
-                lines=2,
-                value=DEFAULT_PROMPTS["casual"]["user"],
-                placeholder="What should the AI do with this image?"
-            )
-            with gr.Row():
-                key1_btn = gr.Button("key", size="sm")
-                que1_btn = gr.Button("que", size="sm")
-                use1_btn = gr.Button("use", size="sm")
-                not1_btn = gr.Button("not", size="sm")
-                gen1_btn = gr.Button("📝 Generate Casual Caption", variant="primary")
-            out1 = gr.Textbox(lines=5, show_copy_button=True)
-            # Caption 2 - Friendly
-            gr.HTML("<h4 style='margin: 15px 0 10px 0; color: #374151;'>🤝 Friendly Caption</h4>")
-            system2 = gr.Textbox(
-                label="System Prompt",
-                lines=2,
-                value=DEFAULT_PROMPTS["friendly"]["system"],
-                placeholder="How should the AI behave?"
-            )
-            user2 = gr.Textbox(
-                label="User Prompt",
-                lines=2,
-                value=DEFAULT_PROMPTS["friendly"]["user"],
-                placeholder="What kind of description do you want?"
-            )
-            with gr.Row():
-                key2_btn = gr.Button("key", size="sm")
-                que2_btn = gr.Button("que", size="sm")
-                use2_btn = gr.Button("use", size="sm")
-                not2_btn = gr.Button("not", size="sm")
-                gen2_btn = gr.Button("🤝 Generate Friendly Caption", variant="primary")
-            out2 = gr.Textbox(lines=5, show_copy_button=True)
-            # Caption 3 - Erotic
-            gr.HTML("<h4 style='margin: 15px 0 10px 0; color: #374151;'>🔥 Erotic Caption</h4>")
-            system3 = gr.Textbox(
-                label="System Prompt",
-                lines=2,
-                value=DEFAULT_PROMPTS["erotic"]["system"],
-                placeholder="How should the AI behave?"
-            )
-            user3 = gr.Textbox(
-                label="User Prompt",
-                lines=2,
-                value=DEFAULT_PROMPTS["erotic"]["user"],
-                placeholder="What kind of explicit description do you want?"
-            )
             with gr.Row():
-                key3_btn = gr.Button("key", size="sm")
-                que3_btn = gr.Button("que", size="sm")
-                use3_btn = gr.Button("use", size="sm")
-                not3_btn = gr.Button("not", size="sm")
-                gen3_btn = gr.Button("🔥 Generate Erotic Caption", variant="primary")
-            out3 = gr.Textbox(lines=5, show_copy_button=True)
-            # Export section
-            gr.HTML("<h4 style='margin: 20px 0 10px 0; color: #374151;'>📅 Export</h4>")
-            export_btn = gr.Button("📅 Export All Data", variant="secondary")
             export_out = gr.Textbox(visible=False)
             export_file = gr.File(visible=False)
-    # Connect generation buttons
     gen1_btn.click(generate_caption_1, [image_input, system1, user1], out1)
     gen2_btn.click(generate_caption_2, [image_input, system2, user2], out2)
     gen3_btn.click(generate_caption_3, [image_input, system3, user3], out3)
     ask_btn.click(answer_question, [image_input, question_input], qa_output)
-    # Template insertion buttons for Caption 1
-    key1_btn.click(lambda s, u, k: insert_key(s, u, k), [system1, user1, keywords_input], [system1, user1])
-    que1_btn.click(lambda s, u, q: insert_que(s, u, q), [system1, user1, question_input], [system1, user1])
-    use1_btn.click(lambda s, u, c: insert_use(s, u, c), [system1, user1, custom_instruction_input], [system1, user1])
-    not1_btn.click(lambda s, u, a: insert_not(s, u, a), [system1, user1, avoid_input], [system1, user1])
-    # Template insertion buttons for Caption 2
-    key2_btn.click(lambda s, u, k: insert_key(s, u, k), [system2, user2, keywords_input], [system2, user2])
-    que2_btn.click(lambda s, u, q: insert_que(s, u, q), [system2, user2, question_input], [system2, user2])
-    use2_btn.click(lambda s, u, c: insert_use(s, u, c), [system2, user2, custom_instruction_input], [system2, user2])
-    not2_btn.click(lambda s, u, a: insert_not(s, u, a), [system2, user2, avoid_input], [system2, user2])
-    # Template insertion buttons for Caption 3
-    key3_btn.click(lambda s, u, k: insert_key(s, u, k), [system3, user3, keywords_input], [system3, user3])
-    que3_btn.click(lambda s, u, q: insert_que(s, u, q), [system3, user3, question_input], [system3, user3])
-    use3_btn.click(lambda s, u, c: insert_use(s, u, c), [system3, user3, custom_instruction_input], [system3, user3])
-    not3_btn.click(lambda s, u, a: insert_not(s, u, a), [system3, user3, avoid_input], [system3, user3])
-    # Export functionality
     def handle_export(k, c, a, q, c1, c2, c3, qa, img):
         msg, fd = export_joycaption_data(k, c, a, q, c1, c2, c3, qa, img)
         if fd:
             js, fn = fd
             p = os.path.join(tempfile.gettempdir(), fn)
-            with open(p, "w", encoding="utf-8") as f:
-                f.write(js)
             return gr.update(value=msg, visible=True), gr.update(value=p, visible=True)
         return gr.update(value=msg, visible=True), gr.update(visible=False)
-    export_btn.click(handle_export, [keywords_input, custom_instruction_input, avoid_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])
-    # Simple instructions
-    gr.HTML("<hr><h3>Instructions</h3><p>Upload an image, customize the prompts, use template buttons (key/use/not/que) to add formatted text, then generate captions.</p>")
 if __name__ == "__main__":
-    demo.launch()

 from transformers import LlavaForConditionalGeneration, AutoProcessor
 from PIL import Image
 import tempfile, gc, os, shutil, json, time, re
 from urllib.parse import urlparse
+from typing import Optional
 # ===== BUILT-IN UTILITY FUNCTIONS =====
 def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
     if not raw_url_or_path:
         return raw_url_or_path
     try:
         parsed = urlparse(raw_url_or_path)
     except Exception:
         parsed = None
     if parsed and parsed.scheme and parsed.netloc:
         full = raw_url_or_path
         if "/file=" in full and "/gradio_api/file=" not in full:
         if "file=" in full and "/gradio_api/file=" not in full and "/gradio_api" not in full:
             full = full.replace("file=", "gradio_api/file=")
         return full
     if raw_url_or_path.startswith("/tmp/") or raw_url_or_path.startswith("tmp/"):
         if not host:
             return raw_url_or_path
         if p.startswith("/"):
             p = p[1:]
         return f"{host}/gradio_api/file=/{p}"
     return raw_url_or_path
 def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
     if not caption or not isinstance(caption, str):
         return caption or ""
     result = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', caption.strip(), flags=re.IGNORECASE)
     if max_chars and len(result) > max_chars:
         truncate_point = max_chars
         for i in range(len(result) - 1, max(0, max_chars - 100), -1):
                 truncate_point = i + 1
                 break
         result = result[:truncate_point].strip()
     if result and not result.endswith(('.', '!', '?')):
+        result += "."
     return result
 def force_clear_all_caches():
     """Release cached CUDA memory (when CUDA is available) and run the garbage collector.

     Best-effort helper: any exception raised along the way is printed as a
     warning instead of propagating to the caller.
     """
     try:
         if torch.cuda.is_available():
             # Same order as before: empty the cache first, then synchronize.
             for cuda_step in (torch.cuda.empty_cache, torch.cuda.synchronize):
                 cuda_step()
         gc.collect()
     except Exception as warn:
         print(f"⚠️ Cache clear warning: {warn}")
 force_clear_all_caches()
+# ===== SETUP =====
 _tmpdir = tempfile.gettempdir()
 os.environ["HF_HOME"] = os.path.join(_tmpdir, "hf_cache")
 os.environ["TRANSFORMERS_CACHE"] = os.path.join(_tmpdir, "transformers_cache")
 }
 def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=1200):
     try:
         if image is None:
             return "❌ No image provided"
         if not system_prompt.strip() or not user_prompt.strip():
             return "❌ Both system and user prompts are required"
         torch.cuda.empty_cache()
         gc.collect()
         convo = [
             {"role": "system", "content": system_prompt.strip()},
             {"role": "user", "content": user_prompt.strip()}
         ]
         convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
         inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
         inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
         with torch.no_grad():
             output = model.generate(
                 **inputs,
                 pad_token_id=processor.tokenizer.eos_token_id,
                 eos_token_id=processor.tokenizer.eos_token_id
             )
+        if not output or len(output) == 0:
             return "❌ No output generated"
+        input_length = inputs['input_ids'].shape[1]
+        result = processor.tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
         del inputs, output
         torch.cuda.empty_cache()
         gc.collect()
+        return postprocess_caption(result, max_chars=max_chars) or "❌ Empty result"
     except Exception as e:
         torch.cuda.empty_cache()
         gc.collect()
         return f"❌ Error: {str(e)[:200]}"
+# ===== GENERATION FUNCTIONS =====
 @spaces.GPU(duration=60)
 @torch.no_grad()
 def generate_caption_1(image, system1, user1):
     """Generate a caption for ``image`` from the ``system1``/``user1`` prompts.

     Returns the upload reminder string when ``image`` is falsy; otherwise
     whatever ``safe_generate_caption_direct`` returns for these inputs.
     """
     if image:
         return safe_generate_caption_direct(image, system1, user1)
     return "❌ Upload image first"
 @spaces.GPU(duration=60)
 @torch.no_grad()
 def generate_caption_2(image, system2, user2):
     """Generate a caption for ``image`` from the ``system2``/``user2`` prompts.

     Returns the upload reminder string when ``image`` is falsy; otherwise
     whatever ``safe_generate_caption_direct`` returns for these inputs.
     """
     if image:
         return safe_generate_caption_direct(image, system2, user2)
     return "❌ Upload image first"
 @spaces.GPU(duration=60)
 @torch.no_grad()
 def generate_caption_3(image, system3, user3):
     """Generate a caption for ``image`` from the ``system3``/``user3`` prompts.

     Returns the upload reminder string when ``image`` is falsy; otherwise
     whatever ``safe_generate_caption_direct`` returns for these inputs.
     """
     if image:
         return safe_generate_caption_direct(image, system3, user3)
     return "❌ Upload image first"
 @spaces.GPU(duration=40)
 @torch.no_grad()
 def answer_question(image, question):
     """Answer a free-form question about ``image`` with the captioning model.

     Args:
         image: Image handed to ``processor``; a falsy value short-circuits
             with the upload reminder.
         question: Question text; ``None``, empty, or whitespace-only input
             short-circuits with the "please ask" message.

     Returns:
         The decoded answer passed through ``postprocess_caption`` with
         ``max_chars=500``, or a "❌ ..." message string when validation or
         generation fails.
     """
     if not image:
         return "❌ Upload image first"
     # ``question`` can be None when no text was supplied; check that before
     # calling .strip() so it yields a message instead of an AttributeError
     # raised outside the try block.
     if not question or not question.strip():
         return "❌ Please ask a question"
     try:
         torch.cuda.empty_cache()
         gc.collect()
         convo = [
             {"role": "system", "content": "You are a helpful image captioner."},
             {"role": "user", "content": question.strip()},
         ]
         convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
         inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
         inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
         output = model.generate(**inputs, max_new_tokens=300, do_sample=True, temperature=0.6, top_p=0.9)
         # Skip the first ``input_length`` tokens so only the tokens after the
         # prompt are decoded.
         input_length = inputs['input_ids'].shape[1]
         result = processor.tokenizer.decode(output[0][input_length:], skip_special_tokens=True)
         del inputs, output
         return postprocess_caption(result, max_chars=500) or "❌ No answer generated"
     except Exception as e:
         return f"❌ Q&A Error: {str(e)[:200]}"
     finally:
         # Single cleanup point for both the success and the error path.
         torch.cuda.empty_cache()
         gc.collect()
+# ===== TEMPLATE FUNCTIONS =====
 def insert_template(current_text, template_text, field_content):
     """Append a filled-in template sentence to a prompt, skipping duplicates.

     Args:
         current_text: Existing prompt text; ``None`` is treated as empty.
         template_text: ``str.format`` template with a ``{content}`` field.
         field_content: Value substituted for ``{content}``; surrounding
             whitespace is stripped and ``None`` is treated as blank.

     Returns:
         ``current_text`` unchanged when ``field_content`` is blank or the
         formatted sentence already occurs in the prompt; otherwise the prompt
         with the sentence appended after a single space.
     """
     snippet = (field_content or "").strip()
     if not snippet:
         return current_text
     existing = current_text or ""
     formatted_template = template_text.format(content=snippet)
     if formatted_template in existing:
         return current_text
     # The outer strip() drops the joining space when the prompt was empty.
     return (existing.rstrip() + " " + formatted_template).strip()
 def create_template_functions():
     """Build the four template-insertion callbacks.

     Each callback takes ``(system_text, user_text, <field content>)`` and
     returns a ``(system, user)`` tuple in which ``insert_template`` has been
     applied to both prompts with that callback's fixed sentence template.

     Returns:
         The tuple ``(insert_key, insert_que, insert_use, insert_not)``.
     """
     def insert_key(system_text, user_text, keywords_content):
         template = "Pay attention to these keywords: {content}."
         prompts = (system_text, user_text)
         return tuple(insert_template(prompt, template, keywords_content) for prompt in prompts)

     def insert_que(system_text, user_text, question_content):
         template = "Answer this question: {content}."
         prompts = (system_text, user_text)
         return tuple(insert_template(prompt, template, question_content) for prompt in prompts)

     def insert_use(system_text, user_text, custom_content):
         template = "Make sure that you mention: {content}."
         prompts = (system_text, user_text)
         return tuple(insert_template(prompt, template, custom_content) for prompt in prompts)

     def insert_not(system_text, user_text, avoid_content):
         template = "Do NOT mention: {content}."
         prompts = (system_text, user_text)
         return tuple(insert_template(prompt, template, avoid_content) for prompt in prompts)

     return insert_key, insert_que, insert_use, insert_not
+# ===== EXPORT =====
 def export_joycaption_data(keywords, custom_instructions, avoid, question, cap1, cap2, cap3, qa_answer, image_path=""):
     try:
         data = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "source":"JoyCaption","data":{}}
         if cap2.strip(): data["data"]["caption_friendly"]=cap2.strip()
         if cap3.strip(): data["data"]["caption_erotic"]=cap3.strip()
         if qa_answer.strip(): data["data"]["qa_answer"]=qa_answer.strip()
+        if not data["data"]: return "❌ No data to export", None
         js = json.dumps(data, indent=2, ensure_ascii=False)
         fn = f"joycaption_{time.strftime('%Y%m%d_%H%M%S')}.json"
         return f"✅ Exported {len(data['data'])} fields", (js, fn)
     except Exception as e:
         return f"❌ Export failed: {str(e)}", None
+# ===== GRADIO UI =====
 # Builds the Gradio page: the left column holds the image, the template source
 # fields, the Q&A controls and the template buttons; the right column holds one
 # tab per caption style plus the export controls.
 with gr.Blocks(title="JoyCaption Advanced Prompting System", theme=gr.themes.Soft()) as demo:
     gr.HTML(TITLE)
     insert_key, insert_que, insert_use, insert_not = create_template_functions()
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="📸 Image", height=400)
             keywords_input = gr.Textbox(label="🏷️ Keywords", lines=2, placeholder="e.g. beach, sunset")
             custom_instruction_input = gr.Textbox(label="🎯 Custom", lines=2, placeholder="Add extra instructions")
             avoid_input = gr.Textbox(label="🚫 Avoid", lines=2, placeholder="What to avoid")
             question_input = gr.Textbox(label="❓ Question", lines=2, placeholder="Ask about image")
             ask_btn = gr.Button("Ask", variant="secondary")
             qa_output = gr.Textbox(label="Answer", lines=3, show_copy_button=True)
             gr.Markdown("---")
             gr.Markdown("**Insert Template**")
             with gr.Row():
                 key_btn = gr.Button("key", size="sm")
                 que_btn = gr.Button("que", size="sm")
                 use_btn = gr.Button("use", size="sm")
                 not_btn = gr.Button("not", size="sm")
         with gr.Column(scale=1):
             with gr.Tab("📝 Casual"):
                 system1 = gr.Textbox(label="System", lines=2, value=DEFAULT_PROMPTS["casual"]["system"])
                 user1 = gr.Textbox(label="User", lines=2, value=DEFAULT_PROMPTS["casual"]["user"])
                 gen1_btn = gr.Button("Generate Casual", variant="primary")
                 out1 = gr.Textbox(lines=5, show_copy_button=True)
             with gr.Tab("🤝 Friendly"):
                 system2 = gr.Textbox(label="System", lines=2, value=DEFAULT_PROMPTS["friendly"]["system"])
                 user2 = gr.Textbox(label="User", lines=2, value=DEFAULT_PROMPTS["friendly"]["user"])
                 gen2_btn = gr.Button("Generate Friendly", variant="primary")
                 out2 = gr.Textbox(lines=5, show_copy_button=True)
             with gr.Tab("🔥 Erotic"):
                 system3 = gr.Textbox(label="System", lines=2, value=DEFAULT_PROMPTS["erotic"]["system"])
                 user3 = gr.Textbox(label="User", lines=2, value=DEFAULT_PROMPTS["erotic"]["user"])
                 gen3_btn = gr.Button("Generate Erotic", variant="primary")
                 out3 = gr.Textbox(lines=5, show_copy_button=True)
             gr.Markdown("---")
             export_btn = gr.Button("📦 Export All", variant="secondary")
             export_out = gr.Textbox(visible=False)
             export_file = gr.File(visible=False)
     # === Event Bindings ===
     gen1_btn.click(generate_caption_1, [image_input, system1, user1], out1)
     gen2_btn.click(generate_caption_2, [image_input, system2, user2], out2)
     gen3_btn.click(generate_caption_3, [image_input, system3, user3], out3)
     ask_btn.click(answer_question, [image_input, question_input], qa_output)
     # Shared template bar: the buttons live outside the tabs, so each click
     # applies its template to the system/user prompts of every caption tab
     # rather than to the Casual tab only.
     prompt_boxes = [system1, user1, system2, user2, system3, user3]

     def apply_to_all_tabs(inserter):
         """Wrap a (system, user, content) inserter so it updates all three prompt pairs."""
         def handler(s1, u1, s2, u2, s3, u3, content):
             return (
                 *inserter(s1, u1, content),
                 *inserter(s2, u2, content),
                 *inserter(s3, u3, content),
             )
         return handler

     template_bindings = (
         (key_btn, insert_key, keywords_input),
         (que_btn, insert_que, question_input),
         (use_btn, insert_use, custom_instruction_input),
         (not_btn, insert_not, avoid_input),
     )
     for button, inserter, source_box in template_bindings:
         button.click(apply_to_all_tabs(inserter), prompt_boxes + [source_box], prompt_boxes)

     def handle_export(k, c, a, q, c1, c2, c3, qa, img):
         """Write the export JSON to the temp directory and reveal the status and file widgets."""
         # NOTE(review): image_input is created with type="pil", so ``img`` is an
         # image object while export_joycaption_data names this parameter
         # ``image_path`` -- confirm the export handles that.
         msg, fd = export_joycaption_data(k, c, a, q, c1, c2, c3, qa, img)
         if fd:
             js, fn = fd
             p = os.path.join(tempfile.gettempdir(), fn)
             with open(p, "w", encoding="utf-8") as f:
                 f.write(js)
             return gr.update(value=msg, visible=True), gr.update(value=p, visible=True)
         # Nothing to export (or the export failed): show the message only.
         return gr.update(value=msg, visible=True), gr.update(visible=False)

     export_btn.click(
         handle_export,
         [keywords_input, custom_instruction_input, avoid_input, question_input,
          out1, out2, out3, qa_output, image_input],
         [export_out, export_file],
     )
 if __name__ == "__main__":
     demo.launch()