Spaces:

nickdigger
/

joy-caption-enhanced

Running on Zero

App Files Files Community

nickdigger committed on Oct 24, 2025

Commit

e1b17e8

verified ·

1 Parent(s): 21dc1e6

Update app.py

Browse files

Files changed (1) hide show

app.py +412 -142

app.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# app.py – Minimal Dark Edition for ZeroGPU
-# 💄 UI simplification only – all logic unchanged
 try:
     import spaces
     if not hasattr(spaces, 'GPU'):
@@ -26,14 +23,17 @@ from datetime import datetime
 from typing import Optional
 from urllib.parse import urlparse
-# ===== Utility functions (unchanged) =====
 def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
     if not raw_url_or_path:
         return raw_url_or_path
     try:
         parsed = urlparse(raw_url_or_path)
     except Exception:
         parsed = None
     if parsed and parsed.scheme and parsed.netloc:
         full = raw_url_or_path
         if "/file=" in full and "/gradio_api/file=" not in full:
@@ -41,6 +41,7 @@ def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
         if "file=" in full and "/gradio_api/file=" not in full and "/gradio_api" not in full:
             full = full.replace("file=", "gradio_api/file=")
         return full
     if raw_url_or_path.startswith("/tmp/") or raw_url_or_path.startswith("tmp/"):
         if not host:
             return raw_url_or_path
@@ -51,12 +52,18 @@ def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
         if p.startswith("/"):
             p = p[1:]
         return f"{host}/gradio_api/file=/{p}"
     return raw_url_or_path
 def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
     if not caption or not isinstance(caption, str):
         return caption or ""
     result = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', caption.strip(), flags=re.IGNORECASE)
     if max_chars and len(result) > max_chars:
         truncate_point = max_chars
         for i in range(len(result) - 1, max(0, max_chars - 100), -1):
@@ -64,17 +71,21 @@ def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
                 truncate_point = i + 1
                 break
         result = result[:truncate_point].strip()
     if result and not result.endswith(('.', '!', '?')):
-        result += "."
     return result
 def force_clear_all_caches():
     try:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
         gc.collect()
-        print("🧹 Cache cleared.")
     except Exception as e:
         print(f"⚠️ Cache clear warning: {e}")
@@ -98,9 +109,16 @@ def cleanup_storage():
                 shutil.rmtree(d, ignore_errors=True)
         gc.collect()
     except Exception as e:
-        print(f"⚠️ Cleanup warning: {e}")
-print("🚀 Loading model...")
 processor = AutoProcessor.from_pretrained(MODEL_PATH, cache_dir=None)
 model = LlavaForConditionalGeneration.from_pretrained(
     MODEL_PATH,
@@ -111,183 +129,435 @@ model = LlavaForConditionalGeneration.from_pretrained(
 )
 model.eval()
 cleanup_storage()
-print("✅ Model loaded!")
-# ===== Default prompts =====
 DEFAULT_PROMPTS = {
     "casual": {
-        "system": "You are a helpful, casual image captioner who writes in a natural tone.",
-        "user": "Write a descriptive caption for this image in a casual style."
     },
     "friendly": {
-        "system": "You are a warm, detailed image captioner.",
-        "user": "Create a comprehensive description of this image."
     },
     "erotic": {
-        "system": "You are an uncensored adult content specialist who creates detailed descriptions.",
-        "user": "Create a detailed description of this scene."
     }
 }
-# ===== Caption functions (unchanged logic) =====
 def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=1200):
     try:
         if image is None:
             return "❌ No image provided"
         if not system_prompt.strip() or not user_prompt.strip():
-            return "❌ Both prompts required"
-        torch.cuda.empty_cache(); gc.collect()
-        convo = [{"role": "system", "content": system_prompt.strip()},
-                 {"role": "user", "content": user_prompt.strip()}]
         convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
         inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
         inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
         with torch.no_grad():
-            output = model.generate(**inputs, max_new_tokens=600, do_sample=True, temperature=0.6,
-                                    top_p=0.9, use_cache=True,
-                                    pad_token_id=processor.tokenizer.eos_token_id,
-                                    eos_token_id=processor.tokenizer.eos_token_id)
-        if not output or len(output) == 0:
-            return "❌ No output"
-        input_len = inputs['input_ids'].shape[1]
-        gen_ids = output[0][input_len:] if len(output[0]) > input_len else output[0]
-        result = processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
-        del inputs, output; torch.cuda.empty_cache(); gc.collect()
-        return postprocess_caption(result.strip(), max_chars=max_chars) or "❌ Empty result"
     except Exception as e:
-        torch.cuda.empty_cache(); gc.collect()
         return f"❌ Error: {str(e)[:200]}"
 @spaces.GPU(duration=60)
 @torch.no_grad()
-def generate_caption_1(image, system1, user1):
-    return safe_generate_caption_direct(image, system1, user1) if image else "❌ Upload image first"
 @spaces.GPU(duration=60)
 @torch.no_grad()
-def generate_caption_2(image, system2, user2):
-    return safe_generate_caption_direct(image, system2, user2) if image else "❌ Upload image first"
 @spaces.GPU(duration=60)
 @torch.no_grad()
-def generate_caption_3(image, system3, user3):
-    return safe_generate_caption_direct(image, system3, user3) if image else "❌ Upload image first"
-# ====== Minimal Dark UI ======
-TITLE = """
-<div style="text-align:center;margin:20px 0;">
-<h1 style="color:#ffffff;font-weight:600;">JoyCaption Advanced Prompting System (v6.0)</h1>
-<p style="color:#9ca3af;">Custom prompts • Template helpers • Professional control</p>
-<hr style="border-color:#374151;margin-top:10px;">
-</div>
-"""
-dark_css = """
-body {
-    background: #0f172a;
-    color: #e5e7eb;
-}
-.gradio-container {
-    max-width: 1200px !important;
-    margin: auto !important;
-}
-input, textarea {
-    background-color: #1e293b !important;
-    color: #f1f5f9 !important;
-    border: 1px solid #334155 !important;
-    border-radius: 6px !important;
-    font-size: 13px !important;
-}
-input:focus, textarea:focus {
-    border-color: #60a5fa !important;
-    outline: none !important;
-}
-button {
-    background-color: #2563eb !important;
-    color: #ffffff !important;
-    border: none !important;
-    border-radius: 6px !important;
-    padding: 8px 14px !important;
-    font-weight: 500 !important;
-    cursor: pointer !important;
-}
-button:hover {
-    background-color: #1d4ed8 !important;
-}
-.caption-section {
-    background: #1e293b !important;
-    border: 1px solid #334155 !important;
-    border-radius: 8px !important;
-    padding: 12px !important;
-    margin: 6px 0 !important;
-}
-"""
-# ===== Build Interface =====
-with gr.Blocks(title="JoyCaption", theme=None, css=dark_css) as demo:
-    gr.HTML(TITLE)
     with gr.Row():
         with gr.Column(scale=1):
-            image_input = gr.Image(type="pil", label="", height=400)
-            keywords_input = gr.Textbox(placeholder="Enter keywords")
-            custom_instruction_input = gr.Textbox(placeholder="Custom instructions")
-            avoid_input = gr.Textbox(placeholder="Things to avoid mentioning")
-            question_input = gr.Textbox(placeholder="Ask a question about the image")
-            ask_btn = gr.Button("Ask Question")
-            qa_output = gr.Textbox(lines=4, show_copy_button=True, placeholder="Answer will appear here")
         with gr.Column(scale=1):
-            with gr.Group(elem_classes=["caption-section"]):
-                system1 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["casual"]["system"], placeholder="System prompt")
-                user1 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["casual"]["user"], placeholder="User prompt")
-                gen1_btn = gr.Button("Generate Casual Caption")
-                out1 = gr.Textbox(lines=5, show_copy_button=True, placeholder="Casual caption output")
-            with gr.Group(elem_classes=["caption-section"]):
-                system2 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["friendly"]["system"], placeholder="System prompt")
-                user2 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["friendly"]["user"], placeholder="User prompt")
-                gen2_btn = gr.Button("Generate Friendly Caption")
-                out2 = gr.Textbox(lines=5, show_copy_button=True, placeholder="Friendly caption output")
-            with gr.Group(elem_classes=["caption-section"]):
-                system3 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["erotic"]["system"], placeholder="System prompt")
-                user3 = gr.Textbox(lines=2, value=DEFAULT_PROMPTS["erotic"]["user"], placeholder="User prompt")
-                gen3_btn = gr.Button("Generate Erotic Caption")
-                out3 = gr.Textbox(lines=5, show_copy_button=True, placeholder="Erotic caption output")
-            export_btn = gr.Button("Export All Data")
             export_out = gr.Textbox(visible=False)
             export_file = gr.File(visible=False)
-    # ===== Link Buttons =====
     gen1_btn.click(generate_caption_1, [image_input, system1, user1], out1)
     gen2_btn.click(generate_caption_2, [image_input, system2, user2], out2)
     gen3_btn.click(generate_caption_3, [image_input, system3, user3], out3)
-    ask_btn.click(lambda img, q: generate_caption_1(img, "You are a helpful assistant.", q), [image_input, question_input], qa_output)
     def handle_export(k, c, a, q, c1, c2, c3, qa, img):
-        data = {
-            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
-            "data": {
-                "keywords": k.strip(),
-                "instructions": c.strip(),
-                "avoid": a.strip(),
-                "question": q.strip(),
-                "caption1": c1.strip(),
-                "caption2": c2.strip(),
-                "caption3": c3.strip(),
-                "qa": qa.strip()
-            }
-        }
-        js = json.dumps(data, indent=2)
-        fn = f"joycaption_{time.strftime('%Y%m%d_%H%M%S')}.json"
-        path = os.path.join(tempfile.gettempdir(), fn)
-        with open(path, "w", encoding="utf-8") as f:
-            f.write(js)
-        return gr.update(value="✅ Exported successfully", visible=True), gr.update(value=path, visible=True)
     export_btn.click(handle_export, [keywords_input, custom_instruction_input, avoid_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])
 if __name__ == "__main__":
-    demo.launch()

 try:
     import spaces
     if not hasattr(spaces, 'GPU'):
 from typing import Optional
 from urllib.parse import urlparse
+# ===== BUILT-IN UTILITY FUNCTIONS =====
 def fix_image_url(raw_url_or_path: str, host: Optional[str] = None) -> str:
+    """Convert local image paths to URLs for export"""
     if not raw_url_or_path:
         return raw_url_or_path
     try:
         parsed = urlparse(raw_url_or_path)
     except Exception:
         parsed = None
     if parsed and parsed.scheme and parsed.netloc:
         full = raw_url_or_path
         if "/file=" in full and "/gradio_api/file=" not in full:
         if "file=" in full and "/gradio_api/file=" not in full and "/gradio_api" not in full:
             full = full.replace("file=", "gradio_api/file=")
         return full
     if raw_url_or_path.startswith("/tmp/") or raw_url_or_path.startswith("tmp/"):
         if not host:
             return raw_url_or_path
         if p.startswith("/"):
             p = p[1:]
         return f"{host}/gradio_api/file=/{p}"
     return raw_url_or_path
 def postprocess_caption(caption: str, max_chars: int = 1200) -> str:
+    """Minimal caption post-processing - just basic cleanup"""
     if not caption or not isinstance(caption, str):
         return caption or ""
+    # Only remove leading "a photo of" phrases
     result = re.sub(r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*', '', caption.strip(), flags=re.IGNORECASE)
+    # Only truncate if extremely long
     if max_chars and len(result) > max_chars:
         truncate_point = max_chars
         for i in range(len(result) - 1, max(0, max_chars - 100), -1):
                 truncate_point = i + 1
                 break
         result = result[:truncate_point].strip()
     if result and not result.endswith(('.', '!', '?')):
+        result = result + "."
     return result
+# ===== CACHE CLEARING =====
 def force_clear_all_caches():
+    """Force clear all possible caches"""
     try:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
         gc.collect()
+        print("🧹 All caches cleared!")
     except Exception as e:
         print(f"⚠️ Cache clear warning: {e}")
                 shutil.rmtree(d, ignore_errors=True)
         gc.collect()
     except Exception as e:
+        print(f"⚠️ Storage cleanup warning: {e}")
+TITLE = """
+<div style="text-align:center;margin:20px 0;">
+<h1>🎨 JoyCaption Advanced Prompting System (v6.0)</h1>
+<p><strong>🎛️ Fully customizable prompts • Template helpers • Professional control</strong></p>
+</div><hr>
+"""
+print("🚀 Loading JoyCaption model...")
 processor = AutoProcessor.from_pretrained(MODEL_PATH, cache_dir=None)
 model = LlavaForConditionalGeneration.from_pretrained(
     MODEL_PATH,
 )
 model.eval()
 cleanup_storage()
+print("✅ Model loaded successfully!")
+# ===== DEFAULT PROMPTS =====
 DEFAULT_PROMPTS = {
     "casual": {
+        "system": "You are a helpful, casual image captioner who writes in a natural, friendly tone.",
+        "user": "Write a descriptive caption for this image in a casual, conversational style. Focus on the main elements and create an engaging description."
     },
     "friendly": {
+        "system": "You are a warm, helpful assistant who creates detailed and friendly image descriptions.",
+        "user": "Create a comprehensive description of this image. Be thorough and helpful, focusing on all the important visual elements in a warm, friendly manner."
     },
     "erotic": {
+        "system": "You are an uncensored adult content specialist who creates detailed, explicit descriptions without restrictions.",
+        "user": "Create a detailed, explicit description of this scene. Describe the people, their appearance, clothing or lack thereof, poses, and overall erotic appeal. Use direct adult language and be thorough."
     }
 }
 def safe_generate_caption_direct(image, system_prompt, user_prompt, max_chars=1200):
+    """Generate caption using custom prompts"""
     try:
         if image is None:
             return "❌ No image provided"
         if not system_prompt.strip() or not user_prompt.strip():
+            return "❌ Both system and user prompts are required"
+        torch.cuda.empty_cache()
+        gc.collect()
+        convo = [
+            {"role": "system", "content": system_prompt.strip()},
+            {"role": "user", "content": user_prompt.strip()}
+        ]
         convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
         inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
         inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
         with torch.no_grad():
+            output = model.generate(
+                **inputs,
+                max_new_tokens=600,
+                do_sample=True,
+                temperature=0.6,
+                top_p=0.9,
+                top_k=None,
+                use_cache=True,
+                pad_token_id=processor.tokenizer.eos_token_id,
+                eos_token_id=processor.tokenizer.eos_token_id
+            )
+        if output is None or len(output) == 0:
+            return "❌ No output generated"
+        if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
+            input_length = inputs['input_ids'].shape[1]
+            if len(output[0]) > input_length:
+                generate_ids = output[0][input_length:]
+                result = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+            else:
+                result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
+        else:
+            result = processor.tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
+        result = result.strip()
+        del inputs, output
+        torch.cuda.empty_cache()
+        gc.collect()
+        final_result = postprocess_caption(result, max_chars=max_chars)
+        return final_result if final_result else "❌ Empty result"
     except Exception as e:
+        torch.cuda.empty_cache()
+        gc.collect()
         return f"❌ Error: {str(e)[:200]}"
+# Individual caption generation functions
 @spaces.GPU(duration=60)
 @torch.no_grad()
+def generate_caption_1(image, system1, user1):
+    if not image:
+        return "❌ Upload image first"
+    return safe_generate_caption_direct(image, system1, user1)
 @spaces.GPU(duration=60)
 @torch.no_grad()
+def generate_caption_2(image, system2, user2):
+    if not image:
+        return "❌ Upload image first"
+    return safe_generate_caption_direct(image, system2, user2)
 @spaces.GPU(duration=60)
 @torch.no_grad()
+def generate_caption_3(image, system3, user3):
+    if not image:
+        return "❌ Upload image first"
+    return safe_generate_caption_direct(image, system3, user3)
@spaces.GPU(duration=40)
@torch.no_grad()
def answer_question(image, question):
    """Answer a free-form *question* about *image* with the module-level model.

    Returns the answer after ``postprocess_caption`` (limited to 500
    characters). Every failure path returns a string starting with "❌"
    instead of raising: missing image, blank question, empty generation, or
    any exception (whose message is cut to 200 characters).
    """
    if not image:
        return "❌ Upload image first"
    if not question or not question.strip():
        return "❌ Please ask a question"
    try:
        torch.cuda.empty_cache()
        gc.collect()
        convo = [
            {"role": "system", "content": "You are a helpful image captioner."},
            {"role": "user", "content": question.strip()}
        ]
        convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
        inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to("cuda")
        inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
        # The @torch.no_grad() decorator already disables autograd for the
        # whole call, so no nested no_grad context is needed here.
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            top_k=None,
            use_cache=True,
            pad_token_id=processor.tokenizer.eos_token_id,
            eos_token_id=processor.tokenizer.eos_token_id
        )
        # Same guard as safe_generate_caption_direct: without it an empty
        # generation would surface as a raw IndexError message.
        if output is None or len(output) == 0:
            return "❌ No answer generated"
        # Decode only the tokens produced after the prompt; fall back to the
        # whole sequence when the prompt length is unknown or nothing follows it.
        token_ids = output[0]
        if 'input_ids' in inputs and len(inputs['input_ids'].shape) >= 2:
            input_length = inputs['input_ids'].shape[1]
            if len(token_ids) > input_length:
                token_ids = token_ids[input_length:]
        result = processor.tokenizer.decode(
            token_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        ).strip()
        del inputs, output, token_ids
        torch.cuda.empty_cache()
        gc.collect()
        final_result = postprocess_caption(result, max_chars=500)
        return final_result if final_result else "❌ No answer generated"
    except Exception as e:
        torch.cuda.empty_cache()
        gc.collect()
        return f"❌ Q&A Error: {str(e)[:200]}"
+# Helper functions for template insertion
def insert_template(current_text, template_text, field_content):
    """Append a filled-in template to *current_text* unless it is already there.

    Args:
        current_text: Existing prompt text; ``None`` is treated as empty.
        template_text: ``str.format`` template containing a ``{content}`` slot.
        field_content: Value for the slot; ``None`` or whitespace-only means
            there is nothing to insert.

    Returns:
        *current_text* unchanged when there is nothing to insert or the
        formatted template already occurs in it; otherwise the text with the
        formatted template appended after a single space (or the formatted
        template alone when the current text is blank).
    """
    content = (field_content or "").strip()
    if not content:
        return current_text
    formatted_template = template_text.format(content=content)
    existing = current_text or ""
    # Prevent duplicates: the same instruction is only ever added once.
    if formatted_template in existing:
        return current_text
    if existing.strip():
        return existing.rstrip() + " " + formatted_template
    return formatted_template
def create_template_functions():
    """Build the four template inserters behind the key/que/use/not buttons.

    Each returned callable takes ``(system_text, user_text, content)`` and
    returns ``(new_system_text, new_user_text)``, where the filled-in template
    has been passed through ``insert_template`` for both prompts.

    Returns:
        A 4-tuple of callables in the order: key, que, use, not.
    """
    def make_inserter(template):
        def inserter(system_text, user_text, content):
            return (
                insert_template(system_text, template, content),
                insert_template(user_text, template, content),
            )
        return inserter

    templates = (
        "Pay attention to these keywords: {content}.",
        "Answer this question: {content}.",
        "Make sure that you mention: {content}.",
        "Do NOT mention: {content}.",
    )
    return tuple(make_inserter(template) for template in templates)
+# Export function
def export_joycaption_data(keywords, custom_instructions, avoid, question, cap1, cap2, cap3, qa_answer, image_path=""):
    """Serialize the non-empty inputs and outputs of a session to JSON.

    Args:
        keywords, custom_instructions, avoid, question: Raw input-field texts.
        cap1, cap2, cap3: Casual, friendly and erotic caption texts.
        qa_answer: Text of the Q&A answer.
        image_path: Local path of the image. Anything that is not a non-blank
            string (for example ``None`` or an in-memory image object) is
            treated as "no path" instead of failing the whole export.

    Returns:
        ``(message, (json_text, file_name))`` on success, or
        ``(message, None)`` when there is nothing to export or an error occurs.
    """
    def clean(value):
        # Non-string values (None, image objects) count as empty.
        return value.strip() if isinstance(value, str) else ""

    try:
        data = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "source": "JoyCaption", "data": {}}
        fields = data["data"]

        for key, raw in (
            ("keywords", keywords),
            ("custom_instructions", custom_instructions),
            ("avoid", avoid),
            ("question", question),
        ):
            text = clean(raw)
            if text:
                fields[key] = text

        if clean(image_path):
            fields["image_local_path"] = image_path
            image_url = fix_image_url(image_path, host=(SPACE_HOST or ""))
            if image_url:
                fields["image_url"] = image_url

        for key, raw in (
            ("caption_casual", cap1),
            ("caption_friendly", cap2),
            ("caption_erotic", cap3),
            ("qa_answer", qa_answer),
        ):
            text = clean(raw)
            if text:
                fields[key] = text

        if not fields:
            return "❌ No data to export", None
        js = json.dumps(data, indent=2, ensure_ascii=False)
        fn = f"joycaption_{time.strftime('%Y%m%d_%H%M%S')}.json"
        return f"✅ Exported {len(fields)} fields", (js, fn)
    except Exception as e:
        return f"❌ Export failed: {str(e)}", None
+# Create the Gradio interface
with gr.Blocks(title="JoyCaption Advanced Prompting System", theme=gr.themes.Soft()) as demo:
    gr.HTML(TITLE)
    # Template inserters, in the order: key, que, use, not.
    insert_key, insert_que, insert_use, insert_not = create_template_functions()

    def _caption_section(heading, tone, user_placeholder, button_label):
        """Create one caption section: heading, two prompts, button row, output box."""
        gr.HTML(f"<h4 style='margin: 15px 0 10px 0; color: #374151;'>{heading}</h4>")
        system_box = gr.Textbox(
            label="System Prompt",
            lines=2,
            value=DEFAULT_PROMPTS[tone]["system"],
            placeholder="How should the AI behave?"
        )
        user_box = gr.Textbox(
            label="User Prompt",
            lines=2,
            value=DEFAULT_PROMPTS[tone]["user"],
            placeholder=user_placeholder
        )
        with gr.Row():
            template_buttons = (
                gr.Button("key", size="sm"),
                gr.Button("que", size="sm"),
                gr.Button("use", size="sm"),
                gr.Button("not", size="sm"),
            )
            generate_button = gr.Button(button_label, variant="primary")
        output_box = gr.Textbox(lines=5, show_copy_button=True)
        return system_box, user_box, template_buttons, generate_button, output_box

    with gr.Row():
        # Left column - input fields
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="📸 Upload Image", height=400)
            keywords_input = gr.Textbox(
                label="🏷️ Keywords",
                lines=2,
                placeholder="Enter keywords (available as 'key' template)",
                info="Use 'key' button to insert into prompts"
            )
            custom_instruction_input = gr.Textbox(
                label="🎯 Custom Instruction",
                lines=2,
                placeholder="Enter custom instructions (available as 'use' template)",
                info="Use 'use' button to insert into prompts"
            )
            avoid_input = gr.Textbox(
                label="🚫 Avoid",
                lines=2,
                placeholder="Things to avoid mentioning (available as 'not' template)",
                info="Use 'not' button to insert into prompts"
            )
            question_input = gr.Textbox(
                label="❓ Question",
                lines=2,
                placeholder="Ask a question about the image (available as 'que' template)",
                info="Use 'que' button to insert into prompts"
            )
            ask_btn = gr.Button("❓ Ask Question", variant="secondary")
            qa_output = gr.Textbox(label="Q&A Answer", lines=4, show_copy_button=True)
        # Right column - caption generation
        with gr.Column(scale=1):
            system1, user1, template_btns1, gen1_btn, out1 = _caption_section(
                "📝 Casual Caption", "casual",
                "What should the AI do with this image?",
                "📝 Generate Casual Caption",
            )
            system2, user2, template_btns2, gen2_btn, out2 = _caption_section(
                "🤝 Friendly Caption", "friendly",
                "What kind of description do you want?",
                "🤝 Generate Friendly Caption",
            )
            system3, user3, template_btns3, gen3_btn, out3 = _caption_section(
                "🔥 Erotic Caption", "erotic",
                "What kind of explicit description do you want?",
                "🔥 Generate Erotic Caption",
            )
            # Export section
            gr.HTML("<h4 style='margin: 20px 0 10px 0; color: #374151;'>📅 Export</h4>")
            export_btn = gr.Button("📅 Export All Data", variant="secondary")
            export_out = gr.Textbox(visible=False)
            export_file = gr.File(visible=False)

    # Connect generation buttons
    gen1_btn.click(generate_caption_1, [image_input, system1, user1], out1)
    gen2_btn.click(generate_caption_2, [image_input, system2, user2], out2)
    gen3_btn.click(generate_caption_3, [image_input, system3, user3], out3)
    ask_btn.click(answer_question, [image_input, question_input], qa_output)

    # Template insertion buttons: every section gets the same four inserters,
    # each reading its own input field and rewriting that section's prompts.
    # NOTE(review): the inserters are registered directly instead of through
    # pass-through lambdas; confirm nothing relies on the auto-generated
    # endpoint names of these events.
    inserters_and_sources = (
        (insert_key, keywords_input),
        (insert_que, question_input),
        (insert_use, custom_instruction_input),
        (insert_not, avoid_input),
    )
    for system_box, user_box, template_buttons in (
        (system1, user1, template_btns1),
        (system2, user2, template_btns2),
        (system3, user3, template_btns3),
    ):
        for button, (inserter, source_box) in zip(template_buttons, inserters_and_sources):
            button.click(inserter, [system_box, user_box, source_box], [system_box, user_box])

    # Export functionality
    def handle_export(k, c, a, q, c1, c2, c3, qa, img):
        """Write the export JSON to a temp file and reveal the status and download widgets."""
        # image_input is created with type="pil", so img is an image object or
        # None rather than a path; only forward a real string path.
        image_path = img if isinstance(img, str) else ""
        msg, fd = export_joycaption_data(k, c, a, q, c1, c2, c3, qa, image_path)
        if not fd:
            return gr.update(value=msg, visible=True), gr.update(visible=False)
        js, fn = fd
        p = os.path.join(tempfile.gettempdir(), fn)
        with open(p, "w", encoding="utf-8") as f:
            f.write(js)
        return gr.update(value=msg, visible=True), gr.update(value=p, visible=True)

    export_btn.click(handle_export, [keywords_input, custom_instruction_input, avoid_input, question_input, out1, out2, out3, qa_output, image_input], [export_out, export_file])

    # Instructions
    gr.HTML("<hr>")
    gr.Markdown("""
    ## **🎛️ Advanced Prompting System Guide**
    ### **📝 How to Use:**
    1. **Upload image** and fill in the input fields (Keywords, Custom Instruction, Avoid, Question)
    2. **Edit prompts** in the System/User prompt textboxes for each caption type
    3. **Use template buttons** to insert formatted text:
       - **`key`** → "Pay attention to these keywords: [your keywords]"
       - **`use`** → "Make sure that you mention: [your custom instruction]"
       - **`not`** → "Do NOT mention: [things to avoid]"
       - **`que`** → "Answer this question: [your question]"
    4. **Generate captions** with fully customized prompts
    5. **Export all results** as JSON
    ### **✨ Features:**
    - 🎨 **3 Caption Tones**: Casual, Friendly, Erotic with custom defaults
    - 🎛️ **Full Prompt Control**: Edit both system and user prompts
    - 🔧 **Template Helpers**: One-click insertion of formatted instructions
    - 🚫 **Duplicate Prevention**: Each template can only be added once per field
    - 📥 **Complete Export**: All prompts, inputs, and outputs saved
    **Pro Tip**: Start with the default prompts and enhance them using the template buttons!
    """)
 if __name__ == "__main__":
+    demo.launch()