kazuhina committed on
Commit
24a9fe7
·
1 Parent(s): b1f0d72

Update to proper JoyCaption implementation with ChatInterface and streaming

Browse files
Files changed (1) hide show
  1. joycaption_app.py +220 -226
joycaption_app.py CHANGED
@@ -5,106 +5,108 @@ Uses fancyfeast/llama-joycaption-alpha-two-hf-llava model for high-quality image
5
  Free, open, and uncensored model for training Diffusion models
6
  """
7
 
 
8
  import gradio as gr
 
9
  import torch
10
- import spaces
11
- from transformers import AutoProcessor, LlavaForConditionalGeneration
12
  from PIL import Image
 
 
 
13
  import tempfile
14
  import os
15
  from pathlib import Path
16
 
 
 
 
17
  # Initialize the JoyCaption model
18
  print("Loading JoyCaption model...")
19
- model_name = "fancyfeast/llama-joycaption-alpha-two-hf-llava"
20
-
21
- # Global variables for model and processor
22
- processor = None
23
- llava_model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- def load_model():
26
- """Load JoyCaption model with maximum memory efficiency"""
27
- global processor, llava_model
 
 
 
 
 
 
 
28
 
 
29
  try:
30
- print("Loading processor...")
31
- # Load processor first
32
- processor = AutoProcessor.from_pretrained(model_name)
33
- print("Processor loaded successfully!")
34
-
35
- print("Loading model with maximum memory efficiency...")
36
- # Load model with maximum memory efficiency settings
37
- llava_model = LlavaForConditionalGeneration.from_pretrained(
38
- model_name,
39
- torch_dtype=torch.float16,
40
- device_map="cpu", # Force CPU
41
- load_in_8bit=True, # Enable 8-bit quantization
42
- load_in_4bit=False, # Disable 4-bit for now
43
- low_cpu_mem_usage=True,
44
- trust_remote_code=True,
45
- max_memory={0: "4GB"}, # Limit memory usage
46
- offload_folder="./offload", # Offload to disk if needed
47
- offload_state_dict=True
48
- )
49
- llava_model.eval()
50
-
51
- print("JoyCaption model loaded successfully with 8-bit quantization!")
52
- return True
53
-
54
- except Exception as e:
55
- print(f"Error loading model: {e}")
56
- print("Model loading failed - will use fallback mode")
57
- return False
58
 
59
- # Try to load model at startup
60
- model_loaded = load_model()
 
61
 
62
- @spaces.GPU
63
- def generate_image_caption(image_file, prompt_type="formal_detailed", custom_prompt=""):
64
- """
65
- Generate high-quality image captions using JoyCaption model
66
 
67
- Args:
68
- image_file: Path to the image file or uploaded file
69
- prompt_type: Type of captioning (formal_detailed, creative, simple, custom)
70
- custom_prompt: Custom prompt for specialized captioning
71
-
72
- Returns:
73
- str: Generated image caption
74
- """
75
- global processor, llava_model
76
 
77
- # Lazy load model if not already loaded
78
- if llava_model is None or processor is None:
79
- print("Lazy loading JoyCaption model...")
80
- if not load_model():
81
- return "Error: JoyCaption model could not be loaded. This may be due to memory constraints or network issues. Please try again later."
82
 
83
  try:
84
- if not image_file:
85
- return "Please upload an image file."
 
 
 
86
 
87
- # Handle different types of image inputs
88
- if hasattr(image_file, 'name'):
89
- # Gradio file object
90
- image_path = image_file.name
91
- elif isinstance(image_file, str):
92
- # File path string
93
- image_path = image_file
94
  else:
95
- return "Invalid image file format."
 
96
 
97
- # Check if file exists
98
- if not os.path.exists(image_path):
99
- return "Image file not found."
100
 
101
- print(f"Processing image: {image_path}")
102
-
103
- # Load and preprocess image
104
- try:
105
- image = Image.open(image_path).convert('RGB')
106
- except Exception as e:
107
- return f"Error loading image: {str(e)}"
108
 
109
  # Define prompt templates based on type
110
  prompt_templates = {
@@ -112,11 +114,11 @@ def generate_image_caption(image_file, prompt_type="formal_detailed", custom_pro
112
  "creative": "Write a creative and artistic caption for this image, capturing its essence and mood.",
113
  "simple": "Write a simple, concise caption describing what you see in this image.",
114
  "technical": "Provide a detailed technical description of this image including composition, lighting, and visual elements.",
115
- "custom": custom_prompt if custom_prompt else "Write a descriptive caption for this image."
116
  }
117
 
118
  # Select appropriate prompt
119
- prompt = prompt_templates.get(prompt_type, prompt_templates["formal_detailed"])
120
 
121
  # Build conversation following JoyCaption's recommended format
122
  convo = [
@@ -126,176 +128,168 @@ def generate_image_caption(image_file, prompt_type="formal_detailed", custom_pro
126
  },
127
  {
128
  "role": "user",
129
- "content": prompt,
130
  },
131
  ]
132
 
133
- # Format the conversation using JoyCaption's specific method
134
- # WARNING: HF's handling of chat's on Llava models is very fragile
135
- convo_string = processor.apply_chat_template(
136
- convo,
137
- tokenize=False,
138
  add_generation_prompt=True
139
  )
140
  assert isinstance(convo_string, str)
141
 
142
- # Process the inputs with proper tensor handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
144
- inputs = processor(
145
- text=[convo_string],
146
- images=[image],
147
- return_tensors="pt"
148
- ).to(device)
149
 
150
- # Ensure pixel_values are in float16 for compatibility
151
- if 'pixel_values' in inputs:
152
- inputs['pixel_values'] = inputs['pixel_values'].to(torch.float16)
 
153
 
154
- # Generate captions with optimized parameters
155
- with torch.no_grad():
156
- generate_ids = llava_model.generate(
157
- **inputs,
158
- max_new_tokens=150, # Reduced for faster processing
159
- do_sample=True,
160
- suppress_tokens=None,
161
- use_cache=True,
162
- temperature=0.6,
163
- top_k=None,
164
- top_p=0.9,
165
- repetition_penalty=1.1,
166
- pad_token_id=processor.tokenizer.eos_token_id
167
- )[0]
168
-
169
- # Trim off the prompt
170
- generate_ids = generate_ids[inputs['input_ids'].shape[1]:]
171
-
172
- # Decode the caption
173
- caption = processor.tokenizer.decode(
174
- generate_ids,
175
- skip_special_tokens=True,
176
- clean_up_tokenization_spaces=False
177
- )
178
- caption = caption.strip()
179
 
180
- print(f"Caption generated successfully: {caption[:100]}...")
181
- return caption
182
 
 
 
 
 
 
 
 
 
 
 
183
  except Exception as e:
184
  error_msg = f"Error during caption generation: {str(e)}"
185
  print(error_msg)
186
  # Return a demo response when model fails
187
- return generate_demo_caption(image, prompt_type, custom_prompt)
188
 
189
- def generate_demo_caption(image, prompt_type, custom_prompt):
190
  """Generate a demo caption when the model is not available"""
191
- # Create a realistic demo response based on the prompt type
192
  demo_responses = {
193
- "formal_detailed": f"This image appears to contain visual elements including colors, shapes, and composition. The image shows various patterns and visual textures that could be described in detail. The overall scene demonstrates typical characteristics of digital imagery with identifiable visual components.",
194
- "creative": f"A captivating visual composition that captures the essence of artistic expression through color, form, and visual storytelling. The image presents an interesting arrangement of elements that invite creative interpretation and artistic appreciation.",
195
- "simple": f"An image containing visual elements and patterns. The composition shows various colors and shapes arranged in a structured manner.",
196
- "technical": f"Technical analysis: This image demonstrates standard digital image characteristics with RGB color space representation. The resolution and pixel arrangement follow conventional digital imaging protocols with typical compression and formatting.",
197
- "custom": f"Based on the custom prompt provided, this image shows visual elements that could be interpreted according to the specific requirements mentioned: '{custom_prompt}'."
198
  }
199
 
200
  return demo_responses.get(prompt_type, demo_responses["formal_detailed"]) + "\n\n[Note: This is a demo response. The full JoyCaption model is optimized for production use and may be temporarily unavailable in this demo environment.]"
201
 
202
- def create_demo_image():
203
- """Create a demo image for testing"""
204
- try:
205
- # Create a simple colored rectangle as demo
206
- from PIL import Image, ImageDraw
207
-
208
- # Create a 512x512 image with gradient
209
- width, height = 512, 512
210
- image = Image.new('RGB', (width, height), color='white')
211
- draw = ImageDraw.Draw(image)
212
-
213
- # Draw a simple pattern
214
- for i in range(0, width, 50):
215
- for j in range(0, height, 50):
216
- color = (i % 255, j % 255, (i + j) % 255)
217
- draw.rectangle([i, j, i+25, j+25], fill=color)
218
-
219
- # Save demo image
220
- demo_file = "demo_image.png"
221
- image.save(demo_file)
222
- return demo_file
223
-
224
- except Exception as e:
225
- print(f"Error creating demo image: {e}")
226
- return None
227
-
228
  # Create Gradio interface
229
- demo = gr.Interface(
230
- fn=generate_image_caption,
231
- inputs=[
232
- gr.Image(
233
- label="Upload Image for Captioning",
234
- type="filepath",
235
- format="png"
236
- ),
237
- gr.Dropdown(
238
- choices=["formal_detailed", "creative", "simple", "technical", "custom"],
239
- value="formal_detailed",
240
- label="Caption Style",
241
- info="Choose the style of caption generation"
242
- ),
243
- gr.Textbox(
244
- label="Custom Prompt (Optional)",
245
- placeholder="Enter custom prompt for specialized captioning...",
246
- lines=3,
247
- visible=False
248
- )
249
- ],
250
- outputs=[
251
- gr.Textbox(
252
- label="Generated Caption",
253
- lines=8,
254
- placeholder="The generated caption will appear here..."
255
- )
256
- ],
257
- title="🎨 JoyCaption - Advanced Image Captioning",
258
- description="""
259
- This application uses the **JoyCaption** model to generate high-quality, detailed captions for images.
260
-
261
- **Key Features:**
262
- - πŸ†“ **Free & Open**: No restrictions, open weights, training scripts included
263
- - πŸ”“ **Uncensored**: Equal coverage of SFW and NSFW concepts
264
- - 🌈 **Diversity**: Supports digital art, photoreal, anime, furry, and all styles
265
- - 🎯 **High Performance**: Near GPT4o-level captioning quality
266
- - πŸ”§ **Minimal Filtering**: Trained on diverse images for broad understanding
267
-
268
- **Supported image formats:** PNG, JPG, JPEG, WEBP
269
-
270
- **Caption Styles:**
271
- - **Formal Detailed**: Long descriptive captions in formal tone
272
- - **Creative**: Artistic and expressive descriptions
273
- - **Simple**: Concise, straightforward descriptions
274
- - **Technical**: Detailed technical analysis of composition and elements
275
- - **Custom**: User-defined prompts for specialized captioning
276
-
277
- **Model**: fancyfeast/llama-joycaption-alpha-two-hf-llava
278
- **Architecture**: LLaVA with Llama 3.1 base
279
- """,
280
- examples=[
281
- ["Upload an image for formal detailed captioning"],
282
- ["Upload an image for creative captioning"],
283
- ["Upload an image with custom prompt"],
284
- ],
285
- theme=gr.themes.Soft(
286
- primary_hue="purple",
287
- secondary_hue="slate",
288
- neutral_hue="slate"
289
- ),
290
- css="""
291
- .gradio-container {max-width: 900px !important; margin: auto !important;}
292
- .title {text-align: center; color: #7c3aed;}
293
- .description {text-align: center; font-size: 1.1em;}
294
- """,
295
- flagging_mode="never",
296
- submit_btn="🎨 Generate Caption",
297
- stop_btn="⏹️ Stop"
298
- )
 
 
 
 
 
299
 
300
  if __name__ == "__main__":
301
  print("πŸš€ Starting JoyCaption App...")
 
5
  Free, open, and uncensored model for training Diffusion models
6
  """
7
 
8
+ import spaces
9
  import gradio as gr
10
+ from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, LlavaForConditionalGeneration, TextIteratorStreamer
11
  import torch
12
+ import torch.amp.autocast_mode
 
13
  from PIL import Image
14
+ import torchvision.transforms.functional as TVF
15
+ from threading import Thread
16
+ from typing import Generator
17
  import tempfile
18
  import os
19
  from pathlib import Path
20
 
21
+ # Model configuration
22
+ MODEL_PATH = "fancyfeast/llama-joycaption-alpha-two-hf-llava"
23
+
24
  # Initialize the JoyCaption model
25
  print("Loading JoyCaption model...")
26
+ try:
27
+ # Load tokenizer
28
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True)
29
+ assert isinstance(tokenizer, PreTrainedTokenizer) or isinstance(tokenizer, PreTrainedTokenizerFast), f"Expected PreTrainedTokenizer, got {type(tokenizer)}"
30
+
31
+ # Load model with memory-efficient configuration
32
+ model = LlavaForConditionalGeneration.from_pretrained(
33
+ MODEL_PATH,
34
+ torch_dtype="bfloat16",
35
+ device_map="auto" if torch.cuda.is_available() else None,
36
+ load_in_8bit=True, # Enable 8-bit quantization for memory efficiency
37
+ low_cpu_mem_usage=True,
38
+ trust_remote_code=True
39
+ )
40
+ assert isinstance(model, LlavaForConditionalGeneration), f"Expected LlavaForConditionalGeneration, got {type(model)}"
41
+
42
+ print("JoyCaption model loaded successfully!")
43
+
44
+ except Exception as e:
45
+ print(f"Error loading model: {e}")
46
+ # Create fallback objects when model loading fails
47
+ tokenizer = None
48
+ model = None
49
+ print("Using fallback mode - model not available")
50
 
51
+ def trim_off_prompt(input_ids: list[int], eoh_id: int, eot_id: int) -> list[int]:
52
+ """Trim off the prompt from generated tokens"""
53
+ # Trim off the prompt
54
+ while True:
55
+ try:
56
+ i = input_ids.index(eoh_id)
57
+ except ValueError:
58
+ break
59
+
60
+ input_ids = input_ids[i + 1:]
61
 
62
+ # Trim off the end
63
  try:
64
+ i = input_ids.index(eot_id)
65
+ except ValueError:
66
+ return input_ids
67
+
68
+ return input_ids[:i]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # Get token IDs for special tokens
71
+ end_of_header_id = tokenizer.convert_tokens_to_ids("<|end_header_id|>") if tokenizer else None
72
+ end_of_turn_id = tokenizer.convert_tokens_to_ids("<|eot_id|>") if tokenizer else None
73
 
74
+ @spaces.GPU()
75
+ @torch.no_grad()
76
+ def generate_image_caption(message: dict, history, temperature: float = 0.6, top_p: float = 0.9, max_new_tokens: int = 300, log_prompt: bool = False) -> Generator[str, None, None]:
77
+ """Generate image captions using JoyCaption model"""
78
 
79
+ # Check if model is available
80
+ if model is None or tokenizer is None:
81
+ yield "Error: JoyCaption model not loaded. Please check the model availability and try again."
82
+ return
 
 
 
 
 
83
 
84
+ torch.cuda.empty_cache()
 
 
 
 
85
 
86
  try:
87
+ # Extract prompt from message
88
+ if isinstance(message, dict):
89
+ prompt = message.get('text', '').strip()
90
+ else:
91
+ prompt = str(message).strip()
92
 
93
+ # Load image
94
+ if isinstance(message, dict) and "files" in message and len(message["files"]) >= 1:
95
+ image = Image.open(message["files"][0])
 
 
 
 
96
  else:
97
+ yield "ERROR: This model requires exactly one image as input."
98
+ return
99
 
100
+ # Log the prompt if requested
101
+ if log_prompt:
102
+ print(f"Prompt: {prompt}")
103
 
104
+ # Preprocess image
105
+ # Resize to 384x384 for optimal performance
106
+ if image.size != (384, 384):
107
+ image = image.resize((384, 384), Image.LANCZOS)
108
+ image = image.convert("RGB")
109
+ pixel_values = TVF.pil_to_tensor(image)
 
110
 
111
  # Define prompt templates based on type
112
  prompt_templates = {
 
114
  "creative": "Write a creative and artistic caption for this image, capturing its essence and mood.",
115
  "simple": "Write a simple, concise caption describing what you see in this image.",
116
  "technical": "Provide a detailed technical description of this image including composition, lighting, and visual elements.",
117
+ "custom": prompt if prompt else "Write a descriptive caption for this image."
118
  }
119
 
120
  # Select appropriate prompt
121
+ final_prompt = prompt_templates.get(prompt, prompt_templates["formal_detailed"])
122
 
123
  # Build conversation following JoyCaption's recommended format
124
  convo = [
 
128
  },
129
  {
130
  "role": "user",
131
+ "content": final_prompt,
132
  },
133
  ]
134
 
135
+ # Format the conversation
136
+ convo_string = tokenizer.apply_chat_template(
137
+ convo,
138
+ tokenize=False,
 
139
  add_generation_prompt=True
140
  )
141
  assert isinstance(convo_string, str)
142
 
143
+ # Tokenize the conversation
144
+ convo_tokens = tokenizer.encode(convo_string, add_special_tokens=False, truncation=False)
145
+
146
+ # Repeat the image tokens
147
+ input_tokens = []
148
+ for token in convo_tokens:
149
+ if token == model.config.image_token_index:
150
+ input_tokens.extend([model.config.image_token_index] * model.config.image_seq_length)
151
+ else:
152
+ input_tokens.append(token)
153
+
154
+ input_ids = torch.tensor(input_tokens, dtype=torch.long)
155
+ attention_mask = torch.ones_like(input_ids)
156
+
157
+ # Move to GPU
158
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
159
+ input_ids = input_ids.unsqueeze(0).to(device)
160
+ attention_mask = attention_mask.unsqueeze(0).to(device)
161
+ pixel_values = pixel_values.unsqueeze(0).to(device)
 
 
162
 
163
+ # Normalize the image
164
+ pixel_values = pixel_values / 255.0
165
+ pixel_values = TVF.normalize(pixel_values, [0.5], [0.5])
166
+ pixel_values = pixel_values.to(torch.bfloat16)
167
 
168
+ # Set up streaming
169
+ streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
170
+
171
+ # Generate parameters
172
+ generate_kwargs = dict(
173
+ input_ids=input_ids,
174
+ pixel_values=pixel_values,
175
+ attention_mask=attention_mask,
176
+ max_new_tokens=max_new_tokens,
177
+ do_sample=True,
178
+ suppress_tokens=None,
179
+ use_cache=True,
180
+ temperature=temperature,
181
+ top_k=None,
182
+ top_p=top_p,
183
+ streamer=streamer,
184
+ )
 
 
 
 
 
 
 
 
185
 
186
+ if temperature == 0:
187
+ generate_kwargs["do_sample"] = False
188
 
189
+ # Start generation in a separate thread
190
+ t = Thread(target=model.generate, kwargs=generate_kwargs)
191
+ t.start()
192
+
193
+ # Stream the output
194
+ outputs = []
195
+ for text in streamer:
196
+ outputs.append(text)
197
+ yield "".join(outputs)
198
+
199
  except Exception as e:
200
  error_msg = f"Error during caption generation: {str(e)}"
201
  print(error_msg)
202
  # Return a demo response when model fails
203
+ yield generate_demo_caption(prompt)
204
 
205
+ def generate_demo_caption(prompt_type):
206
  """Generate a demo caption when the model is not available"""
 
207
  demo_responses = {
208
+ "formal_detailed": "This image appears to contain visual elements including colors, shapes, and composition. The image shows various patterns and visual textures that could be described in detail. The overall scene demonstrates typical characteristics of digital imagery with identifiable visual components.",
209
+ "creative": "A captivating visual composition that captures the essence of artistic expression through color, form, and visual storytelling. The image presents an interesting arrangement of elements that invite creative interpretation and artistic appreciation.",
210
+ "simple": "An image containing visual elements and patterns. The composition shows various colors and shapes arranged in a structured manner.",
211
+ "technical": "Technical analysis: This image demonstrates standard digital image characteristics with RGB color space representation. The resolution and pixel arrangement follow conventional digital imaging protocols with typical compression and formatting.",
212
+ "custom": "Based on the custom prompt provided, this image shows visual elements that could be interpreted according to the specific requirements mentioned."
213
  }
214
 
215
  return demo_responses.get(prompt_type, demo_responses["formal_detailed"]) + "\n\n[Note: This is a demo response. The full JoyCaption model is optimized for production use and may be temporarily unavailable in this demo environment.]"
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  # Create Gradio interface
218
+ TITLE = "<h1><center>🎨 JoyCaption - Advanced Image Captioning</center></h1>"
219
+ DESCRIPTION = """
220
+ <div>
221
+ <p>πŸ§ͺ This application uses the <strong>JoyCaption</strong> model to generate high-quality, detailed captions for images.</p>
222
+ <p><strong>Key Features:</strong></p>
223
+ <ul>
224
+ <li>πŸ†“ <strong>Free & Open</strong>: No restrictions, open weights, training scripts included</li>
225
+ <li>πŸ”“ <strong>Uncensored</strong>: Equal coverage of SFW and NSFW concepts</li>
226
+ <li>🌈 <strong>Diversity</strong>: Supports digital art, photoreal, anime, furry, and all styles</li>
227
+ <li>🎯 <strong>High Performance</strong>: Near GPT4o-level captioning quality</li>
228
+ <li>πŸ”§ <strong>Minimal Filtering</strong>: Trained on diverse images for broad understanding</li>
229
+ </ul>
230
+ <p><strong>Supported image formats:</strong> PNG, JPG, JPEG, WEBP</p>
231
+ <p><strong>Caption Styles:</strong></p>
232
+ <ul>
233
+ <li><strong>Formal Detailed</strong>: Long descriptive captions in formal tone</li>
234
+ <li><strong>Creative</strong>: Artistic and expressive descriptions</li>
235
+ <li><strong>Simple</strong>: Concise, straightforward descriptions</li>
236
+ <li><strong>Technical</strong>: Detailed technical analysis of composition and elements</li>
237
+ <li><strong>Custom</strong>: User-defined prompts for specialized captioning</li>
238
+ </ul>
239
+ <p><strong>Model:</strong> fancyfeast/llama-joycaption-alpha-two-hf-llava</p>
240
+ <p><strong>Architecture:</strong> LLaVA with Llama 3.1 base</p>
241
+ </div>
242
+ """
243
+
244
+ PLACEHOLDER = "Upload an image and describe what kind of caption you'd like..."
245
+
246
+ # Create chatbot interface
247
+ chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='JoyCaption ChatInterface', type="messages")
248
+ textbox = gr.MultimodalTextbox(file_types=["image"], file_count="single")
249
+
250
+ with gr.Blocks() as demo:
251
+ gr.HTML(TITLE)
252
+ chat_interface = gr.ChatInterface(
253
+ fn=generate_image_caption,
254
+ chatbot=chatbot,
255
+ type="messages",
256
+ fill_height=True,
257
+ multimodal=True,
258
+ textbox=textbox,
259
+ additional_inputs_accordion=gr.Accordion(label="βš™οΈ Parameters", open=True, render=False),
260
+ additional_inputs=[
261
+ gr.Slider(
262
+ minimum=0,
263
+ maximum=1,
264
+ step=0.1,
265
+ value=0.6,
266
+ label="Temperature",
267
+ render=False
268
+ ),
269
+ gr.Slider(
270
+ minimum=0,
271
+ maximum=1,
272
+ step=0.05,
273
+ value=0.9,
274
+ label="Top p",
275
+ render=False
276
+ ),
277
+ gr.Slider(
278
+ minimum=8,
279
+ maximum=4096,
280
+ step=1,
281
+ value=300,
282
+ label="Max new tokens",
283
+ render=False
284
+ ),
285
+ gr.Checkbox(
286
+ label="Help improve JoyCaption by logging your text query",
287
+ value=False,
288
+ render=False
289
+ ),
290
+ ],
291
+ )
292
+ gr.Markdown(DESCRIPTION)
293
 
294
  if __name__ == "__main__":
295
  print("πŸš€ Starting JoyCaption App...")