nickdigger committed on
Commit
cfdf4b3
·
verified ·
1 Parent(s): 6e12f0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -81
app.py CHANGED
@@ -1,8 +1,32 @@
1
- import spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import torch
4
  from transformers import LlavaForConditionalGeneration, AutoProcessor
5
  from PIL import Image
 
6
  import gc
7
  import time
8
  import gc
@@ -10,26 +34,42 @@ import os
10
  import shutil
11
  import json
12
  from pathlib import Path
 
13
 
14
  from hf_space_utils import fix_image_url
15
 
16
- # Storage optimization - redirect cache to temporary directories
17
- os.environ["HF_HOME"] = "/tmp/hf_cache"
18
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
19
- os.environ["HF_DATASETS_CACHE"] = "/tmp/datasets_cache"
20
- os.environ["TORCH_HOME"] = "/tmp/torch_cache"
 
21
 
22
  # Model configuration
23
  MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
24
 
 
 
 
25
  def cleanup_storage():
26
  """Clean up temporary files and caches to prevent storage overflow"""
27
  try:
28
- # Clean up temporary caches
29
- temp_dirs = ["/tmp/hf_cache", "/tmp/transformers_cache", "/tmp/datasets_cache", "/tmp/torch_cache"]
 
 
 
 
 
30
  for temp_dir in temp_dirs:
 
 
31
  if os.path.exists(temp_dir):
32
- shutil.rmtree(temp_dir, ignore_errors=True)
 
 
 
 
33
 
34
  # Force garbage collection
35
  gc.collect()
@@ -56,22 +96,47 @@ print("🚀 Loading Sequential Three-Tone JoyCaption system... v2.1")
56
 
57
  # Load model and processor at startup
58
  print("📦 Loading model and processor at startup...")
59
- processor = AutoProcessor.from_pretrained(
60
- MODEL_PATH,
61
- low_cpu_mem_usage=True
62
- )
63
-
64
- model = LlavaForConditionalGeneration.from_pretrained(
65
- MODEL_PATH,
66
- torch_dtype=torch.bfloat16,
67
- device_map="auto",
68
- low_cpu_mem_usage=True
69
- )
70
- model.eval()
71
- print("✅ Model loaded and ready!")
72
-
73
- # Initial cleanup after model loading
74
- cleanup_storage()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  # Optimized 5-tone prompts with better temperature control
77
  # Temperature: Lower for prompt adherence, higher for word variety
@@ -112,25 +177,32 @@ def apply_smart_corrections(text):
112
  r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*': '',
113
 
114
  # Nudity precision corrections
115
- r'\btopless women\b': lambda m: 'nude women' if 'naked' in text.lower() or 'nude' in text.lower() else 'topless women',
116
- r'\btopless woman\b': lambda m: 'nude woman' if 'naked' in text.lower() or 'nude' in text.lower() else 'topless woman',
117
 
118
  # Person count corrections
119
- r'\bthree women\b': lambda m: 'two women' if text.count('woman') + text.count('female') <= 2 else 'three women',
120
- r'\bfour women\b': lambda m: 'three women' if text.count('woman') + text.count('female') <= 3 else 'four women',
121
 
122
  # Clothing precision
123
- r'\bwearing nothing\b': 'nude',
124
- r'\bnot wearing.*clothes\b': 'nude',
125
- r'\bcompletely naked\b': 'nude',
126
- r'\bfully nude\b': 'nude',
127
  }
128
 
129
  corrected_text = text
130
  try:
131
  for pattern, replacement in corrections.items():
132
  if callable(replacement):
133
- corrected_text = re.sub(pattern, replacement, corrected_text, flags=re.IGNORECASE)
 
 
 
 
 
 
 
134
  else:
135
  corrected_text = re.sub(pattern, replacement, corrected_text, flags=re.IGNORECASE)
136
  except Exception as e:
@@ -165,14 +237,27 @@ def safe_generate_caption_direct(image, tone, max_chars=600, keywords_text="", c
165
  {"role": "user", "content": base_prompt}
166
  ]
167
 
 
 
 
 
168
  convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
169
  inputs = processor(text=[convo_string], images=[image], return_tensors="pt")
170
-
171
  device = next(model.parameters()).device
172
  inputs = {k: v.to(device, non_blocking=True) if hasattr(v, 'to') else v for k, v in inputs.items()}
173
-
 
174
  if 'pixel_values' in inputs:
175
- inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
 
 
 
 
 
 
 
 
176
 
177
  # Get tone-specific generation parameters
178
  temperature = tone_config.get("temperature", 0.7)
@@ -240,14 +325,11 @@ def safe_generate_caption_direct(image, tone, max_chars=600, keywords_text="", c
240
  pass
241
  return f"❌ Error: {str(e)[:50]}..."
242
 
243
- # Individual GPU-decorated functions for all 3 tones
244
- @spaces.GPU(duration=45)
245
  @torch.no_grad()
246
  def generate_engaging_only(image, custom_instruction=""):
247
  """Generate only engaging caption"""
248
  return safe_generate_caption_direct(image, "engaging", custom_instruction=custom_instruction) if image else "❌ Upload image first"
249
 
250
- @spaces.GPU(duration=45)
251
  @torch.no_grad()
252
  def generate_casual_friend_only(image, custom_instruction=""):
253
  """Generate only casual friend caption"""
@@ -255,7 +337,6 @@ def generate_casual_friend_only(image, custom_instruction=""):
255
 
256
  # NSFW function removed - caused hallucination
257
 
258
- @spaces.GPU(duration=45)
259
  @torch.no_grad()
260
  def generate_uncensored_keywords_only(image, keywords_text, custom_instruction=""):
261
  """Generate only uncensored with keywords caption"""
@@ -263,7 +344,6 @@ def generate_uncensored_keywords_only(image, keywords_text, custom_instruction="
263
 
264
  # Body parts focus function removed - caused hallucination
265
 
266
- @spaces.GPU(duration=45)
267
  @torch.no_grad()
268
  def answer_question(image, question):
269
  """Answer any question about the image without censorship"""
@@ -282,14 +362,25 @@ def answer_question(image, question):
282
  {"role": "user", "content": qa_prompt}
283
  ]
284
 
 
 
 
 
285
  convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
286
  inputs = processor(text=[convo_string], images=[image], return_tensors="pt")
287
-
288
  device = next(model.parameters()).device
289
  inputs = {k: v.to(device, non_blocking=True) if hasattr(v, 'to') else v for k, v in inputs.items()}
290
-
 
291
  if 'pixel_values' in inputs:
292
- inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
 
 
 
 
 
 
293
 
294
  with torch.no_grad():
295
  output = model.generate(
@@ -345,7 +436,13 @@ def export_joycaption_data(keywords, custom_instructions, question, engaging_cap
345
 
346
  if question and question.strip():
347
  data["data"]["question"] = question.strip()
348
- image_url_converted = fix_image_url(image_path)
 
 
 
 
 
 
349
  if image_url_converted and str(image_url_converted).strip():
350
  data["data"]["image_url"] = str(image_url_converted).strip()
351
  # Add generated captions
@@ -392,7 +489,7 @@ EXPORT_JS = """
392
 
393
  // Get all textareas and inputs from the page
394
  const allInputs = document.querySelectorAll('textarea, input[type="text"]');
395
-
396
  allInputs.forEach((field, index) => {
397
  const placeholder = (field.placeholder || '').toLowerCase();
398
  const value = field.value ? field.value.trim() : '';
@@ -565,13 +662,7 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
565
  lines=2,
566
  info="Add keywords that will be mentioned by the 'Keywords' tone ONLY if they apply to what's visible in the image"
567
  )
568
-
569
-
570
- placeholder="e.g., blonde_girl_001.jpg, Instagram photo, OnlyFans pic...",
571
- label="🖼️ Image Reference",
572
- lines=1,
573
- info="Image filename or description for your reference (will be exported)"
574
- )
575
 
576
  custom_instruction_input = gr.Textbox(
577
  placeholder="e.g., 'from instagram', 'the left girl has red hair', 'two girls kissing', 'beach setting'...",
@@ -630,7 +721,7 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
630
  interactive=True,
631
  placeholder="Click the button above to generate engaging caption..."
632
  )
633
-
634
  # Casual Friend caption
635
  with gr.Row():
636
  with gr.Column(scale=4):
@@ -652,9 +743,9 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
652
  interactive=True,
653
  placeholder="Click the button above to generate casual friend caption..."
654
  )
655
-
656
  # NSFW section removed - caused hallucination
657
-
658
  # Keywords caption
659
  with gr.Row():
660
  with gr.Column(scale=4):
@@ -676,11 +767,11 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
676
  interactive=True,
677
  placeholder="Click the button above to generate keywords caption..."
678
  )
679
-
680
  # Body Parts Focus section removed - caused hallucination
681
-
682
  # Descriptive text removed for cleaner interface
683
-
684
  # Export functionality
685
  with gr.Row():
686
  export_btn = gr.Button(
@@ -717,7 +808,7 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
717
  )
718
 
719
  # NSFW button handler removed
720
-
721
  generate_uncensored_btn.click(
722
  generate_uncensored_keywords_only,
723
  inputs=[image_input, keywords_input, custom_instruction_input],
@@ -726,7 +817,7 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
726
  )
727
 
728
  # Body Parts Focus button handler removed
729
-
730
  # Individual reload buttons - using direct generation for consistency
731
  def reload_engaging_fn(image, custom_instruction):
732
  return safe_generate_caption_direct(image, "engaging", custom_instruction=custom_instruction) if image else "❌ Upload image first"
@@ -801,31 +892,18 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
801
  )
802
 
803
  # Export functionality
804
- def handle_export():
805
- """Handle the export button click"""
806
- # Get current values from all fields
807
- return export_joycaption_data(
808
- keywords_input.value or "",
809
- custom_instruction_input.value or "",
810
- question_input.value or "",
811
- engaging_output.value or "",
812
- friend_output.value or "",
813
- uncensored_output.value or "",
814
- qa_output.value or "",
815
- image_input.value or ""
816
- )
817
-
818
  def handle_export(keywords, custom_instructions, question, engaging_caption, casual_caption, keywords_caption, qa_answer, image_path):
819
- """Handle export and return proper file download"""
820
  message, file_data = export_joycaption_data(
821
  keywords, custom_instructions, question,
822
  engaging_caption, casual_caption, keywords_caption, qa_answer, image_path
823
  )
824
-
825
  if file_data:
826
  json_string, filename = file_data
827
- # Create temporary file for download
828
- temp_file = f"C:\\Users\\Andrei\\{filename}"
 
829
  with open(temp_file, 'w', encoding='utf-8') as f:
830
  f.write(json_string)
831
  return gr.update(value=message, visible=True), gr.update(value=temp_file, visible=True)
@@ -841,7 +919,8 @@ with gr.Blocks(title="Sequential Three-Tone JoyCaption", theme=gr.themes.Soft())
841
  engaging_output,
842
  friend_output,
843
  uncensored_output,
844
- qa_output
 
845
  ],
846
  outputs=[export_output, export_file]
847
  )
 
1
+ """
2
+ Copy of the full `app.py` into the deploy folder for direct upload.
3
+ This file is a snapshot of the application's main entrypoint and should be
4
+ identical to the root `app.py` when uploading to Hugging Face Spaces.
5
+ """
6
+
7
def _spaces_gpu(*args, **kwargs):
    """No-op stand-in for the `spaces.GPU` decorator.

    Supports both decorator spellings so decorated functions are always
    returned unchanged:

    * ``@spaces.GPU`` -- called directly with the function to decorate.
    * ``@spaces.GPU(duration=45)`` -- called with options, returning a
      decorator that is then applied to the function.
    """
    # Bare usage: the only argument is the function being decorated.
    if len(args) == 1 and not kwargs and callable(args[0]):
        return args[0]

    def _wrap(f):
        return f
    return _wrap


try:
    import spaces
except Exception:
    # Provide a no-op `spaces` with a GPU decorator fallback so the app can
    # run outside HF Spaces. The catch is deliberately broad (best-effort):
    # any failure while importing `spaces` falls back to the stub.
    import types
    spaces = types.SimpleNamespace(GPU=_spaces_gpu)
else:
    # Ensure spaces.GPU exists and is usable as a decorator.
    if not hasattr(spaces, 'GPU'):
        spaces.GPU = _spaces_gpu
25
  import gradio as gr
26
  import torch
27
  from transformers import LlavaForConditionalGeneration, AutoProcessor
28
  from PIL import Image
29
+ import tempfile
30
  import gc
31
  import time
32
  import gc
 
34
  import shutil
35
  import json
36
  from pathlib import Path
37
+ import re
38
 
39
  from hf_space_utils import fix_image_url
40
 
41
# Storage optimization: point every framework cache at a sub-directory of the
# OS temp directory so the paths are valid on any platform.
_tmpdir = tempfile.gettempdir()
os.environ.update({
    "HF_HOME": os.path.join(_tmpdir, "hf_cache"),
    "TRANSFORMERS_CACHE": os.path.join(_tmpdir, "transformers_cache"),
    "HF_DATASETS_CACHE": os.path.join(_tmpdir, "datasets_cache"),
    "TORCH_HOME": os.path.join(_tmpdir, "torch_cache"),
})

# Model configuration
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"

# Optional public host for converting /tmp/gradio paths to public gradio_api
# URLs. First non-empty value of SPACE_HOST / HF_SPACE_HOST wins; None when
# neither is set.
SPACE_HOST = next(
    (
        host
        for host in (os.environ.get("SPACE_HOST"), os.environ.get("HF_SPACE_HOST"))
        if host
    ),
    None,
)
53
+
54
  def cleanup_storage():
55
  """Clean up temporary files and caches to prevent storage overflow"""
56
  try:
57
+ # Clean up temporary caches using the configured environment paths
58
+ temp_dirs = [
59
+ os.environ.get("HF_HOME"),
60
+ os.environ.get("TRANSFORMERS_CACHE"),
61
+ os.environ.get("HF_DATASETS_CACHE"),
62
+ os.environ.get("TORCH_HOME")
63
+ ]
64
  for temp_dir in temp_dirs:
65
+ if not temp_dir:
66
+ continue
67
  if os.path.exists(temp_dir):
68
+ try:
69
+ shutil.rmtree(temp_dir, ignore_errors=True)
70
+ except Exception:
71
+ # best-effort cleanup
72
+ pass
73
 
74
  # Force garbage collection
75
  gc.collect()
 
96
 
97
  # Load model and processor at startup
98
  print("📦 Loading model and processor at startup...")
99
# Module-level handles. They keep these defaults when loading is skipped, which
# lets the generation functions detect the "no model" state and return an
# error message instead of crashing.
processor = None
model = None
MODEL_TORCH_DTYPE = None
MODEL_USE_CUDA = False

# Allow skipping model loading for tests or light-weight runs by setting
# SKIP_MODEL_LOAD=1. Explicit "off" spellings are treated like an unset
# variable so that SKIP_MODEL_LOAD=0 does not accidentally skip loading.
_skip_model_load = os.environ.get("SKIP_MODEL_LOAD", "").strip().lower() not in (
    "", "0", "false", "no", "off",
)

if not _skip_model_load:
    # Determine target device for model loading; fall back to CPU when no
    # CUDA device is available.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # Prefer bf16 only when the GPU actually supports it; otherwise use
        # float16. (`torch.bfloat16` always exists as an attribute, so probing
        # with getattr never reaches the float16 fallback.)
        torch_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        device_map = "auto"
    else:
        # On CPU keep the library's default dtype.
        torch_dtype = None
        device_map = "cpu"
    MODEL_USE_CUDA = use_cuda

    processor = AutoProcessor.from_pretrained(
        MODEL_PATH,
        low_cpu_mem_usage=True
    )

    model_kwargs = {"low_cpu_mem_usage": True, "device_map": device_map}
    if torch_dtype is not None:
        model_kwargs["torch_dtype"] = torch_dtype

    model = LlavaForConditionalGeneration.from_pretrained(
        MODEL_PATH,
        **model_kwargs
    )
    model.eval()
    # Remember the dtype so pixel tensors can be cast to match the model later.
    MODEL_TORCH_DTYPE = torch_dtype
    print("✅ Model loaded and ready!")

    # Initial cleanup after model loading
    cleanup_storage()
else:
    print("⚠️ SKIP_MODEL_LOAD is set — skipping heavy model initialization (test mode)")
140
 
141
  # Optimized 5-tone prompts with better temperature control
142
  # Temperature: Lower for prompt adherence, higher for word variety
 
177
  r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*': '',
178
 
179
  # Nudity precision corrections
180
+ r'\\btopless women\\b': lambda m: 'nude women' if 'naked' in text.lower() or 'nude' in text.lower() else 'topless women',
181
+ r'\\btopless woman\\b': lambda m: 'nude woman' if 'naked' in text.lower() or 'nude' in text.lower() else 'topless woman',
182
 
183
  # Person count corrections
184
+ r'\\bthree women\\b': lambda m: 'two women' if text.count('woman') + text.count('female') <= 2 else 'three women',
185
+ r'\\bfour women\\b': lambda m: 'three women' if text.count('woman') + text.count('female') <= 3 else 'four women',
186
 
187
  # Clothing precision
188
+ r'\\bwearing nothing\\b': 'nude',
189
+ r'\\bnot wearing.*clothes\\b': 'nude',
190
+ r'\\bcompletely naked\\b': 'nude',
191
+ r'\\bfully nude\\b': 'nude',
192
  }
193
 
194
  corrected_text = text
195
  try:
196
  for pattern, replacement in corrections.items():
197
  if callable(replacement):
198
+ # Wrap the replacement to ensure it returns a string and accepts a Match
199
+ def _repl(match, rep=replacement):
200
+ try:
201
+ out = rep(match)
202
+ return "" if out is None else str(out)
203
+ except Exception:
204
+ return match.group(0)
205
+ corrected_text = re.sub(pattern, _repl, corrected_text, flags=re.IGNORECASE)
206
  else:
207
  corrected_text = re.sub(pattern, replacement, corrected_text, flags=re.IGNORECASE)
208
  except Exception as e:
 
237
  {"role": "user", "content": base_prompt}
238
  ]
239
 
240
+ # Ensure model and processor are loaded
241
+ if processor is None or model is None:
242
+ return "❌ Model or processor not initialized. Make sure model is loaded (unset SKIP_MODEL_LOAD) and dependencies are installed."
243
+
244
  convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
245
  inputs = processor(text=[convo_string], images=[image], return_tensors="pt")
246
+
247
  device = next(model.parameters()).device
248
  inputs = {k: v.to(device, non_blocking=True) if hasattr(v, 'to') else v for k, v in inputs.items()}
249
+
250
+ # Safely convert pixel tensor dtype depending on runtime capabilities
251
  if 'pixel_values' in inputs:
252
+ if MODEL_USE_CUDA and MODEL_TORCH_DTYPE is not None:
253
+ try:
254
+ inputs['pixel_values'] = inputs['pixel_values'].to(MODEL_TORCH_DTYPE)
255
+ except Exception:
256
+ # fallback to float32
257
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.float32)
258
+ else:
259
+ # CPU fallback
260
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.float32)
261
 
262
  # Get tone-specific generation parameters
263
  temperature = tone_config.get("temperature", 0.7)
 
325
  pass
326
  return f"❌ Error: {str(e)[:50]}..."
327
 
 
 
328
  @torch.no_grad()
329
  def generate_engaging_only(image, custom_instruction=""):
330
  """Generate only engaging caption"""
331
  return safe_generate_caption_direct(image, "engaging", custom_instruction=custom_instruction) if image else "❌ Upload image first"
332
 
 
333
  @torch.no_grad()
334
  def generate_casual_friend_only(image, custom_instruction=""):
335
  """Generate only casual friend caption"""
 
337
 
338
  # NSFW function removed - caused hallucination
339
 
 
340
  @torch.no_grad()
341
  def generate_uncensored_keywords_only(image, keywords_text, custom_instruction=""):
342
  """Generate only uncensored with keywords caption"""
 
344
 
345
  # Body parts focus function removed - caused hallucination
346
 
 
347
  @torch.no_grad()
348
  def answer_question(image, question):
349
  """Answer any question about the image without censorship"""
 
362
  {"role": "user", "content": qa_prompt}
363
  ]
364
 
365
+ # Ensure model and processor are loaded
366
+ if processor is None or model is None:
367
+ return "❌ Model or processor not initialized. Make sure model is loaded (unset SKIP_MODEL_LOAD) and dependencies are installed."
368
+
369
  convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
370
  inputs = processor(text=[convo_string], images=[image], return_tensors="pt")
371
+
372
  device = next(model.parameters()).device
373
  inputs = {k: v.to(device, non_blocking=True) if hasattr(v, 'to') else v for k, v in inputs.items()}
374
+
375
+ # Safely convert pixel_values dtype depending on runtime
376
  if 'pixel_values' in inputs:
377
+ if MODEL_USE_CUDA and MODEL_TORCH_DTYPE is not None:
378
+ try:
379
+ inputs['pixel_values'] = inputs['pixel_values'].to(MODEL_TORCH_DTYPE)
380
+ except Exception:
381
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.float32)
382
+ else:
383
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.float32)
384
 
385
  with torch.no_grad():
386
  output = model.generate(
 
436
 
437
  if question and question.strip():
438
  data["data"]["question"] = question.strip()
439
+
440
+ # Always attempt to include the uploaded image URL (converted) if an image path was provided
441
+ if image_path and str(image_path).strip():
442
+ # include the raw local path
443
+ data["data"]["image_local_path"] = str(image_path)
444
+ # pass empty string when no host is configured (fix_image_url treats falsy host as no conversion)
445
+ image_url_converted = fix_image_url(image_path, host=(SPACE_HOST or ""))
446
  if image_url_converted and str(image_url_converted).strip():
447
  data["data"]["image_url"] = str(image_url_converted).strip()
448
  # Add generated captions
 
489
 
490
  // Get all textareas and inputs from the page
491
  const allInputs = document.querySelectorAll('textarea, input[type="text"]');
492
+
493
  allInputs.forEach((field, index) => {
494
  const placeholder = (field.placeholder || '').toLowerCase();
495
  const value = field.value ? field.value.trim() : '';
 
662
  lines=2,
663
  info="Add keywords that will be mentioned by the 'Keywords' tone ONLY if they apply to what's visible in the image"
664
  )
665
+ # image_reference_input removed by request — we will export the actual image URL instead
 
 
 
 
 
 
666
 
667
  custom_instruction_input = gr.Textbox(
668
  placeholder="e.g., 'from instagram', 'the left girl has red hair', 'two girls kissing', 'beach setting'...",
 
721
  interactive=True,
722
  placeholder="Click the button above to generate engaging caption..."
723
  )
724
+
725
  # Casual Friend caption
726
  with gr.Row():
727
  with gr.Column(scale=4):
 
743
  interactive=True,
744
  placeholder="Click the button above to generate casual friend caption..."
745
  )
746
+
747
  # NSFW section removed - caused hallucination
748
+
749
  # Keywords caption
750
  with gr.Row():
751
  with gr.Column(scale=4):
 
767
  interactive=True,
768
  placeholder="Click the button above to generate keywords caption..."
769
  )
770
+
771
  # Body Parts Focus section removed - caused hallucination
772
+
773
  # Descriptive text removed for cleaner interface
774
+
775
  # Export functionality
776
  with gr.Row():
777
  export_btn = gr.Button(
 
808
  )
809
 
810
  # NSFW button handler removed
811
+
812
  generate_uncensored_btn.click(
813
  generate_uncensored_keywords_only,
814
  inputs=[image_input, keywords_input, custom_instruction_input],
 
817
  )
818
 
819
  # Body Parts Focus button handler removed
820
+
821
  # Individual reload buttons - using direct generation for consistency
822
  def reload_engaging_fn(image, custom_instruction):
823
  return safe_generate_caption_direct(image, "engaging", custom_instruction=custom_instruction) if image else "❌ Upload image first"
 
892
  )
893
 
894
  # Export functionality
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  def handle_export(keywords, custom_instructions, question, engaging_caption, casual_caption, keywords_caption, qa_answer, image_path):
896
+ """Handle export and return proper file download (cross-platform, uses tempdir)"""
897
  message, file_data = export_joycaption_data(
898
  keywords, custom_instructions, question,
899
  engaging_caption, casual_caption, keywords_caption, qa_answer, image_path
900
  )
901
+
902
  if file_data:
903
  json_string, filename = file_data
904
+ # Use the OS temp directory so this works on Windows, macOS, Linux and in Spaces
905
+ base_dir = tempfile.gettempdir()
906
+ temp_file = os.path.join(base_dir, filename)
907
  with open(temp_file, 'w', encoding='utf-8') as f:
908
  f.write(json_string)
909
  return gr.update(value=message, visible=True), gr.update(value=temp_file, visible=True)
 
919
  engaging_output,
920
  friend_output,
921
  uncensored_output,
922
+ qa_output,
923
+ image_input
924
  ],
925
  outputs=[export_output, export_file]
926
  )