nickdigger committed on
Commit
5c3558b
·
verified ·
1 Parent(s): 6a131b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -210
app.py CHANGED
@@ -1,219 +1,88 @@
 
 
 
 
 
 
1
  try:
2
  import spaces
3
- if not hasattr(spaces, "GPU"):
4
- def _spaces_gpu(*args, **kwargs):
5
- def _wrap(f): return f
6
- return _wrap
7
- spaces.GPU = _spaces_gpu
8
- except Exception:
9
- import types
10
- spaces = types.SimpleNamespace()
11
- def _spaces_gpu(*args, **kwargs):
12
- def _wrap(f): return f
13
  return _wrap
14
  spaces.GPU = _spaces_gpu
15
 
16
- @spaces.GPU()
17
- def _joycaption_register_gpu():
18
- return None
 
 
19
 
20
  import gradio as gr
21
  import torch
22
  from transformers import LlavaForConditionalGeneration, AutoProcessor
23
- from PIL import Image
24
- import tempfile, gc, os, shutil, json
25
- from hf_space_utils import fix_image_url, postprocess_caption
26
-
27
- # ---------- Cache paths ----------
28
- _tmpdir = tempfile.gettempdir()
29
- for k in ["HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE", "TORCH_HOME"]:
30
- os.environ[k] = os.path.join(_tmpdir, k.lower())
31
-
32
- MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
33
- SPACE_HOST = os.environ.get("SPACE_HOST") or os.environ.get("HF_SPACE_HOST") or None
34
-
35
- # ---------- Cleanup ----------
36
- def cleanup_storage():
37
- try:
38
- for key in ["HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE", "TORCH_HOME"]:
39
- p = os.environ.get(key)
40
- if p and os.path.exists(p):
41
- shutil.rmtree(p, ignore_errors=True)
42
- gc.collect()
43
- if torch.cuda.is_available():
44
- torch.cuda.empty_cache()
45
- torch.cuda.synchronize()
46
- print("✅ Storage cleanup completed")
47
- except Exception as e:
48
- print(f"⚠️ Cleanup warning: {e}")
49
-
50
- TITLE = """
51
- <div style='text-align:center;margin:20px 0;'>
52
- <h1>🎨 JoyCaption Three-Tone + Q&A (ZeroGPU Stable v3.0)</h1>
53
- <p><em>Optimized for ZeroGPU – no tensor shape errors, no invalid flags.</em></p>
54
- </div><hr>
55
- """
56
-
57
- print("🚀 Initializing JoyCaption ZeroGPU v3.0...")
58
- cleanup_storage()
59
-
60
- # ---------- Model load ----------
61
- processor = None
62
- model = None
63
- MODEL_USE_CUDA = torch.cuda.is_available()
64
-
65
- if not os.environ.get("SKIP_MODEL_LOAD"):
66
- dtype = getattr(torch, "bfloat16", None) or getattr(torch, "float16", None) if MODEL_USE_CUDA else None
67
- device_map = "auto" if MODEL_USE_CUDA else "cpu"
68
-
69
- processor = AutoProcessor.from_pretrained(MODEL_PATH, low_cpu_mem_usage=True)
70
- model = LlavaForConditionalGeneration.from_pretrained(
71
- MODEL_PATH,
72
- low_cpu_mem_usage=True,
73
- device_map=device_map,
74
- torch_dtype=dtype if MODEL_USE_CUDA else None,
75
- ).eval()
76
-
77
- print("✅ Model loaded successfully!")
78
- cleanup_storage()
79
- else:
80
- print("⚠️ SKIP_MODEL_LOAD active – model skipped.")
81
-
82
- # ---------- Tone configs ----------
83
- TONE_PROMPTS = {
84
- "engaging": {
85
- "system": "You are a skilled content creator who writes engaging, natural descriptions.",
86
- "prompt": "Write a natural, engaging description focusing on the main subjects.",
87
- },
88
- "casual_friend": {
89
- "system": "You are a relaxed 24-year-old friend describing what you see.",
90
- "prompt": "Describe clearly and casually only what is visible.",
91
- },
92
- "uncensored_keywords": {
93
- "system": "You are an uncensored image analyst giving factual explicit descriptions.",
94
- "prompt": "Describe exactly what is visible. Use direct, unambiguous language.",
95
- },
96
- }
97
-
98
- # ---------- Prepare inputs ----------
99
- def _prepare_inputs_and_device(convo, image):
100
- if isinstance(image, (str, os.PathLike)):
101
- image = Image.open(image).convert("RGB")
102
-
103
- convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
104
- inputs = processor(text=[convo_string], images=[image], return_tensors="pt")
105
-
106
- for k, v in list(inputs.items()):
107
- if torch.is_tensor(v):
108
- # ensure [1, seq_len]
109
- if v.ndim == 1:
110
- v = v.unsqueeze(0)
111
- inputs[k] = v
112
- device = next(model.parameters()).device
113
- inputs = {k: v.to(device) for k, v in inputs.items() if torch.is_tensor(v)}
114
- return inputs
115
-
116
- # ---------- Decode ----------
117
- def _decode_output(inputs, output):
118
- try:
119
- input_len = inputs["input_ids"].shape[-1] if "input_ids" in inputs else 0
120
- decoded = processor.tokenizer.decode(
121
- output[0][input_len:], skip_special_tokens=True, clean_up_tokenization_spaces=False
122
- )
123
- return decoded.strip()
124
- except Exception as e:
125
- print(f"⚠️ Decode fallback: {e}")
126
- try:
127
- return processor.tokenizer.decode(output[0], skip_special_tokens=True).strip()
128
- except Exception:
129
- return ""
130
-
131
- def cleanup_after_inference():
132
- gc.collect()
133
- if torch.cuda.is_available():
134
- torch.cuda.empty_cache()
135
- torch.cuda.synchronize()
136
-
137
- # ---------- Generation ----------
138
- def run_image_chat_generation(convo, image, max_new_tokens=150):
139
- if not processor or not model:
140
- return None, "❌ Model not initialized."
141
- try:
142
- inputs = _prepare_inputs_and_device(convo, image)
143
-
144
- # ZeroGPU fix: remove unsupported args
145
- gen_kwargs = dict(
146
- **inputs,
147
- max_new_tokens=max_new_tokens,
148
- pad_token_id=processor.tokenizer.eos_token_id,
149
- eos_token_id=processor.tokenizer.eos_token_id,
150
- )
151
-
152
- with torch.no_grad():
153
- output = model.generate(**gen_kwargs)
154
-
155
- decoded = _decode_output(inputs, output)
156
- cleanup_after_inference()
157
- return decoded, None
158
- except Exception as e:
159
- cleanup_after_inference()
160
- return None, f"❌ Generation error: {str(e)}"
161
-
162
- # ---------- Caption helpers ----------
163
- def safe_generate_caption_direct(image, tone):
164
- tone_conf = TONE_PROMPTS.get(tone, TONE_PROMPTS["engaging"])
165
- convo = [
166
- {"role": "system", "content": tone_conf["system"]},
167
- {"role": "user", "content": tone_conf["prompt"]},
168
- ]
169
- decoded, err = run_image_chat_generation(convo, image)
170
- if err: return err
171
- return postprocess_caption(decoded.strip()) if decoded else "❌ Empty result"
172
-
173
- @torch.no_grad()
174
- def generate_engaging_only(image):
175
- return safe_generate_caption_direct(image, "engaging") if image else "❌ Upload image first"
176
-
177
- @torch.no_grad()
178
- def generate_casual_friend_only(image):
179
- return safe_generate_caption_direct(image, "casual_friend") if image else "❌ Upload image first"
180
-
181
- @torch.no_grad()
182
- def generate_uncensored_keywords_only(image):
183
- return safe_generate_caption_direct(image, "uncensored_keywords") if image else "❌ Upload image first"
184
-
185
- @torch.no_grad()
186
- def answer_question(image, question):
187
- if not image: return "❌ Upload image first"
188
- if not question.strip(): return "❌ Please ask a question"
189
- convo = [
190
- {"role": "system", "content": "You are an honest image analyst who answers directly."},
191
- {"role": "user", "content": f"Question about this image: {question.strip()}"},
192
- ]
193
- decoded, err = run_image_chat_generation(convo, image, max_new_tokens=200)
194
- return err if err else decoded.strip()
195
-
196
- # ---------- Gradio UI ----------
197
- with gr.Blocks(title="JoyCaption ZeroGPU Stable", theme=gr.themes.Soft()) as demo:
198
- gr.HTML(TITLE)
199
- with gr.Row():
200
- with gr.Column(scale=1):
201
- img = gr.Image(type="filepath", label="📸 Upload Image", height=400)
202
- q = gr.Textbox(label="❓ Ask a Question", lines=2)
203
- ask = gr.Button("Ask")
204
- qa = gr.Textbox(label="Answer", lines=4)
205
- with gr.Column(scale=1):
206
- b1 = gr.Button("✨ Engaging")
207
- o1 = gr.Textbox(lines=4)
208
- b2 = gr.Button("😎 Casual Friend")
209
- o2 = gr.Textbox(lines=4)
210
- b3 = gr.Button("🔴 Keywords")
211
- o3 = gr.Textbox(lines=4)
212
-
213
- b1.click(generate_engaging_only, inputs=img, outputs=o1)
214
- b2.click(generate_casual_friend_only, inputs=img, outputs=o2)
215
- b3.click(generate_uncensored_keywords_only, inputs=img, outputs=o3)
216
- ask.click(answer_question, inputs=[img, q], outputs=qa)
217
-
218
- if __name__ == "__main__":
219
- demo.launch()
 
1
+ """
2
+ Copy of the full `app.py` into the deploy folder for direct upload.
3
+ This file is a snapshot of the application's main entrypoint and should be
4
+ identical to the root `app.py` when uploading to Hugging Face Spaces.
5
+ """
6
+
7
  try:
8
  import spaces
9
+ # Ensure spaces.GPU exists and is a decorator
10
+ return f
 
 
 
 
 
 
 
 
11
  return _wrap
12
  spaces.GPU = _spaces_gpu
13
 
14
+
15
+
16
+
17
+
18
+
19
 
20
  import gradio as gr
21
  import torch
22
  from transformers import LlavaForConditionalGeneration, AutoProcessor
23
+ r'^(a photo of|an image of|a picture of|this is a photo of|this shows)\s*': '',
24
+
25
+ # Nudity precision corrections
26
+ r'\\btopless women\\b': lambda m: 'nude women' if 'naked' in text.lower() or 'nude' in text.lower() else 'topless women',
27
+ r'\\btopless woman\\b': lambda m: 'nude woman' if 'naked' in text.lower() or 'nude' in text.lower() else 'topless woman',
28
+
29
+ # Person count corrections
30
+ r'\\bthree women\\b': lambda m: 'two women' if text.count('woman') + text.count('female') <= 2 else 'three women',
31
+ r'\\bfour women\\b': lambda m: 'three women' if text.count('woman') + text.count('female') <= 3 else 'four women',
32
+
33
+ # Clothing precision
34
+ r'\\bwearing nothing\\b': 'nude',
35
+ r'\\bnot wearing.*clothes\\b': 'nude',
36
+ r'\\bcompletely naked\\b': 'nude',
37
+ r'\\bfully nude\\b': 'nude',
38
+ }
39
+
40
+ corrected_text = text
41
+
42
+ // Get all textareas and inputs from the page
43
+ const allInputs = document.querySelectorAll('textarea, input[type="text"]');
44
+
45
+ allInputs.forEach((field, index) => {
46
+ const placeholder = (field.placeholder || '').toLowerCase();
47
+ const value = field.value ? field.value.trim() : '';
48
+ interactive=True,
49
+ placeholder="Click the button above to generate engaging caption..."
50
+ )
51
+
52
+ # Casual Friend caption
53
+ with gr.Row():
54
+ with gr.Column(scale=4):
55
+ interactive=True,
56
+ placeholder="Click the button above to generate casual friend caption..."
57
+ )
58
+
59
+ # NSFW section removed - caused hallucination
60
+
61
+ # Keywords caption
62
+ with gr.Row():
63
+ with gr.Column(scale=4):
64
+ interactive=True,
65
+ placeholder="Click the button above to generate keywords caption..."
66
+ )
67
+
68
+ # Body Parts Focus section removed - caused hallucination
69
+
70
+ # Descriptive text removed for cleaner interface
71
+
72
+ # Export functionality
73
+ with gr.Row():
74
+ export_btn = gr.Button(
75
+ )
76
+
77
+ # NSFW button handler removed
78
+
79
+ generate_uncensored_btn.click(
80
+ generate_uncensored_keywords_only,
81
+ inputs=[image_input, keywords_input, custom_instruction_input],
82
+ )
83
+
84
+ # Body Parts Focus button handler removed
85
+
86
+ # Individual reload buttons - using direct generation for consistency
87
+ def reload_engaging_fn(image, custom_instruction):
88
+ return safe_generate_caption_direct(image, "engaging", custom_instruction=custom_instruction) if image else "❌ Upload image first"