JS6969 committed on
Commit
6231519
·
verified ·
1 Parent(s): 57c4398

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -36
app.py CHANGED
@@ -1,7 +1,9 @@
1
  import os, io, csv, time, json, hashlib, base64, zipfile, re
2
  from typing import List, Tuple, Dict, Any
3
 
4
- # Caches
 
 
5
  os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
6
  os.makedirs(os.environ["HF_HOME"], exist_ok=True)
7
 
@@ -10,8 +12,16 @@ from PIL import Image
10
  import torch
11
  from transformers import LlavaForConditionalGeneration, AutoProcessor
12
 
 
 
 
 
 
 
 
 
13
  # ────────────────────────────────────────────────────────
14
- # Paths & caches
15
  # ────────────────────────────────────────────────────────
16
  APP_DIR = os.getcwd()
17
  SESSION_FILE = "/tmp/session.json"
@@ -34,8 +44,8 @@ def _detect_gpu():
34
  return "cpu", 0, "CPU"
35
 
36
  BACKEND, VRAM_GB, GPU_NAME = _detect_gpu()
37
- DTYPE = torch.bfloat16 if BACKEND == "cuda" else torch.float32
38
  DEVICE = "cuda" if BACKEND == "cuda" else "cpu"
 
39
  MAX_SIDE_CAP = 1024 if BACKEND == "cuda" else 640
40
 
41
  processor = AutoProcessor.from_pretrained(MODEL_PATH)
@@ -66,8 +76,8 @@ STYLE_OPTIONS = [
66
  ]
67
 
68
  CAPTION_TYPE_MAP = {
69
- "Descriptive (short)": "Write a short describition of the most important visible elements only. No speculation.",
70
- "Descriptive (long)": "Write a long, highly detailed description for this image.",
71
 
72
  "Character training (short)": (
73
  "Output a concise, prompt-like caption for character LoRA/ID training. "
@@ -103,7 +113,7 @@ CAPTION_TYPE_MAP = {
103
 
104
  EXTRA_CHOICES = [
105
  "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
106
- "IGNORE all watermarks.",
107
  "Do NOT use any ambiguous language.",
108
  "ONLY describe the most important elements of the image.",
109
  "Include information about the ages of any people/characters when applicable.",
@@ -124,7 +134,7 @@ EXTRA_CHOICES = [
124
  NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
125
 
126
  # ────────────────────────────────────────────────────────
127
- # Helpers (hashing, thumbs, resize)
128
  # ────────────────────────────────────────────────────────
129
  def ensure_thumb(path: str, max_side=256) -> str:
130
  try:
@@ -150,6 +160,17 @@ def resize_for_model(im: Image.Image, max_side: int) -> Image.Image:
150
  s = max_side / max(w, h)
151
  return im.resize((int(w*s), int(h*s)), Image.LANCZOS)
152
 
 
 
 
 
 
 
 
 
 
 
 
153
  # ────────────────────────────────────────────────────────
154
  # Instruction + caption helpers
155
  # ────────────────────────────────────────────────────────
@@ -163,17 +184,6 @@ def final_instruction(style_list: List[str], extra_opts: List[str], name_value:
163
  core = core.replace("{name}", (name_value or "{NAME}").strip())
164
  return core
165
 
166
- def apply_prefix_suffix(caption: str, trigger_word: str, begin_text: str, end_text: str) -> str:
167
- parts = []
168
- if trigger_word.strip():
169
- parts.append(trigger_word.strip())
170
- if begin_text.strip():
171
- parts.append(begin_text.strip())
172
- parts.append(caption.strip())
173
- if end_text.strip():
174
- parts.append(end_text.strip())
175
- return " ".join([p for p in parts if p])
176
-
177
  @torch.no_grad()
178
  def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
179
  # Your requested role script:
@@ -190,6 +200,7 @@ def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tok
190
  do_sample=temp > 0,
191
  temperature=temp if temp > 0 else None,
192
  top_p=top_p if temp > 0 else None,
 
193
  )
194
  gen_ids = out[0, inputs["input_ids"].shape[1]:]
195
  return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
@@ -231,12 +242,11 @@ def load_settings() -> dict:
231
  "end": "",
232
  "shape_aliases_enabled": True,
233
  "shape_aliases": [],
234
- "excel_thumb_px": 128, # new default
235
  }
236
  for k, v in defaults.items():
237
  cfg.setdefault(k, v)
238
 
239
- # migrate legacy names
240
  legacy_map = {
241
  "Descriptive": "Descriptive (short)",
242
  "LoRA (Flux_D Realism)": "LoRA (Flux_D Realism) (short)",
@@ -325,7 +335,7 @@ def save_shape_alias_rows(enabled, df_rows):
325
  )
326
 
327
  # ────────────────────────────────────────────────────────
328
- # Import / Export helpers
329
  # ────────────────────────────────────────────────────────
330
  def export_csv_from_table(table_value: Any) -> str:
331
  data = table_value or []
@@ -377,7 +387,7 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
377
  ws.column_dimensions["B"].width = 42
378
  ws.column_dimensions["C"].width = 100
379
 
380
- # Approx px→points (Excel row height is points; ~0.75 pt per px @ 96dpi)
381
  row_h = int(thumb_px * 0.75)
382
 
383
  r_i = 2
@@ -417,8 +427,9 @@ def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
417
  return new
418
 
419
  # ────────────────────────────────────────────────────────
420
- # Batch captioning (returns rows, gallery, table)
421
  # ────────────────────────────────────────────────────────
 
422
  @torch.no_grad()
423
  def run_batch(
424
  files: List[Any],
@@ -475,6 +486,17 @@ def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[L
475
  ]
476
  return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
477
 
 
 
 
 
 
 
 
 
 
 
 
478
  # ────────────────────────────────────────────────────────
479
  # UI
480
  # ────────────────────────────────────────────────────────
@@ -501,7 +523,7 @@ BASE_CSS = """
501
  def logo_b64_img() -> str:
502
  candidates = [
503
  os.path.join(APP_DIR, "forgecaptions-logo.png"),
504
- os.path.join(APP_DIR, "captionforge-logo.png"), # fallback if you kept the old name
505
  "/home/user/app/forgecaptions-logo.png",
506
  "forgecaptions-logo.png",
507
  "captionforge-logo.png",
@@ -514,6 +536,9 @@ def logo_b64_img() -> str:
514
  return ""
515
 
516
  with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
 
 
 
517
  settings = load_settings()
518
  settings["styles"] = [s for s in settings.get("styles", []) if s in STYLE_OPTIONS] or ["Character training (short)"]
519
 
@@ -556,9 +581,9 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
556
  dataset_name = gr.Textbox(label="Dataset name (used for export file titles)", value=settings.get("dataset_name", "forgecaptions"))
557
  max_side = gr.Slider(256, MAX_SIDE_CAP, settings.get("max_side", min(896, MAX_SIDE_CAP)), step=32, label="Max side (resize)")
558
  excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128), step=8, label="Excel thumbnail size (px)")
559
- gr.Markdown("Generation (saved in settings): temperature 0.6 • top-p 0.9 • max_tokens 256")
560
 
561
- # Auto-refresh instruction & persist key controls
562
  def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
563
  instr = final_instruction(styles or ["Character training (short)"], extra or [], name_value)
564
  cfg = load_settings()
@@ -580,11 +605,10 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
580
  outputs=[instruction_preview]
581
  )
582
 
583
- # Set initial instruction text on load
584
  demo.load(lambda s,e,n: final_instruction(s or ["Character training (short)"], e or [], n),
585
  inputs=[style_checks, extra_opts, name_input], outputs=[instruction_preview])
586
 
587
- # ===== Shape Aliases
588
  with gr.Accordion("Shape Aliases", open=False):
589
  gr.Markdown(
590
  "### 🔷 Shape Aliases\n"
@@ -616,7 +640,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
616
  clear_btn.click(_clear_rows, outputs=[alias_table])
617
  save_btn.click(save_shape_alias_rows, inputs=[enable_aliases, alias_table], outputs=[save_status, alias_table])
618
 
619
- # ===== Tabs: Single & Batch (keeps gallery/table position below)
620
  with gr.Tabs():
621
  with gr.Tab("Single"):
622
  input_image_single = gr.Image(type="pil", label="Input Image", height=512, width=512)
@@ -647,7 +671,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
647
  input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
648
  run_button = gr.Button("Caption batch", variant="primary")
649
 
650
- # ===== Results/Table (position unchanged)
651
  rows_state = gr.State(load_session())
652
  autosave_md = gr.Markdown("Ready.")
653
 
@@ -764,14 +788,12 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
764
  inputs=[table, rows_state, excel_thumb_px], outputs=[xlsx_file, xlsx_file]
765
  )
766
 
767
- # Launch
768
  if __name__ == "__main__":
769
  demo.queue(max_size=64).launch(
770
  server_name="0.0.0.0",
771
  server_port=int(os.getenv("PORT", "7860")),
772
- ssr_mode=False, # turn off experimental SSR
773
- debug=True, # log stack traces
774
- show_error=True # show UI error boxes
775
- # share=True # only for local dev; not needed on Spaces
776
  )
777
-
 
1
  import os, io, csv, time, json, hashlib, base64, zipfile, re
2
  from typing import List, Tuple, Dict, Any
3
 
4
+ # ────────────────────────────────────────────────────────
5
+ # Cache locations (kept simple / persistent)
6
+ # ────────────────────────────────────────────────────────
7
  os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
8
  os.makedirs(os.environ["HF_HOME"], exist_ok=True)
9
 
 
12
  import torch
13
  from transformers import LlavaForConditionalGeneration, AutoProcessor
14
 
15
# Optional Hugging Face `spaces` integration. When the package imports and
# `spaces.GPU()` succeeds, `gpu` is that decorator; on any failure `gpu`
# becomes an identity decorator so `@gpu` still works on CPU / local runs.
try:
    import spaces
    gpu = spaces.GPU()
except Exception:  # package missing or unusable -> fall back to a no-op
    def gpu(f):
        """Return *f* unchanged (stand-in for the `spaces` GPU decorator)."""
        return f
22
+
23
  # ────────────────────────────────────────────────────────
24
+ # Paths & files
25
  # ────────────────────────────────────────────────────────
26
  APP_DIR = os.getcwd()
27
  SESSION_FILE = "/tmp/session.json"
 
44
  return "cpu", 0, "CPU"
45
 
46
  BACKEND, VRAM_GB, GPU_NAME = _detect_gpu()
 
47
  DEVICE = "cuda" if BACKEND == "cuda" else "cpu"
48
+ DTYPE = torch.bfloat16 if BACKEND == "cuda" else torch.float32
49
  MAX_SIDE_CAP = 1024 if BACKEND == "cuda" else 640
50
 
51
  processor = AutoProcessor.from_pretrained(MODEL_PATH)
 
76
  ]
77
 
78
  CAPTION_TYPE_MAP = {
79
+ "Descriptive (short)": "One sentence (≤25 words) describing the most important visible elements only. No speculation.",
80
+ "Descriptive (long)": "Write a detailed description for this image.",
81
 
82
  "Character training (short)": (
83
  "Output a concise, prompt-like caption for character LoRA/ID training. "
 
113
 
114
  EXTRA_CHOICES = [
115
  "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
116
+ "Do NOT include information about whether there is a watermark or not.",
117
  "Do NOT use any ambiguous language.",
118
  "ONLY describe the most important elements of the image.",
119
  "Include information about the ages of any people/characters when applicable.",
 
134
  NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
135
 
136
  # ────────────────────────────────────────────────────────
137
+ # Helpers (thumbs, resize, prefix/suffix)
138
  # ────────────────────────────────────────────────────────
139
  def ensure_thumb(path: str, max_side=256) -> str:
140
  try:
 
160
  s = max_side / max(w, h)
161
  return im.resize((int(w*s), int(h*s)), Image.LANCZOS)
162
 
163
def apply_prefix_suffix(caption: str, trigger_word: str, begin_text: str, end_text: str) -> str:
    """Wrap a caption with an optional trigger word, prefix and suffix.

    The pieces are emitted in the order trigger word, begin text, caption,
    end text. Each piece is stripped of surrounding whitespace, empty pieces
    are dropped, and the rest are joined with single spaces.

    Args:
        caption: The generated caption text.
        trigger_word: Optional token placed first.
        begin_text: Optional text placed before the caption.
        end_text: Optional text placed after the caption.

    Returns:
        The combined caption, or "" when every piece is empty.
    """
    # Callers may hand over None instead of "" for an unset field; treating
    # it as empty avoids an AttributeError on .strip().
    ordered = (trigger_word, begin_text, caption, end_text)
    cleaned = ((piece or "").strip() for piece in ordered)
    return " ".join(piece for piece in cleaned if piece)
173
+
174
  # ────────────────────────────────────────────────────────
175
  # Instruction + caption helpers
176
  # ────────────────────────────────────────────────────────
 
184
  core = core.replace("{name}", (name_value or "{NAME}").strip())
185
  return core
186
 
 
 
 
 
 
 
 
 
 
 
 
187
  @torch.no_grad()
188
  def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
189
  # Your requested role script:
 
200
  do_sample=temp > 0,
201
  temperature=temp if temp > 0 else None,
202
  top_p=top_p if temp > 0 else None,
203
+ use_cache=True,
204
  )
205
  gen_ids = out[0, inputs["input_ids"].shape[1]:]
206
  return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
 
242
  "end": "",
243
  "shape_aliases_enabled": True,
244
  "shape_aliases": [],
245
+ "excel_thumb_px": 128,
246
  }
247
  for k, v in defaults.items():
248
  cfg.setdefault(k, v)
249
 
 
250
  legacy_map = {
251
  "Descriptive": "Descriptive (short)",
252
  "LoRA (Flux_D Realism)": "LoRA (Flux_D Realism) (short)",
 
335
  )
336
 
337
  # ────────────────────────────────────────────────────────
338
+ # Exports
339
  # ────────────────────────────────────────────────────────
340
  def export_csv_from_table(table_value: Any) -> str:
341
  data = table_value or []
 
387
  ws.column_dimensions["B"].width = 42
388
  ws.column_dimensions["C"].width = 100
389
 
390
+ # px→points (~0.75 pt per screen px @ ~96dpi)
391
  row_h = int(thumb_px * 0.75)
392
 
393
  r_i = 2
 
427
  return new
428
 
429
  # ────────────────────────────────────────────────────────
430
+ # Batch captioning (GPU) + sync
431
  # ────────────────────────────────────────────────────────
432
+ @gpu
433
  @torch.no_grad()
434
  def run_batch(
435
  files: List[Any],
 
486
  ]
487
  return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
488
 
489
+ # Tiny GPU warmup for HF Spaces detection
490
@gpu
@torch.no_grad()
def _gpu_startup_warm():
    """Run one tiny throwaway caption to warm up the captioning path.

    Best-effort: the outcome is reported on stdout and any exception is
    caught rather than propagated.
    """
    try:
        # 64x64 mid-gray placeholder image; the generated text is discarded.
        placeholder = Image.new("RGB", (64, 64), (127, 127, 127))
        caption_once(placeholder, "Warm up.", temp=0.0, top_p=1.0, max_tokens=8)
        print("[ForgeCaptions] GPU warmup complete")
    except Exception as exc:
        print("[ForgeCaptions] GPU warmup skipped:", exc)
499
+
500
  # ────────────────────────────────────────────────────────
501
  # UI
502
  # ────────────────────────────────────────────────────────
 
523
  def logo_b64_img() -> str:
524
  candidates = [
525
  os.path.join(APP_DIR, "forgecaptions-logo.png"),
526
+ os.path.join(APP_DIR, "captionforge-logo.png"),
527
  "/home/user/app/forgecaptions-logo.png",
528
  "forgecaptions-logo.png",
529
  "captionforge-logo.png",
 
536
  return ""
537
 
538
  with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
539
+ # Trigger the GPU warmup (for HF GPU detection) when the UI loads; note demo.load fires on every page load, not just once
540
+ demo.load(_gpu_startup_warm, inputs=None, outputs=None)
541
+
542
  settings = load_settings()
543
  settings["styles"] = [s for s in settings.get("styles", []) if s in STYLE_OPTIONS] or ["Character training (short)"]
544
 
 
581
  dataset_name = gr.Textbox(label="Dataset name (used for export file titles)", value=settings.get("dataset_name", "forgecaptions"))
582
  max_side = gr.Slider(256, MAX_SIDE_CAP, settings.get("max_side", min(896, MAX_SIDE_CAP)), step=32, label="Max side (resize)")
583
  excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128), step=8, label="Excel thumbnail size (px)")
584
+ gr.Markdown("Generation (settings): temperature 0.6 • top-p 0.9 • max_tokens 256")
585
 
586
+ # Persist options + live instruction
587
  def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
588
  instr = final_instruction(styles or ["Character training (short)"], extra or [], name_value)
589
  cfg = load_settings()
 
605
  outputs=[instruction_preview]
606
  )
607
 
 
608
  demo.load(lambda s,e,n: final_instruction(s or ["Character training (short)"], e or [], n),
609
  inputs=[style_checks, extra_opts, name_input], outputs=[instruction_preview])
610
 
611
+ # ===== Shape Aliases (improved UX: add row / clear / save)
612
  with gr.Accordion("Shape Aliases", open=False):
613
  gr.Markdown(
614
  "### 🔷 Shape Aliases\n"
 
640
  clear_btn.click(_clear_rows, outputs=[alias_table])
641
  save_btn.click(save_shape_alias_rows, inputs=[enable_aliases, alias_table], outputs=[save_status, alias_table])
642
 
643
+ # ===== Tabs (Single + Batch)
644
  with gr.Tabs():
645
  with gr.Tab("Single"):
646
  input_image_single = gr.Image(type="pil", label="Input Image", height=512, width=512)
 
671
  input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
672
  run_button = gr.Button("Caption batch", variant="primary")
673
 
674
+ # ===== Results + Table (kept in the same place)
675
  rows_state = gr.State(load_session())
676
  autosave_md = gr.Markdown("Ready.")
677
 
 
788
  inputs=[table, rows_state, excel_thumb_px], outputs=[xlsx_file, xlsx_file]
789
  )
790
 
791
+ # Launch (disable experimental SSR to reduce churn)
792
  if __name__ == "__main__":
793
  demo.queue(max_size=64).launch(
794
  server_name="0.0.0.0",
795
  server_port=int(os.getenv("PORT", "7860")),
796
+ ssr_mode=False,
797
+ debug=True,
798
+ show_error=True,
 
799
  )