JS6969 committed on
Commit
9f5ffa7
·
verified ·
1 Parent(s): 1ceac14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -109
app.py CHANGED
@@ -5,7 +5,9 @@
5
  # ------------------------------
6
  # 0) Imports & environment
7
  # ------------------------------
8
- import os, io, csv, time, json, base64, re, zipfile
 
 
9
  from typing import List, Tuple, Dict, Any
10
 
11
  # Persist model caches between restarts
@@ -15,7 +17,7 @@ os.makedirs(os.environ["HF_HOME"], exist_ok=True)
15
  import gradio as gr
16
  from PIL import Image
17
  import torch
18
- from transformers import LlavaForConditionalGeneration, AutoProcessor
19
 
20
  # Optional deps for import/export (we handle gracefully if missing)
21
  try:
@@ -23,6 +25,13 @@ try:
23
  except Exception:
24
  pd = None
25
 
 
 
 
 
 
 
 
26
  # Hugging Face Spaces GPU decorator (no-op locally)
27
  try:
28
  import spaces
@@ -37,12 +46,8 @@ except Exception:
37
  APP_DIR = os.getcwd()
38
  SESSION_FILE = "/tmp/forge_session.json"
39
  # --- Branding
40
- LOGO_AUTOFIT = False # match logo height to title+subtitles stack
41
- LOGO_HEIGHT_PX = 60 # used only if LOGO_AUTOFIT is False
42
- LOGO_SCALE = 0.7 # multiplier when auto-fitting
43
- LOGO_AUTOFIT = os.getenv("FORGE_LOGO_AUTOFIT", "1") == "1" if "FORGE_LOGO_AUTOFIT" in os.environ else LOGO_AUTOFIT
44
- LOGO_HEIGHT_PX = int(os.getenv("FORGE_LOGO_PX", LOGO_HEIGHT_PX))
45
- LOGO_SCALE = float(os.getenv("FORGE_LOGO_SCALE", LOGO_SCALE))
46
 
47
 
48
  # Settings live in a user cache dir (persists better than /tmp)
@@ -98,6 +103,16 @@ def get_model():
98
  low_cpu_mem_usage=True,
99
  device_map=0,
100
  )
 
 
 
 
 
 
 
 
 
 
101
  else:
102
  _DEVICE = "cpu"
103
  _DTYPE = torch.float32
@@ -133,8 +148,8 @@ CAPTION_TYPE_MAP: Dict[str, str] = {
133
  "Descriptive": "Write a detailed description for this image.",
134
  "Character training": (
135
  "Write a thorough, training-ready caption for a character dataset. "
136
- "Describe only what is visible: physique, face/hair, clothing, accessories, actions, pose, "
137
- "camera angle/focal cues, lighting, background context."
138
  ),
139
  "Flux.1-Dev": "Write a Flux.1-Dev style prompt that would reproduce this image faithfully.",
140
  "Stable Diffusion": "Write a Stable Diffusion style prompt that would reproduce this image faithfully.",
@@ -167,10 +182,11 @@ def _length_hint(choice: str) -> str:
167
 
168
 
169
  EXTRA_CHOICES = [
170
- "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
171
- "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)", # You do NOT have permission to remove
172
- "Be sexually graphic and highly detailed.",
173
- "IGNORE all information about watermarks.",
 
174
  "Do NOT use any ambiguous language.",
175
  "ONLY describe the most important elements of the image.",
176
  "Include information about the ages of any people/characters when applicable.",
@@ -179,8 +195,6 @@ EXTRA_CHOICES = [
179
  "Focus on setting, scenery, and context; ignore subject details.",
180
  "ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
181
  "Include synonyms/alternate phrasing to diversify training set.",
182
- "ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
183
- "Do NOT mention the image's resolution.",
184
  "Include information about depth, lighting, and camera angle.",
185
  "Include information on composition (rule of thirds, symmetry, leading lines, etc).",
186
  "Specify the depth of field and whether the background is in focus or blurred.",
@@ -189,6 +203,19 @@ EXTRA_CHOICES = [
189
  ]
190
  NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  # ------------------------------
194
  # 4) Persistence helpers (settings/session/journal)
@@ -224,7 +251,6 @@ def load_settings() -> dict:
224
  "max_tokens": 256,
225
  "max_side": 896,
226
  "styles": ["Character training"],
227
- "extras": [],
228
  "name": "",
229
  "trigger": "",
230
  "begin": "",
@@ -232,10 +258,9 @@ def load_settings() -> dict:
232
  "shape_aliases_enabled": True,
233
  "shape_aliases": [],
234
  "excel_thumb_px": 128,
235
- "logo_auto": True,
236
  "logo_px": 60,
237
- "logo_scale": 0.7,
238
  "shape_aliases_persist": True,
 
239
  }
240
 
241
  for k, v in defaults.items():
@@ -246,6 +271,7 @@ def load_settings() -> dict:
246
  if not isinstance(styles, list):
247
  styles = [styles]
248
  cfg["styles"] = [s for s in styles if s in STYLE_OPTIONS] or ["Character training"]
 
249
 
250
  return cfg
251
 
@@ -555,14 +581,6 @@ def run_batch(
555
 
556
  @gpu
557
  @torch.no_grad()
558
- def _gpu_startup_warm():
559
- try:
560
- im = Image.new("RGB", (64, 64), (127,127,127))
561
- _ = caption_once(im, "Warm up.", temp=0.0, top_p=1.0, max_tokens=8)
562
- print("[ForgeCaptions] GPU warmup complete")
563
- except Exception as e:
564
- print("[ForgeCaptions] GPU warmup skipped:", e)
565
-
566
 
567
  # ------------------------------
568
  # 9) Export/Import helpers (CSV/XLSX/TXT ZIP)
@@ -762,87 +780,33 @@ def import_captions_file(file_path: str, session_rows: List[dict]) -> Tuple[List
762
 
763
 
764
  # ------------------------------
765
- # 10) UI header helper (logo auto-fit to match title/subtitle block)
766
  # ------------------------------
767
- def _render_header_html(auto: bool, px: int, scale: float) -> str:
768
- auto_js = "true" if auto else "false"
769
  return f"""
770
  <div class="cf-hero">
771
  {logo_b64_img()}
772
  <div class="cf-text">
773
  <h1 class="cf-title">ForgeCaptions</h1>
774
- <div class="cf-sub">JoyCaption Image Captioning </div>
775
  <div class="cf-sub">Import CSV/XLSX • Export CSV/XLSX/TXT</div>
776
- <div class="cf-sub">Batch 10-20 per Zero GPU run • Larger batches with GPU</div>
777
  </div>
778
  </div>
779
  <hr>
780
  <style>
781
- .cf-logo {{ height: auto; width: auto; object-fit: contain; display:block; }}
782
- </style>
783
- <script>
784
- (function() {{
785
- const AUTO = {auto_js};
786
- const PX = {int(px)};
787
- const SCALE = {float(scale)};
788
- const MIN = 60, MAX = 100; // hard clamps
789
-
790
- function outerH(el) {{
791
- if (!el) return 0;
792
- const r = el.getBoundingClientRect();
793
- const cs = getComputedStyle(el);
794
- return r.height + parseFloat(cs.marginTop) + parseFloat(cs.marginBottom);
795
  }}
796
-
797
- function stackHeight(root) {{
798
- // Sum title + every subtitle's full box height (including margins)
799
- const title = root.querySelector('.cf-title');
800
- const subs = root.querySelectorAll('.cf-sub');
801
- let h = outerH(title);
802
- subs.forEach(s => h += outerH(s));
803
- // tiny buffer so the two columns don't look mismatched if rounding occurs
804
- return Math.round(h + 2);
805
  }}
806
-
807
- function fit() {{
808
- const logo = document.querySelector('.cf-logo');
809
- const text = document.querySelector('.cf-text');
810
- if (!logo || !text) return;
811
- if (AUTO) {{
812
- const total = stackHeight(text);
813
- const target = Math.max(MIN, Math.min(MAX, Math.round(total * SCALE)));
814
- logo.style.height = target + 'px';
815
- }} else {{
816
- logo.style.height = Math.max(MIN, Math.min(MAX, PX)) + 'px';
817
- }}
818
- }}
819
-
820
- // Re-fit at the right times
821
- const textNode = document.querySelector('.cf-text');
822
-
823
- // 1) Once fonts are ready (prevents under-measuring before webfonts load)
824
- if (document.fonts && document.fonts.ready) {{
825
- document.fonts.ready.then(() => requestAnimationFrame(fit));
826
- }}
827
-
828
- // 2) On resize
829
- window.addEventListener('resize', () => requestAnimationFrame(fit), {{ passive: true }});
830
-
831
- // 3) Whenever the text block changes size (line wrapping, content edits)
832
- if (window.ResizeObserver && textNode) {{
833
- const ro = new ResizeObserver(() => requestAnimationFrame(fit));
834
- ro.observe(textNode);
835
- }}
836
-
837
- // 4) As a fallback, run a couple times after first paint
838
- requestAnimationFrame(fit);
839
- setTimeout(fit, 100);
840
- setTimeout(fit, 400);
841
- }})();
842
- </script>
843
  """
844
 
845
-
846
  # ------------------------------
847
  # 11) UI (Blocks)
848
  # ------------------------------
@@ -867,11 +831,12 @@ BASE_CSS = """
867
 
868
  with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
869
  # Ensure Spaces sees a GPU function (without touching CUDA in main)
870
- demo.load(_gpu_startup_warm, inputs=None, outputs=None)
871
 
872
  # ---- Header
873
  settings = load_settings()
874
- header_html = gr.HTML(_render_header_html(LOGO_AUTOFIT, LOGO_HEIGHT_PX, LOGO_SCALE))
 
875
 
876
 
877
  # ---- Controls group
@@ -923,10 +888,10 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
923
  step=8, label="Excel thumbnail size (px)")
924
  # Chunking
925
  chunk_mode = gr.Radio(
926
- choices=["Auto", "Manual (all at once)", "Manual (step)"],
927
  value="Manual (step)", label="Batch mode"
928
  )
929
- chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
930
  gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
931
  no_time_limit = gr.Checkbox(value=False, label="No time limit (ignore above)")
932
 
@@ -1033,15 +998,29 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1033
  outputs=[single_caption_out]
1034
  )
1035
 
 
 
 
 
 
 
 
 
 
1036
  with gr.Tab("Batch"):
1037
  with gr.Accordion("Uploaded images", open=True):
1038
- input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
1039
- run_button = gr.Button("Caption batch", variant="primary")
1040
 
1041
- with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
1042
- import_file = gr.File(label="Choose .csv or .xlsx", file_types=[".csv", ".xlsx"], type="filepath")
1043
- import_btn = gr.Button("Import into current session")
 
 
 
 
 
1044
 
 
1045
  # ---- Results area (gallery left / table right)
1046
  rows_state = gr.State(load_session())
1047
  autosave_md = gr.Markdown("Ready.")
@@ -1049,9 +1028,9 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1049
  remaining_state = gr.State([])
1050
 
1051
  with gr.Row():
1052
- with gr.Column(scale=1):
1053
  gallery = gr.Gallery(
1054
- label="Results (image + caption)",
1055
  show_label=True,
1056
  columns=3,
1057
  elem_id="cfGal",
@@ -1059,7 +1038,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1059
  )
1060
  with gr.Column(scale=1, elem_id="cfTableWrap", elem_classes=["cf-scroll"]):
1061
  table = gr.Dataframe(
1062
- label="Editable captions (whole session)",
1063
  value=_rows_to_table(load_session()),
1064
  headers=["filename", "caption"],
1065
  interactive=True,
@@ -1156,7 +1135,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1156
  prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(remaining)}"
1157
  return new_rows, gal, tbl, stamp, remaining, panel_vis, gr.update(value=msg), gr.update(value=prog)
1158
 
1159
- # Auto / all-at-once
1160
  new_rows, gal, tbl, stamp, leftover, done, total = run_batch(
1161
  files, rows or [], instr, t, p, m, int(ms), budget
1162
  )
@@ -1168,9 +1147,21 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1168
  run_button.click(
1169
  _run_click,
1170
  inputs=[input_files, rows_state, instruction_preview, max_side, chunk_mode, chunk_size, gpu_budget, no_time_limit],
1171
- outputs=[rows_state, gallery, table, autosave_md, remaining_state, step_panel, step_msg, progress_md]
 
 
 
 
 
 
 
 
 
 
 
 
 
1172
  )
1173
-
1174
  def _step_next(remain, rows, instr, ms, csize, budget_s, no_limit):
1175
  t, p, m = _tpms()
1176
  remain = remain or []
@@ -1180,7 +1171,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1180
  return (
1181
  rows,
1182
  gr.update(value="No files remaining."),
1183
- gr.update(visible=False),
1184
  [],
1185
  [],
1186
  [],
@@ -1218,6 +1209,9 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1218
  return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
1219
  table.change(sync_table_to_session, inputs=[table, rows_state], outputs=[rows_state, gallery, autosave_md])
1220
 
 
 
 
1221
  # ---- Import hook
1222
  def _do_import(fpath, rows):
1223
  new_rows, gal, tbl, stamp = import_captions_file(fpath, rows or [])
 
5
  # ------------------------------
6
  # 0) Imports & environment
7
  # ------------------------------
8
+ import os,
9
+ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
10
+ import io, csv, time, json, base64, re, zipfile
11
  from typing import List, Tuple, Dict, Any
12
 
13
  # Persist model caches between restarts
 
17
  import gradio as gr
18
  from PIL import Image
19
  import torch
20
+ from transformers import LlavaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
21
 
22
  # Optional deps for import/export (we handle gracefully if missing)
23
  try:
 
25
  except Exception:
26
  pd = None
27
 
28
+ # Liger is optional; skip if missing
29
+ try:
30
+ from liger_kernel.transformers import apply_liger_kernel_to_llama
31
+ except Exception:
32
+ def apply_liger_kernel_to_llama(*args, **kwargs):
33
+ pass
34
+
35
  # Hugging Face Spaces GPU decorator (no-op locally)
36
  try:
37
  import spaces
 
46
  APP_DIR = os.getcwd()
47
  SESSION_FILE = "/tmp/forge_session.json"
48
  # --- Branding
49
+
50
+ LOGO_HEIGHT_PX = int(os.getenv("FORGE_LOGO_PX", 60))
 
 
 
 
51
 
52
 
53
  # Settings live in a user cache dir (persists better than /tmp)
 
103
  low_cpu_mem_usage=True,
104
  device_map=0,
105
  )
106
+ try:
107
+ from liger_kernel.transformers import apply_liger_kernel_to_llama
108
+ lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
109
+ if lm is not None:
110
+ ok = apply_liger_kernel_to_llama(lm)
111
+ print(f"[liger] enabled: {bool(ok)}")
112
+ else:
113
+ print("[liger] not enabled: LLM submodule not found")
114
+ except Exception as e:
115
+ print(f"[liger] not enabled: {e}")
116
  else:
117
  _DEVICE = "cpu"
118
  _DTYPE = torch.float32
 
148
  "Descriptive": "Write a detailed description for this image.",
149
  "Character training": (
150
  "Write a thorough, training-ready caption for a character dataset. "
151
+ "Describe subject appearance (physique, face/hair), clothing and accessories, actions/pose/gesture, camera angle/focal cues."
152
+ "If multiple subjects are present, describe each briefly (most prominent first) and distinguish them by visible traits."
153
  ),
154
  "Flux.1-Dev": "Write a Flux.1-Dev style prompt that would reproduce this image faithfully.",
155
  "Stable Diffusion": "Write a Stable Diffusion style prompt that would reproduce this image faithfully.",
 
182
 
183
 
184
  EXTRA_CHOICES = [
185
+ "Only include a character's modifiable, style-level attributes (hair style/color, makeup, clothing/accessories, pose, expression). Do NOT mention identity traits (skin tone, age, body type).",
186
+ "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, etc.).", # You do NOT have permission to remove
187
+ "Be sexually graphic and describe sexual position when visible.",
188
+ "Exclude brand names, watermarks, artist names, file/meta info, quality terms, hashtags, and resolution notes.",
189
+ "Arrange information in the order: Subject; Clothing/Accessories; Action/Pose; Setting/Environment; Lighting/Camera.",
190
  "Do NOT use any ambiguous language.",
191
  "ONLY describe the most important elements of the image.",
192
  "Include information about the ages of any people/characters when applicable.",
 
195
  "Focus on setting, scenery, and context; ignore subject details.",
196
  "ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
197
  "Include synonyms/alternate phrasing to diversify training set.",
 
 
198
  "Include information about depth, lighting, and camera angle.",
199
  "Include information on composition (rule of thirds, symmetry, leading lines, etc).",
200
  "Specify the depth of field and whether the background is in focus or blurred.",
 
203
  ]
204
  NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
205
 
206
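+ # Extras pre-selected by default (each entry also appears in EXTRA_CHOICES), plus the full list of selectable extras used to validate saved settings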
207
+ DEFAULT_EXTRA_CHOICES = [
208
+ "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, etc.).",
209
+ "Be sexually graphic and describe sexual position when visible.",
210
+ "Exclude brand names, watermarks, artist names, file/meta info, quality terms, hashtags, and resolution notes.",
211
+ "Arrange information in the order: Subject; Clothing/Accessories; Action/Pose; Setting/Environment; Lighting/Camera.",
212
+ ]
213
+ ALL_EXTRA_CHOICES = [NAME_OPTION] + EXTRA_CHOICES
214
+
215
+ def _valid_extras(selected: list[str] | None) -> list[str]:
216
+ allowed = set(ALL_EXTRA_CHOICES)
217
+ return [x for x in (selected or []) if x in allowed]
218
+
219
 
220
  # ------------------------------
221
  # 4) Persistence helpers (settings/session/journal)
 
251
  "max_tokens": 256,
252
  "max_side": 896,
253
  "styles": ["Character training"],
 
254
  "name": "",
255
  "trigger": "",
256
  "begin": "",
 
258
  "shape_aliases_enabled": True,
259
  "shape_aliases": [],
260
  "excel_thumb_px": 128,
 
261
  "logo_px": 60,
 
262
  "shape_aliases_persist": True,
263
+ "extras": DEFAULT_EXTRA_CHOICES,
264
  }
265
 
266
  for k, v in defaults.items():
 
271
  if not isinstance(styles, list):
272
  styles = [styles]
273
  cfg["styles"] = [s for s in styles if s in STYLE_OPTIONS] or ["Character training"]
274
+ cfg["extras"] = _valid_extras(cfg.get("extras"))
275
 
276
  return cfg
277
 
 
581
 
582
  @gpu
583
  @torch.no_grad()
 
 
 
 
 
 
 
 
584
 
585
  # ------------------------------
586
  # 9) Export/Import helpers (CSV/XLSX/TXT ZIP)
 
780
 
781
 
782
  # ------------------------------
783
+ # 10) UI header helper (fixed logo size)
784
  # ------------------------------
785
+ def _render_header_html(px: int) -> str:
 
786
  return f"""
787
  <div class="cf-hero">
788
  {logo_b64_img()}
789
  <div class="cf-text">
790
  <h1 class="cf-title">ForgeCaptions</h1>
791
+ <div class="cf-sub">JoyCaption Image Captioning</div>
792
  <div class="cf-sub">Import CSV/XLSX • Export CSV/XLSX/TXT</div>
793
+ <div class="cf-sub">Batch 10-20 per Zero GPU run • Larger batches with dedicated GPU</div>
794
  </div>
795
  </div>
796
  <hr>
797
  <style>
798
+ .cf-logo {{
799
+ height: {int(px)}px; /* fixed height */
800
+ width: auto;
801
+ object-fit: contain;
802
+ display: block;
 
 
 
 
 
 
 
 
 
803
  }}
804
+ @media (max-width: 640px) {{
805
+ .cf-logo {{ height: {max(60, int(px) - 12)}px; }} /* optional small-screen tweak */
 
 
 
 
 
 
 
806
  }}
807
+ </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  """
809
 
 
810
  # ------------------------------
811
  # 11) UI (Blocks)
812
  # ------------------------------
 
831
 
832
  with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
833
  # Ensure Spaces sees a GPU function (without touching CUDA in main)
834
+ demo.load(inputs=None, outputs=None)
835
 
836
  # ---- Header
837
  settings = load_settings()
838
+ header_html = gr.HTML(_render_header_html(LOGO_HEIGHT_PX))
839
+
840
 
841
 
842
  # ---- Controls group
 
888
  step=8, label="Excel thumbnail size (px)")
889
  # Chunking
890
  chunk_mode = gr.Radio(
891
+ choices=["Auto", "Manual (step)"],
892
  value="Manual (step)", label="Batch mode"
893
  )
894
+ chunk_size = gr.Slider(1, 200, value=15, step=1, label="Chunk size")
895
  gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
896
  no_time_limit = gr.Checkbox(value=False, label="No time limit (ignore above)")
897
 
 
998
  outputs=[single_caption_out]
999
  )
1000
 
1001
+ # with gr.Tab("Batch"):
1002
+ # with gr.Accordion("Uploaded images", open=True):
1003
+ # input_files = gr.File(label="Drop images (or click to select)", file_types=["image"], file_count="multiple", type="filepath")
1004
+ # run_button = gr.Button("Caption batch", variant="primary")
1005
+
1006
+ # with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
1007
+ # import_file = gr.File(label="Choose .csv or .xlsx", file_types=[".csv", ".xlsx"], type="filepath")
1008
+ # import_btn = gr.Button("Import into current session")
1009
+
1010
  with gr.Tab("Batch"):
1011
  with gr.Accordion("Uploaded images", open=True):
1012
+ input_files = gr.File(label="Drop images (or click to select)", file_types=["image"], file_count="multiple",)
 
1013
 
1014
+ run_button = gr.Button("Caption batch", variant="primary")
1015
+ preview_gallery = gr.Gallery(
1016
+ label="Preview (un-captioned)",
1017
+ show_label=True,
1018
+ columns=5,
1019
+ height=220,
1020
+ )
1021
+ input_files.change(on_files_changed, inputs=[input_files], outputs=[preview_gallery])
1022
 
1023
+
1024
  # ---- Results area (gallery left / table right)
1025
  rows_state = gr.State(load_session())
1026
  autosave_md = gr.Markdown("Ready.")
 
1028
  remaining_state = gr.State([])
1029
 
1030
  with gr.Row():
1031
+ with gr.Column(scale=2):
1032
  gallery = gr.Gallery(
1033
+ label="Results",
1034
  show_label=True,
1035
  columns=3,
1036
  elem_id="cfGal",
 
1038
  )
1039
  with gr.Column(scale=1, elem_id="cfTableWrap", elem_classes=["cf-scroll"]):
1040
  table = gr.Dataframe(
1041
+ label="Editable captions",
1042
  value=_rows_to_table(load_session()),
1043
  headers=["filename", "caption"],
1044
  interactive=True,
 
1135
  prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(remaining)}"
1136
  return new_rows, gal, tbl, stamp, remaining, panel_vis, gr.update(value=msg), gr.update(value=prog)
1137
 
1138
+ # Auto
1139
  new_rows, gal, tbl, stamp, leftover, done, total = run_batch(
1140
  files, rows or [], instr, t, p, m, int(ms), budget
1141
  )
 
1147
  run_button.click(
1148
  _run_click,
1149
  inputs=[input_files, rows_state, instruction_preview, max_side, chunk_mode, chunk_size, gpu_budget, no_time_limit],
1150
+ outputs=[rows_state, gallery, table, autosave_md, remaining_state, step_panel, step_msg, progress_md],
1151
+ ).then(
1152
+ lambda rows: [(Image.open(r["path"]).convert("RGB"), r["caption"]) for r in rows],
1153
+ inputs=[rows_state],
1154
+ outputs=[gallery],
1155
+ )
1156
+ table.change(
1157
+ sync_table_to_session,
1158
+ inputs=[table, rows_state],
1159
+ outputs=[rows_state, captions_text],
1160
+ ).then(
1161
+ lambda rows: [(Image.open(r["path"]).convert("RGB"), r["caption"]) for r in rows],
1162
+ inputs=[rows_state],
1163
+ outputs=[gallery],
1164
  )
 
1165
  def _step_next(remain, rows, instr, ms, csize, budget_s, no_limit):
1166
  t, p, m = _tpms()
1167
  remain = remain or []
 
1171
  return (
1172
  rows,
1173
  gr.update(value="No files remaining."),
1174
+ gr.update(visible=True),
1175
  [],
1176
  [],
1177
  [],
 
1209
  return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
1210
  table.change(sync_table_to_session, inputs=[table, rows_state], outputs=[rows_state, gallery, autosave_md])
1211
 
1212
+ def new_session() -> Tuple[List[dict], list, list, str]:
1213
+ return [], [], _rows_to_table([]), ""
1214
+
1215
  # ---- Import hook
1216
  def _do_import(fpath, rows):
1217
  new_rows, gal, tbl, stamp = import_captions_file(fpath, rows or [])