Mayo committed on
Commit
f05100a
·
unverified ·
1 Parent(s): f3a6ee8

fix: inpainting OOM & pipeline error handling

Browse files
koharu-app/bin/pipeline.rs CHANGED
@@ -235,6 +235,16 @@ async fn run() -> Result<()> {
235
  let ensure_translation_fallback = !cli.with_translate;
236
 
237
  let cancel = Arc::new(AtomicBool::new(false));
 
 
 
 
 
 
 
 
 
 
238
  let result = koharu_app::pipeline::run(
239
  session.clone(),
240
  app.registry.clone(),
@@ -245,11 +255,16 @@ async fn run() -> Result<()> {
245
  spec,
246
  cancel,
247
  Some(progress_sink),
 
248
  )
249
  .await;
250
 
251
  match &result {
252
- Ok(()) => eprintln!("=> pipeline succeeded"),
 
 
 
 
253
  Err(e) => eprintln!("=> pipeline failed: {e:#}"),
254
  }
255
 
@@ -261,7 +276,7 @@ async fn run() -> Result<()> {
261
  .with_context(|| format!("dump artifacts to {}", cli.output_dir.display()))?;
262
 
263
  app.close_project().await.ok();
264
- result
265
  }
266
 
267
  /// Load `AppConfig` from TOML at `path` or default.
 
235
  let ensure_translation_fallback = !cli.with_translate;
236
 
237
  let cancel = Arc::new(AtomicBool::new(false));
238
+ let warning_sink: koharu_app::pipeline::WarningSink =
239
+ Arc::new(|tick: koharu_app::pipeline::WarningTick| {
240
+ eprintln!(
241
+ "warn: step '{}' failed on page {}/{}: {}",
242
+ tick.step_id,
243
+ tick.page_index + 1,
244
+ tick.total_pages,
245
+ tick.message
246
+ );
247
+ });
248
  let result = koharu_app::pipeline::run(
249
  session.clone(),
250
  app.registry.clone(),
 
255
  spec,
256
  cancel,
257
  Some(progress_sink),
258
+ Some(warning_sink),
259
  )
260
  .await;
261
 
262
  match &result {
263
+ Ok(outcome) if outcome.warning_count == 0 => eprintln!("=> pipeline succeeded"),
264
+ Ok(outcome) => eprintln!(
265
+ "=> pipeline finished with {} failed step(s)",
266
+ outcome.warning_count
267
+ ),
268
  Err(e) => eprintln!("=> pipeline failed: {e:#}"),
269
  }
270
 
 
276
  .with_context(|| format!("dump artifacts to {}", cli.output_dir.display()))?;
277
 
278
  app.close_project().await.ok();
279
+ result.map(|_| ())
280
  }
281
 
282
  /// Load `AppConfig` from TOML at `path` or default.
koharu-app/src/pipeline/engines/aot.rs CHANGED
@@ -1,14 +1,15 @@
1
- //! AOT inpainting. Simpler than Lama: direct source + segment → result.
 
 
2
  //!
3
- //! With `ctx.options.region`, composites onto the existing `Image { Inpainted }`
4
- //! (falling back to Source) so repair-brush strokes only affect the touched
5
- //! area. AOT inference has no blockwise overload, so we crop the base image
6
- //! and mask to the region, inpaint the crop, and paste back.
7
 
8
  use anyhow::{Result, anyhow};
9
  use async_trait::async_trait;
10
- use image::{DynamicImage, GenericImage, GenericImageView};
11
- use koharu_core::{ImageRole, MaskRole, Op};
12
  use koharu_ml::aot_inpainting::AotInpainting;
13
 
14
  use crate::pipeline::artifacts::Artifact;
@@ -26,31 +27,22 @@ impl Engine for Model {
26
  .ok_or_else(|| anyhow!("no Segment mask on page"))?;
27
  let mask = ctx.blobs.load_image(&mask_ref)?;
28
 
29
- let result = match ctx.options.region {
30
- None => {
31
- let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
32
- self.0.inference(&image, &mask)?
33
- }
34
  Some(r) => {
35
  let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
36
  Some((_, blob)) => ctx.blobs.load_image(&blob)?,
37
  None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
38
  };
39
- let (w, h) = base.dimensions();
40
- let x0 = r.x.min(w.saturating_sub(1));
41
- let y0 = r.y.min(h.saturating_sub(1));
42
- let rw = r.width.min(w - x0).max(1);
43
- let rh = r.height.min(h - y0).max(1);
44
- let image_crop = DynamicImage::ImageRgba8(base.view(x0, y0, rw, rh).to_image());
45
- let mask_crop =
46
- DynamicImage::ImageLuma8(mask.to_luma8().view(x0, y0, rw, rh).to_image());
47
- let patched = self.0.inference(&image_crop, &mask_crop)?;
48
- let mut out = base;
49
- out.copy_from(&patched, x0, y0)?;
50
- out
51
  }
52
  };
53
 
 
54
  let (w, h) = image_dimensions(&result);
55
  let blob = ctx.blobs.put_webp(&result)?;
56
  Ok(vec![upsert_image_blob(
@@ -64,6 +56,23 @@ impl Engine for Model {
64
  }
65
  }
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  inventory::submit! {
68
  EngineInfo {
69
  id: "aot-inpainting",
 
1
+ //! AOT inpainting. Direct source + segment → result. Subdivision is handled
2
+ //! by [`koharu_ml::inpainting::run_inpaint`] (shared with Lama) — this engine
3
+ //! only wires up the scene I/O.
4
  //!
5
+ //! For repair-brush (`ctx.options.region`), composite onto the existing
6
+ //! `Image { Inpainted }` if present (fallback Source) and zero out mask
7
+ //! pixels outside the region so only that area is reprocessed.
 
8
 
9
  use anyhow::{Result, anyhow};
10
  use async_trait::async_trait;
11
+ use image::{DynamicImage, GrayImage, Luma};
12
+ use koharu_core::{ImageRole, MaskRole, Op, Region};
13
  use koharu_ml::aot_inpainting::AotInpainting;
14
 
15
  use crate::pipeline::artifacts::Artifact;
 
27
  .ok_or_else(|| anyhow!("no Segment mask on page"))?;
28
  let mask = ctx.blobs.load_image(&mask_ref)?;
29
 
30
+ let (image, mask) = match ctx.options.region {
 
 
 
 
31
  Some(r) => {
32
  let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
33
  Some((_, blob)) => ctx.blobs.load_image(&blob)?,
34
  None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
35
  };
36
+ let clipped = clip_mask_to_region(&mask, &r);
37
+ (base, clipped)
38
+ }
39
+ None => {
40
+ let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
41
+ (image, mask)
 
 
 
 
 
 
42
  }
43
  };
44
 
45
+ let result = self.0.inference(&image, &mask)?;
46
  let (w, h) = image_dimensions(&result);
47
  let blob = ctx.blobs.put_webp(&result)?;
48
  Ok(vec![upsert_image_blob(
 
56
  }
57
  }
58
 
59
+ fn clip_mask_to_region(mask: &DynamicImage, region: &Region) -> DynamicImage {
60
+ let src = mask.to_luma8();
61
+ let (w, h) = src.dimensions();
62
+ let x0 = region.x.min(w);
63
+ let y0 = region.y.min(h);
64
+ let x1 = region.x.saturating_add(region.width).min(w);
65
+ let y1 = region.y.saturating_add(region.height).min(h);
66
+
67
+ let mut clipped = GrayImage::new(w, h);
68
+ for y in y0..y1 {
69
+ for x in x0..x1 {
70
+ clipped.put_pixel(x, y, Luma([src.get_pixel(x, y).0[0]]));
71
+ }
72
+ }
73
+ DynamicImage::ImageLuma8(clipped)
74
+ }
75
+
76
  inventory::submit! {
77
  EngineInfo {
78
  id: "aot-inpainting",
koharu-app/src/pipeline/engines/lama.rs CHANGED
@@ -1,23 +1,26 @@
1
  //! Lama Manga inpainter. Reads source + segmentation mask from the page,
2
  //! runs the model, writes the output as `Image { role: Inpainted }`.
3
  //!
4
- //! When `ctx.options.region` is set (e.g. repair-brush re-inpaint), the
5
- //! engine composites onto the existing `Image { Inpainted }` if present
6
- //! (falling back to `Source`) and processes just that one block. Without
7
- //! a region, behaves as a full-page pass using the scene's text nodes
8
- //! as block hints.
 
 
 
 
9
 
10
  use anyhow::{Result, anyhow};
11
  use async_trait::async_trait;
12
- use koharu_core::{ImageRole, MaskRole, Op};
 
13
  use koharu_ml::lama::Lama;
14
- use koharu_ml::types::TextRegion;
15
 
16
  use crate::pipeline::artifacts::Artifact;
17
  use crate::pipeline::engine::{Engine, EngineCtx, EngineInfo};
18
  use crate::pipeline::engines::support::{
19
- find_image_node, find_mask_node, image_dimensions, load_source_image, region_to_text_region,
20
- text_node_to_region, text_nodes, upsert_image_blob,
21
  };
22
 
23
  pub struct Model(Lama);
@@ -29,26 +32,22 @@ impl Engine for Model {
29
  .ok_or_else(|| anyhow!("no Segment mask on page"))?;
30
  let mask = ctx.blobs.load_image(&mask_ref)?;
31
 
32
- let (image, text_regions): (_, Vec<TextRegion>) = match ctx.options.region {
33
  Some(r) => {
34
  let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
35
  Some((_, blob)) => ctx.blobs.load_image(&blob)?,
36
  None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
37
  };
38
- (base, vec![region_to_text_region(&r)])
 
39
  }
40
  None => {
41
  let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
42
- let regions = text_nodes(ctx.scene, ctx.page)
43
- .iter()
44
- .map(|(_, transform, t)| text_node_to_region(transform, t))
45
- .collect();
46
- (image, regions)
47
  }
48
  };
49
 
50
- let regions_ref = (!text_regions.is_empty()).then_some(text_regions.as_slice());
51
- let result = self.0.inference_with_blocks(&image, &mask, regions_ref)?;
52
  let (w, h) = image_dimensions(&result);
53
  let blob = ctx.blobs.put_webp(&result)?;
54
  Ok(vec![upsert_image_blob(
@@ -62,6 +61,26 @@ impl Engine for Model {
62
  }
63
  }
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  inventory::submit! {
66
  EngineInfo {
67
  id: "lama-manga",
 
1
  //! Lama Manga inpainter. Reads source + segmentation mask from the page,
2
  //! runs the model, writes the output as `Image { role: Inpainted }`.
3
  //!
4
+ //! Box subdivision (the "which regions to run the model on" question) is
5
+ //! driven by the **mask itself** via `boxes_from_mask`, mirroring IOPaint's
6
+ //! `InpaintModel.__call__`. Text detections are no longer consulted; the
7
+ //! segmentation mask already encodes which pixels to remove.
8
+ //!
9
+ //! When `ctx.options.region` is set (repair-brush re-inpaint), we composite
10
+ //! onto the existing `Image { Inpainted }` if present (falling back to
11
+ //! `Source`) and zero out mask pixels outside the region before dispatch —
12
+ //! so only that region is reprocessed.
13
 
14
  use anyhow::{Result, anyhow};
15
  use async_trait::async_trait;
16
+ use image::{DynamicImage, GrayImage, Luma};
17
+ use koharu_core::{ImageRole, MaskRole, Op, Region};
18
  use koharu_ml::lama::Lama;
 
19
 
20
  use crate::pipeline::artifacts::Artifact;
21
  use crate::pipeline::engine::{Engine, EngineCtx, EngineInfo};
22
  use crate::pipeline::engines::support::{
23
+ find_image_node, find_mask_node, image_dimensions, load_source_image, upsert_image_blob,
 
24
  };
25
 
26
  pub struct Model(Lama);
 
32
  .ok_or_else(|| anyhow!("no Segment mask on page"))?;
33
  let mask = ctx.blobs.load_image(&mask_ref)?;
34
 
35
+ let (image, mask) = match ctx.options.region {
36
  Some(r) => {
37
  let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
38
  Some((_, blob)) => ctx.blobs.load_image(&blob)?,
39
  None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
40
  };
41
+ let clipped = clip_mask_to_region(&mask, &r);
42
+ (base, clipped)
43
  }
44
  None => {
45
  let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
46
+ (image, mask)
 
 
 
 
47
  }
48
  };
49
 
50
+ let result = self.0.inference(&image, &mask)?;
 
51
  let (w, h) = image_dimensions(&result);
52
  let blob = ctx.blobs.put_webp(&result)?;
53
  Ok(vec![upsert_image_blob(
 
61
  }
62
  }
63
 
64
+ /// Zero out every pixel of `mask` that falls outside `region`. The Crop
65
+ /// strategy's `boxes_from_mask` then only finds contours inside the region,
66
+ /// so the inpainter only touches that area.
67
+ fn clip_mask_to_region(mask: &DynamicImage, region: &Region) -> DynamicImage {
68
+ let src = mask.to_luma8();
69
+ let (w, h) = src.dimensions();
70
+ let x0 = region.x.min(w);
71
+ let y0 = region.y.min(h);
72
+ let x1 = region.x.saturating_add(region.width).min(w);
73
+ let y1 = region.y.saturating_add(region.height).min(h);
74
+
75
+ let mut clipped = GrayImage::new(w, h);
76
+ for y in y0..y1 {
77
+ for x in x0..x1 {
78
+ clipped.put_pixel(x, y, Luma([src.get_pixel(x, y).0[0]]));
79
+ }
80
+ }
81
+ DynamicImage::ImageLuma8(clipped)
82
+ }
83
+
84
  inventory::submit! {
85
  EngineInfo {
86
  id: "lama-manga",
koharu-app/src/pipeline/engines/support.rs CHANGED
@@ -7,7 +7,7 @@ use anyhow::{Context, Result};
7
  use image::{DynamicImage, GenericImageView};
8
  use koharu_core::{
9
  BlobRef, ImageData, ImageRole, MaskData, MaskRole, Node, NodeDataPatch, NodeId, NodeKind, Op,
10
- PageId, Region, Scene, TextData, Transform,
11
  };
12
 
13
  use crate::blobs::BlobStore;
@@ -89,23 +89,6 @@ pub fn text_node_to_region(transform: &Transform, text: &TextData) -> koharu_ml:
89
  }
90
  }
91
 
92
- /// Wrap a raw pixel `Region` as a `TextRegion` with no text hints. Used when
93
- /// an inpainter engine receives a region override (repair-brush path).
94
- pub fn region_to_text_region(r: &Region) -> koharu_ml::types::TextRegion {
95
- koharu_ml::types::TextRegion {
96
- x: r.x as f32,
97
- y: r.y as f32,
98
- width: r.width as f32,
99
- height: r.height as f32,
100
- confidence: 1.0,
101
- line_polygons: None,
102
- source_direction: None,
103
- rotation_deg: None,
104
- detected_font_size_px: None,
105
- detector: None,
106
- }
107
- }
108
-
109
  /// Inverse of `ml_text_direction_to_core`.
110
  pub fn core_text_direction_to_ml(d: koharu_core::TextDirection) -> koharu_ml::types::TextDirection {
111
  match d {
 
7
  use image::{DynamicImage, GenericImageView};
8
  use koharu_core::{
9
  BlobRef, ImageData, ImageRole, MaskData, MaskRole, Node, NodeDataPatch, NodeId, NodeKind, Op,
10
+ PageId, Scene, TextData, Transform,
11
  };
12
 
13
  use crate::blobs::BlobStore;
 
89
  }
90
  }
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  /// Inverse of `ml_text_direction_to_core`.
93
  pub fn core_text_direction_to_ml(d: koharu_core::TextDirection) -> koharu_ml::types::TextDirection {
94
  match d {
koharu-app/src/pipeline/mod.rs CHANGED
@@ -27,6 +27,11 @@ use tracing::Instrument;
27
  /// about to run (or just finished); step_index / page_index are 0-based.
28
  pub type ProgressSink = Arc<dyn Fn(ProgressTick) + Send + Sync>;
29
 
 
 
 
 
 
30
  #[derive(Debug, Clone)]
31
  pub struct ProgressTick {
32
  /// Coarse UI-facing step tag derived from the engine's primary
@@ -42,6 +47,20 @@ pub struct ProgressTick {
42
  pub overall_percent: u8,
43
  }
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  /// Map an engine's produced artifact to its UI step category. Stays
46
  /// co-located with the engine metadata so adding a new engine can't
47
  /// silently bypass the toolbar spinner — only the registered artifact
@@ -89,6 +108,12 @@ pub enum Scope {
89
 
90
  /// Execute `spec` against `session`. Each engine step becomes one `Op::Batch`
91
  /// applied via the session's history (one undo step per step per page).
 
 
 
 
 
 
92
  #[allow(clippy::too_many_arguments)]
93
  #[tracing::instrument(level = "info", skip_all)]
94
  pub async fn run(
@@ -101,7 +126,8 @@ pub async fn run(
101
  spec: PipelineSpec,
102
  cancel: Arc<AtomicBool>,
103
  progress: Option<ProgressSink>,
104
- ) -> Result<()> {
 
105
  let infos: Vec<&EngineInfo> = spec
106
  .steps
107
  .iter()
@@ -124,8 +150,9 @@ pub async fn run(
124
  let total_steps = order.len().max(1);
125
  let total_units = (total_pages * total_steps) as u64;
126
  let mut completed: u64 = 0;
 
127
 
128
- for (page_index, page_id) in pages.iter().enumerate() {
129
  for (seq, &i) in order.iter().enumerate() {
130
  if cancel.load(Ordering::Relaxed) {
131
  bail!("cancelled");
@@ -147,11 +174,31 @@ pub async fn run(
147
 
148
  // The page must still exist (user may have deleted it mid-run).
149
  if !session.scene.read().pages.contains_key(page_id) {
150
- completed += 1;
151
- continue;
 
 
152
  }
153
 
154
- let engine = registry.get(info.id, &runtime, cpu).await?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  let scene_snap = session.scene_snapshot();
156
  let ctx = EngineCtx {
157
  scene: &scene_snap,
@@ -163,9 +210,29 @@ pub async fn run(
163
  llm: &llm,
164
  renderer: &renderer,
165
  };
166
- let ops = async { engine.run(ctx).await }
167
  .instrument(tracing::info_span!("step", engine = info.id, page = %page_id))
168
- .await?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  completed += 1;
170
  if ops.is_empty() {
171
  continue;
@@ -174,7 +241,20 @@ pub async fn run(
174
  ops,
175
  label: format!("{}: page {}", info.id, page_id),
176
  };
177
- session.apply(batch)?;
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  }
179
  }
180
 
@@ -189,7 +269,37 @@ pub async fn run(
189
  overall_percent: 100,
190
  });
191
  }
192
- Ok(())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  }
194
 
195
  // ---------------------------------------------------------------------------
 
27
  /// about to run (or just finished); step_index / page_index are 0-based.
28
  pub type ProgressSink = Arc<dyn Fn(ProgressTick) + Send + Sync>;
29
 
30
+ /// Observer for non-fatal step failures. Called once per failed step; the
31
+ /// pipeline skips the rest of that page's steps and moves on to the next
32
+ /// page.
33
+ pub type WarningSink = Arc<dyn Fn(WarningTick) + Send + Sync>;
34
+
35
  #[derive(Debug, Clone)]
36
  pub struct ProgressTick {
37
  /// Coarse UI-facing step tag derived from the engine's primary
 
47
  pub overall_percent: u8,
48
  }
49
 
50
+ #[derive(Debug, Clone)]
51
+ pub struct WarningTick {
52
+ pub step_id: String,
53
+ pub page_index: usize,
54
+ pub total_pages: usize,
55
+ pub message: String,
56
+ }
57
+
58
+ /// Returned by [`run`]. `warning_count == 0` means the run finished cleanly.
59
+ #[derive(Debug, Clone, Default)]
60
+ pub struct RunOutcome {
61
+ pub warning_count: usize,
62
+ }
63
+
64
  /// Map an engine's produced artifact to its UI step category. Stays
65
  /// co-located with the engine metadata so adding a new engine can't
66
  /// silently bypass the toolbar spinner — only the registered artifact
 
108
 
109
  /// Execute `spec` against `session`. Each engine step becomes one `Op::Batch`
110
  /// applied via the session's history (one undo step per step per page).
111
+ ///
112
+ /// A failed step on a given page is non-fatal: the rest of that page's steps
113
+ /// are skipped (they typically depend on the failed step's output), one
114
+ /// [`WarningTick`] is emitted via `warnings`, and the driver moves on to the
115
+ /// next page. The function returns the total number of per-step warnings
116
+ /// that fired, letting callers flag the run as `CompletedWithErrors`.
117
  #[allow(clippy::too_many_arguments)]
118
  #[tracing::instrument(level = "info", skip_all)]
119
  pub async fn run(
 
126
  spec: PipelineSpec,
127
  cancel: Arc<AtomicBool>,
128
  progress: Option<ProgressSink>,
129
+ warnings: Option<WarningSink>,
130
+ ) -> Result<RunOutcome> {
131
  let infos: Vec<&EngineInfo> = spec
132
  .steps
133
  .iter()
 
150
  let total_steps = order.len().max(1);
151
  let total_units = (total_pages * total_steps) as u64;
152
  let mut completed: u64 = 0;
153
+ let mut warning_count: usize = 0;
154
 
155
+ 'pages: for (page_index, page_id) in pages.iter().enumerate() {
156
  for (seq, &i) in order.iter().enumerate() {
157
  if cancel.load(Ordering::Relaxed) {
158
  bail!("cancelled");
 
174
 
175
  // The page must still exist (user may have deleted it mid-run).
176
  if !session.scene.read().pages.contains_key(page_id) {
177
+ // Skip the remaining steps for a deleted page and credit all
178
+ // of them against total_units so progress still reaches 100%.
179
+ completed += (total_steps - seq) as u64;
180
+ continue 'pages;
181
  }
182
 
183
+ let engine = match registry.get(info.id, &runtime, cpu).await {
184
+ Ok(e) => e,
185
+ Err(err) => {
186
+ // Engine *load* failure: same recovery as a run failure.
187
+ report_step_failure(
188
+ info.id,
189
+ page_id,
190
+ seq,
191
+ page_index,
192
+ total_pages,
193
+ total_steps,
194
+ &err,
195
+ &mut warning_count,
196
+ warnings.as_ref(),
197
+ );
198
+ completed += (total_steps - seq) as u64;
199
+ continue 'pages;
200
+ }
201
+ };
202
  let scene_snap = session.scene_snapshot();
203
  let ctx = EngineCtx {
204
  scene: &scene_snap,
 
210
  llm: &llm,
211
  renderer: &renderer,
212
  };
213
+ let step_result = async { engine.run(ctx).await }
214
  .instrument(tracing::info_span!("step", engine = info.id, page = %page_id))
215
+ .await;
216
+ let ops = match step_result {
217
+ Ok(ops) => ops,
218
+ Err(err) => {
219
+ report_step_failure(
220
+ info.id,
221
+ page_id,
222
+ seq,
223
+ page_index,
224
+ total_pages,
225
+ total_steps,
226
+ &err,
227
+ &mut warning_count,
228
+ warnings.as_ref(),
229
+ );
230
+ // Subsequent steps on this page almost always consume the
231
+ // failed step's artifact; skip the rest and move on.
232
+ completed += (total_steps - seq) as u64;
233
+ continue 'pages;
234
+ }
235
+ };
236
  completed += 1;
237
  if ops.is_empty() {
238
  continue;
 
241
  ops,
242
  label: format!("{}: page {}", info.id, page_id),
243
  };
244
+ if let Err(err) = session.apply(batch) {
245
+ report_step_failure(
246
+ info.id,
247
+ page_id,
248
+ seq,
249
+ page_index,
250
+ total_pages,
251
+ total_steps,
252
+ &err,
253
+ &mut warning_count,
254
+ warnings.as_ref(),
255
+ );
256
+ continue 'pages;
257
+ }
258
  }
259
  }
260
 
 
269
  overall_percent: 100,
270
  });
271
  }
272
+ Ok(RunOutcome { warning_count })
273
+ }
274
+
275
+ #[allow(clippy::too_many_arguments)]
276
+ fn report_step_failure(
277
+ engine_id: &str,
278
+ page_id: &PageId,
279
+ step_index: usize,
280
+ page_index: usize,
281
+ total_pages: usize,
282
+ total_steps: usize,
283
+ err: &anyhow::Error,
284
+ warning_count: &mut usize,
285
+ sink: Option<&WarningSink>,
286
+ ) {
287
+ let _ = total_steps;
288
+ tracing::warn!(
289
+ engine = engine_id,
290
+ page = %page_id,
291
+ step_index,
292
+ "pipeline step failed: {err:#}"
293
+ );
294
+ *warning_count += 1;
295
+ if let Some(sink) = sink {
296
+ sink(WarningTick {
297
+ step_id: engine_id.to_string(),
298
+ page_index,
299
+ total_pages,
300
+ message: format!("{err:#}"),
301
+ });
302
+ }
303
  }
304
 
305
  // ---------------------------------------------------------------------------
koharu-core/src/events.rs CHANGED
@@ -21,8 +21,15 @@ use crate::protocol::LlmTarget;
21
  #[serde(tag = "event", rename_all = "camelCase")]
22
  pub enum AppEvent {
23
  // Pipeline jobs.
24
- JobStarted { id: String, kind: String },
 
 
 
25
  JobProgress(PipelineProgress),
 
 
 
 
26
  JobFinished(JobFinishedEvent),
27
 
28
  // Runtime library / model downloads.
@@ -36,9 +43,15 @@ pub enum AppEvent {
36
  // - `LlmLoaded` — model is on the GPU and ready for inference.
37
  // - `LlmFailed` — load failed; see `GET /llm/current` for the reason.
38
  // - `LlmUnloaded` — model released.
39
- LlmLoading { target: LlmTarget },
40
- LlmLoaded { target: LlmTarget },
41
- LlmFailed { target: Option<LlmTarget> },
 
 
 
 
 
 
42
  LlmUnloaded,
43
 
44
  // (Re)connect replay so the client can seed in-flight state.
@@ -122,6 +135,21 @@ pub struct JobFinishedEvent {
122
  pub error: Option<String>,
123
  }
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  // ---------------------------------------------------------------------------
126
  // Downloads
127
  // ---------------------------------------------------------------------------
 
21
  #[serde(tag = "event", rename_all = "camelCase")]
22
  pub enum AppEvent {
23
  // Pipeline jobs.
24
+ JobStarted {
25
+ id: String,
26
+ kind: String,
27
+ },
28
  JobProgress(PipelineProgress),
29
+ /// A single step on one page failed but the pipeline kept running.
30
+ /// Emitted per failed step so clients can show a non-fatal warning while
31
+ /// the job continues with the next page.
32
+ JobWarning(JobWarningEvent),
33
  JobFinished(JobFinishedEvent),
34
 
35
  // Runtime library / model downloads.
 
43
  // - `LlmLoaded` — model is on the GPU and ready for inference.
44
  // - `LlmFailed` — load failed; see `GET /llm/current` for the reason.
45
  // - `LlmUnloaded` — model released.
46
+ LlmLoading {
47
+ target: LlmTarget,
48
+ },
49
+ LlmLoaded {
50
+ target: LlmTarget,
51
+ },
52
+ LlmFailed {
53
+ target: Option<LlmTarget>,
54
+ },
55
  LlmUnloaded,
56
 
57
  // (Re)connect replay so the client can seed in-flight state.
 
135
  pub error: Option<String>,
136
  }
137
 
138
+ /// A non-fatal step failure during a pipeline run. The pipeline recovers by
139
+ /// skipping the rest of the current page's steps and moving on to the next
140
+ /// page; the UI accumulates these into a list during the job.
141
+ #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
142
+ #[serde(rename_all = "camelCase")]
143
+ pub struct JobWarningEvent {
144
+ pub job_id: String,
145
+ /// 0-based page index where the failure happened.
146
+ pub page_index: usize,
147
+ pub total_pages: usize,
148
+ /// Engine id (e.g. `"lama-manga"`) of the step that failed.
149
+ pub step_id: String,
150
+ pub message: String,
151
+ }
152
+
153
  // ---------------------------------------------------------------------------
154
  // Downloads
155
  // ---------------------------------------------------------------------------
koharu-core/src/lib.rs CHANGED
@@ -19,7 +19,7 @@ pub mod style;
19
  pub use blob::BlobRef;
20
  pub use events::{
21
  AppEvent, DownloadProgress, DownloadStatus, JobFinishedEvent, JobStatus, JobSummary,
22
- PipelineProgress, PipelineStatus, PipelineStep, ProjectSummary, SnapshotEvent,
23
  };
24
  pub use font::{FontPrediction, NamedFontPrediction, TextDirection, TopFont};
25
  pub use google_fonts::{FontSource, GoogleFontCatalog, GoogleFontEntry, GoogleFontVariant};
 
19
  pub use blob::BlobRef;
20
  pub use events::{
21
  AppEvent, DownloadProgress, DownloadStatus, JobFinishedEvent, JobStatus, JobSummary,
22
+ JobWarningEvent, PipelineProgress, PipelineStatus, PipelineStep, ProjectSummary, SnapshotEvent,
23
  };
24
  pub use font::{FontPrediction, NamedFontPrediction, TextDirection, TopFont};
25
  pub use google_fonts::{FontSource, GoogleFontCatalog, GoogleFontEntry, GoogleFontVariant};
koharu-ml/bin/aot-inpainting.rs CHANGED
@@ -57,7 +57,11 @@ async fn main() -> Result<()> {
57
  let mask = image::open(&cli.mask)?;
58
  let started = std::time::Instant::now();
59
  let output = if let Some(max_side) = cli.max_side {
60
- model.inference_with_max_side(&image, &mask, max_side)?
 
 
 
 
61
  } else {
62
  model.inference(&image, &mask)?
63
  };
 
57
  let mask = image::open(&cli.mask)?;
58
  let started = std::time::Instant::now();
59
  let output = if let Some(max_side) = cli.max_side {
60
+ let cfg = koharu_ml::inpainting::HdStrategyConfig {
61
+ resize_limit: max_side,
62
+ ..model.default_config()
63
+ };
64
+ model.inference_with_config(&image, &mask, &cfg)?
65
  } else {
66
  model.inference(&image, &mask)?
67
  };
koharu-ml/src/aot_inpainting/mod.rs CHANGED
@@ -7,17 +7,17 @@ use std::{
7
 
8
  use anyhow::{Context, Result, bail};
9
  use candle_core::{DType, Device, Tensor};
10
- use image::{
11
- DynamicImage, GenericImageView, GrayImage, RgbImage,
12
- imageops::{FilterType, resize},
13
- };
14
  use koharu_runtime::RuntimeManager;
15
  use serde::Deserialize;
16
  use tracing::instrument;
17
 
18
  use crate::{
19
  device,
20
- inpainting::{binarize_mask, extract_alpha, restore_alpha_channel},
 
 
 
21
  loading,
22
  };
23
 
@@ -49,16 +49,6 @@ pub struct AotInpainting {
49
  device: Device,
50
  }
51
 
52
- #[derive(Debug, Clone)]
53
- struct PreparedInput {
54
- pixel_values: Tensor,
55
- mask_values: Tensor,
56
- original_rgb: RgbImage,
57
- original_mask: GrayImage,
58
- model_width: u32,
59
- model_height: u32,
60
- }
61
-
62
  #[derive(Debug, Clone, Deserialize)]
63
  struct AotInpaintingConfig {
64
  model_type: String,
@@ -137,21 +127,27 @@ impl AotInpainting {
137
  })
138
  }
139
 
 
 
 
 
 
 
 
 
 
140
  #[instrument(level = "debug", skip_all)]
141
  pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
142
- self.inference_with_max_side(image, mask, self.config.default_max_side)
143
  }
144
 
145
  #[instrument(level = "debug", skip_all)]
146
- pub fn inference_with_max_side(
147
  &self,
148
  image: &DynamicImage,
149
  mask: &DynamicImage,
150
- max_side: u32,
151
  ) -> Result<DynamicImage> {
152
- if max_side == 0 {
153
- bail!("max_side must be positive");
154
- }
155
  if image.dimensions() != mask.dimensions() {
156
  bail!(
157
  "image and mask dimensions dismatch: image is {:?}, mask is {:?}",
@@ -161,84 +157,36 @@ impl AotInpainting {
161
  }
162
 
163
  let started = Instant::now();
164
- let prepared = self.preprocess(image, mask, max_side)?;
165
- let output = self
166
- .model
167
- .forward(&prepared.pixel_values, &prepared.mask_values)?;
168
- let composited = self.postprocess(&output, &prepared)?;
169
 
170
  tracing::info!(
171
  width = image.width(),
172
  height = image.height(),
173
- model_width = prepared.model_width,
174
- model_height = prepared.model_height,
175
- max_side,
176
  total_ms = started.elapsed().as_millis(),
177
  "aot inpainting timings"
178
  );
179
 
180
  if image.color().has_alpha() {
181
  let alpha = extract_alpha(&image.to_rgba8());
182
- let rgba = restore_alpha_channel(&composited, &alpha, &prepared.original_mask);
183
  Ok(DynamicImage::ImageRgba8(rgba))
184
  } else {
185
- Ok(DynamicImage::ImageRgb8(composited))
186
  }
187
  }
188
 
189
- fn preprocess(
190
- &self,
191
- image: &DynamicImage,
192
- mask: &DynamicImage,
193
- max_side: u32,
194
- ) -> Result<PreparedInput> {
195
- let original_rgb = image.to_rgb8();
196
- let original_mask = binarize_mask(mask);
197
- let mut working_rgb = original_rgb.clone();
198
- let mut working_mask = original_mask.clone();
199
-
200
- if working_rgb.width().max(working_rgb.height()) > max_side {
201
- let (resized_width, resized_height) =
202
- resize_keep_aspect_dims(working_rgb.width(), working_rgb.height(), max_side);
203
- working_rgb = resize(
204
- &working_rgb,
205
- resized_width,
206
- resized_height,
207
- FilterType::Triangle,
208
- );
209
- working_mask = resize(
210
- &working_mask,
211
- resized_width,
212
- resized_height,
213
- FilterType::Triangle,
214
- );
215
- }
216
-
217
- let model_width = round_up_multiple(working_rgb.width(), self.config.pad_multiple as u32);
218
- let model_height = round_up_multiple(working_rgb.height(), self.config.pad_multiple as u32);
219
- if model_width != working_rgb.width() || model_height != working_rgb.height() {
220
- working_rgb = resize(
221
- &working_rgb,
222
- model_width,
223
- model_height,
224
- FilterType::Triangle,
225
- );
226
- working_mask = resize(
227
- &working_mask,
228
- model_width,
229
- model_height,
230
- FilterType::Triangle,
231
- );
232
- }
233
-
234
- let mut binary_model_mask = working_mask;
235
- for pixel in binary_model_mask.pixels_mut() {
236
- pixel.0[0] = if pixel.0[0] >= 127 { 255 } else { 0 };
237
- }
238
-
239
  let image_tensor = (Tensor::from_vec(
240
- working_rgb.into_raw(),
241
- (1, model_height as usize, model_width as usize, 3),
242
  &self.device,
243
  )?
244
  .permute((0, 3, 1, 2))?
@@ -247,29 +195,22 @@ impl AotInpainting {
247
  let image_tensor = (image_tensor - 1.0)?;
248
 
249
  let mask_tensor = Tensor::from_vec(
250
- binary_model_mask.clone().into_raw(),
251
- (1, model_height as usize, model_width as usize, 1),
252
  &self.device,
253
  )?
254
  .permute((0, 3, 1, 2))?
255
  .to_dtype(DType::F32)?;
256
  let mask_tensor = (mask_tensor / 255.0)?;
257
  let mask_inv = (Tensor::ones_like(&mask_tensor)? - &mask_tensor)?;
258
- let mask_inv_rgb =
259
- mask_inv.broadcast_as((1, 3, model_height as usize, model_width as usize))?;
260
  let masked_image = (&image_tensor * &mask_inv_rgb)?;
261
 
262
- Ok(PreparedInput {
263
- pixel_values: masked_image,
264
- mask_values: mask_tensor,
265
- original_rgb,
266
- original_mask,
267
- model_width,
268
- model_height,
269
- })
270
  }
271
 
272
- fn postprocess(&self, output: &Tensor, prepared: &PreparedInput) -> Result<RgbImage> {
273
  let output = output.to_device(&Device::Cpu)?.squeeze(0)?;
274
  let (channels, height, width) = output.dims3()?;
275
  if channels != 3 {
@@ -282,27 +223,27 @@ impl AotInpainting {
282
  .to_dtype(DType::U8)?
283
  .flatten_all()?
284
  .to_vec1::<u8>()?;
285
- let predicted = RgbImage::from_raw(width as u32, height as u32, raw)
286
- .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))?;
 
 
287
 
288
- let predicted = if width as u32 != prepared.original_rgb.width()
289
- || height as u32 != prepared.original_rgb.height()
290
- {
291
- resize(
292
- &predicted,
293
- prepared.original_rgb.width(),
294
- prepared.original_rgb.height(),
295
- FilterType::Triangle,
296
- )
297
- } else {
298
- predicted
299
- };
300
 
301
- Ok(composite_rgb(
302
- &prepared.original_rgb,
303
- &predicted,
304
- &prepared.original_mask,
305
- ))
 
 
 
 
 
 
 
306
  }
307
  }
308
 
@@ -323,49 +264,3 @@ async fn resolve_model_paths(runtime: &RuntimeManager) -> Result<(PathBuf, PathB
323
  .with_context(|| format!("failed to download {SAFETENSORS_FILENAME} from {HF_REPO}"))?;
324
  Ok((config, weights))
325
  }
326
-
327
- fn resize_keep_aspect_dims(width: u32, height: u32, max_side: u32) -> (u32, u32) {
328
- let ratio = max_side as f32 / width.max(height) as f32;
329
- (
330
- ((width as f32 * ratio).round() as u32).max(1),
331
- ((height as f32 * ratio).round() as u32).max(1),
332
- )
333
- }
334
-
335
- fn round_up_multiple(value: u32, multiple: u32) -> u32 {
336
- if value.is_multiple_of(multiple) {
337
- value
338
- } else {
339
- value + (multiple - value % multiple)
340
- }
341
- }
342
-
343
- fn composite_rgb(original: &RgbImage, predicted: &RgbImage, mask: &GrayImage) -> RgbImage {
344
- let mut composited = original.clone();
345
- for y in 0..original.height() {
346
- for x in 0..original.width() {
347
- if mask.get_pixel(x, y).0[0] > 0 {
348
- composited.put_pixel(x, y, *predicted.get_pixel(x, y));
349
- }
350
- }
351
- }
352
- composited
353
- }
354
-
355
- #[cfg(test)]
356
- mod tests {
357
- use super::{resize_keep_aspect_dims, round_up_multiple};
358
-
359
- #[test]
360
- fn resize_keep_aspect_matches_upstream_rounding() {
361
- assert_eq!(resize_keep_aspect_dims(1600, 900, 1024), (1024, 576));
362
- assert_eq!(resize_keep_aspect_dims(900, 1600, 1024), (576, 1024));
363
- }
364
-
365
- #[test]
366
- fn round_up_multiple_expands_to_next_valid_shape() {
367
- assert_eq!(round_up_multiple(1024, 8), 1024);
368
- assert_eq!(round_up_multiple(1025, 8), 1032);
369
- assert_eq!(round_up_multiple(7, 8), 8);
370
- }
371
- }
 
7
 
8
  use anyhow::{Context, Result, bail};
9
  use candle_core::{DType, Device, Tensor};
10
+ use image::{DynamicImage, GenericImageView, GrayImage, RgbImage};
 
 
 
11
  use koharu_runtime::RuntimeManager;
12
  use serde::Deserialize;
13
  use tracing::instrument;
14
 
15
  use crate::{
16
  device,
17
+ inpainting::{
18
+ HdStrategyConfig, InpaintForward, binarize_mask, extract_alpha, restore_alpha_channel,
19
+ run_inpaint, try_fill_balloon,
20
+ },
21
  loading,
22
  };
23
 
 
49
  device: Device,
50
  }
51
 
 
 
 
 
 
 
 
 
 
 
52
  #[derive(Debug, Clone, Deserialize)]
53
  struct AotInpaintingConfig {
54
  model_type: String,
 
127
  })
128
  }
129
 
130
+ /// Default strategy: Resize, using the model's shipped `default_max_side`
131
+ /// as the resize limit. Matches pre-refactor behaviour.
132
+ pub fn default_config(&self) -> HdStrategyConfig {
133
+ HdStrategyConfig::aot_default(
134
+ self.config.default_max_side,
135
+ self.config.pad_multiple as u32,
136
+ )
137
+ }
138
+
139
  #[instrument(level = "debug", skip_all)]
140
  pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
141
+ self.inference_with_config(image, mask, &self.default_config())
142
  }
143
 
144
  #[instrument(level = "debug", skip_all)]
145
+ pub fn inference_with_config(
146
  &self,
147
  image: &DynamicImage,
148
  mask: &DynamicImage,
149
+ cfg: &HdStrategyConfig,
150
  ) -> Result<DynamicImage> {
 
 
 
151
  if image.dimensions() != mask.dimensions() {
152
  bail!(
153
  "image and mask dimensions dismatch: image is {:?}, mask is {:?}",
 
157
  }
158
 
159
  let started = Instant::now();
160
+ let binary_mask = binarize_mask(mask);
161
+ let image_rgb = image.to_rgb8();
162
+ let forward = AotForward { aot: self };
163
+ let output_rgb = run_inpaint(&forward, &image_rgb, &binary_mask, cfg)?;
 
164
 
165
  tracing::info!(
166
  width = image.width(),
167
  height = image.height(),
168
+ resize_limit = cfg.resize_limit,
 
 
169
  total_ms = started.elapsed().as_millis(),
170
  "aot inpainting timings"
171
  );
172
 
173
  if image.color().has_alpha() {
174
  let alpha = extract_alpha(&image.to_rgba8());
175
+ let rgba = restore_alpha_channel(&output_rgb, &alpha, &binary_mask);
176
  Ok(DynamicImage::ImageRgba8(rgba))
177
  } else {
178
+ Ok(DynamicImage::ImageRgb8(output_rgb))
179
  }
180
  }
181
 
182
+ /// Raw model forward on a pre-padded RGB image + mask. Input spatial dims
183
+ /// must already be multiples of `pad_multiple` — the HD-strategy dispatcher
184
+ /// handles this.
185
+ fn forward_rgb(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
186
+ let (w, h) = image.dimensions();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  let image_tensor = (Tensor::from_vec(
188
+ image.clone().into_raw(),
189
+ (1, h as usize, w as usize, 3),
190
  &self.device,
191
  )?
192
  .permute((0, 3, 1, 2))?
 
195
  let image_tensor = (image_tensor - 1.0)?;
196
 
197
  let mask_tensor = Tensor::from_vec(
198
+ mask.clone().into_raw(),
199
+ (1, h as usize, w as usize, 1),
200
  &self.device,
201
  )?
202
  .permute((0, 3, 1, 2))?
203
  .to_dtype(DType::F32)?;
204
  let mask_tensor = (mask_tensor / 255.0)?;
205
  let mask_inv = (Tensor::ones_like(&mask_tensor)? - &mask_tensor)?;
206
+ let mask_inv_rgb = mask_inv.broadcast_as((1, 3, h as usize, w as usize))?;
 
207
  let masked_image = (&image_tensor * &mask_inv_rgb)?;
208
 
209
+ let output = self.model.forward(&masked_image, &mask_tensor)?;
210
+ self.postprocess(&output)
 
 
 
 
 
 
211
  }
212
 
213
+ fn postprocess(&self, output: &Tensor) -> Result<RgbImage> {
214
  let output = output.to_device(&Device::Cpu)?.squeeze(0)?;
215
  let (channels, height, width) = output.dims3()?;
216
  if channels != 3 {
 
223
  .to_dtype(DType::U8)?
224
  .flatten_all()?
225
  .to_vec1::<u8>()?;
226
+ RgbImage::from_raw(width as u32, height as u32, raw)
227
+ .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
228
+ }
229
+ }
230
 
231
+ struct AotForward<'a> {
232
+ aot: &'a AotInpainting,
233
+ }
 
 
 
 
 
 
 
 
 
234
 
235
+ impl InpaintForward for AotForward<'_> {
236
+ fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
237
+ if mask.pixels().all(|p| p.0[0] == 0) {
238
+ return Ok(image.clone());
239
+ }
240
+ // Same flat-balloon fast path as Lama: skip the model when the mask
241
+ // sits in a uniform-background bubble. Fires per-crop under the Crop
242
+ // strategy; generally no-ops on whole-image forwards under Resize.
243
+ if let Some(filled) = try_fill_balloon(image, mask) {
244
+ return Ok(filled);
245
+ }
246
+ self.aot.forward_rgb(image, mask)
247
  }
248
  }
249
 
 
264
  .with_context(|| format!("failed to download {SAFETENSORS_FILENAME} from {HF_REPO}"))?;
265
  Ok((config, weights))
266
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
koharu-ml/src/inpainting/balloon.rs ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Balloon-fill fast path for inpainting.
2
+ //!
3
+ //! When a mask sits inside a speech bubble with a near-uniform background,
4
+ //! the model can be skipped entirely: fill the masked pixels with the median
5
+ //! background colour of the balloon. This is purely image processing, so
6
+ //! every erase model (Lama, AoT) can use it as a pre-model pass.
7
+ //!
8
+ //! Effectiveness depends on the caller handing us one bubble at a time —
9
+ //! which is exactly what the Crop strategy does, since each crop corresponds
10
+ //! to a connected mask contour. On a whole-image forward (Resize strategy),
11
+ //! `extract_balloon_mask` usually fails to find a single containing contour
12
+ //! and we fall through to the model.
13
+
14
+ use image::{DynamicImage, GrayImage, Luma, Rgb, RgbImage};
15
+ use imageproc::{
16
+ contours::find_contours, distance_transform::Norm, drawing::draw_polygon_mut, edges::canny,
17
+ filter::gaussian_blur_f32, morphology::dilate, point::Point,
18
+ };
19
+
20
+ const BALLOON_CANNY_LOW: f32 = 70.0;
21
+ const BALLOON_CANNY_HIGH: f32 = 140.0;
22
+ const SIMPLE_BG_THRESHOLD_LOW_VARIANCE: f64 = 10.0;
23
+ const SIMPLE_BG_THRESHOLD_HIGH_VARIANCE: f64 = 7.0;
24
+ const SIMPLE_BG_CHANNEL_STD_SWITCH: f64 = 1.0;
25
+
26
+ type Xyxy = [u32; 4];
27
+
28
+ pub(crate) struct BalloonMasks {
29
+ pub balloon_mask: GrayImage,
30
+ pub non_text_mask: GrayImage,
31
+ }
32
+
33
+ /// Return an image with the masked pixels painted the balloon's median
34
+ /// background colour, iff a containing bubble with low background variance
35
+ /// can be identified. `None` means "no confident fast path; call the model".
36
+ pub fn try_fill_balloon(image: &RgbImage, mask: &GrayImage) -> Option<RgbImage> {
37
+ let masks = extract_balloon_mask(image, mask)?;
38
+ let average_bg_color = median_rgb(image, &masks.non_text_mask)?;
39
+ let std_rgb = color_stddev(image, &masks.non_text_mask, average_bg_color);
40
+ let inpaint_thresh = if stddev3(std_rgb) > SIMPLE_BG_CHANNEL_STD_SWITCH {
41
+ SIMPLE_BG_THRESHOLD_HIGH_VARIANCE
42
+ } else {
43
+ SIMPLE_BG_THRESHOLD_LOW_VARIANCE
44
+ };
45
+ let std_max = std_rgb.into_iter().fold(0.0, f64::max);
46
+
47
+ if std_max >= inpaint_thresh {
48
+ return None;
49
+ }
50
+
51
+ let mut result = image.clone();
52
+ let fill = [
53
+ average_bg_color[0] as u8,
54
+ average_bg_color[1] as u8,
55
+ average_bg_color[2] as u8,
56
+ ];
57
+ for (x, y, pixel) in masks.balloon_mask.enumerate_pixels() {
58
+ if pixel.0[0] > 0 {
59
+ result.put_pixel(x, y, Rgb(fill));
60
+ }
61
+ }
62
+
63
+ Some(result)
64
+ }
65
+
66
+ pub(crate) fn extract_balloon_mask(image: &RgbImage, mask: &GrayImage) -> Option<BalloonMasks> {
67
+ if image.dimensions() != mask.dimensions() {
68
+ return None;
69
+ }
70
+
71
+ let text_bbox = non_zero_bbox(mask)?;
72
+ let text_sum = count_nonzero(mask);
73
+ if text_sum == 0 {
74
+ return None;
75
+ }
76
+
77
+ let gray = DynamicImage::ImageRgb8(image.clone()).to_luma8();
78
+ let blurred = gaussian_blur_f32(&gray, 1.0);
79
+ let mut cannyed = canny(&blurred, BALLOON_CANNY_LOW, BALLOON_CANNY_HIGH);
80
+ cannyed = dilate(&cannyed, Norm::LInf, 1);
81
+ draw_binary_border(&mut cannyed);
82
+ subtract_binary_mask(&mut cannyed, mask);
83
+
84
+ let contours = find_contours::<i32>(&cannyed);
85
+ let (width, height) = cannyed.dimensions();
86
+ let mut best_mask = None;
87
+ let mut best_area = f64::INFINITY;
88
+
89
+ for contour in contours {
90
+ let Some(polygon) = contour_polygon(&contour.points) else {
91
+ continue;
92
+ };
93
+ let bbox = polygon_bbox(&polygon)?;
94
+ if bbox[0] > text_bbox[0]
95
+ || bbox[1] > text_bbox[1]
96
+ || bbox[2] < text_bbox[2]
97
+ || bbox[3] < text_bbox[3]
98
+ {
99
+ continue;
100
+ }
101
+
102
+ let mut candidate = GrayImage::new(width, height);
103
+ draw_polygon_mut(&mut candidate, &polygon, Luma([255u8]));
104
+ if count_overlap(&candidate, mask) < text_sum {
105
+ continue;
106
+ }
107
+
108
+ let area = polygon_area(&polygon);
109
+ if area < best_area {
110
+ best_area = area;
111
+ best_mask = Some(candidate);
112
+ }
113
+ }
114
+
115
+ let balloon_mask = best_mask?;
116
+ let mut non_text_mask = balloon_mask.clone();
117
+ for (x, y, pixel) in mask.enumerate_pixels() {
118
+ if pixel.0[0] > 0 {
119
+ non_text_mask.put_pixel(x, y, Luma([0]));
120
+ }
121
+ }
122
+
123
+ Some(BalloonMasks {
124
+ balloon_mask,
125
+ non_text_mask,
126
+ })
127
+ }
128
+
129
+ fn contour_polygon(points: &[Point<i32>]) -> Option<Vec<Point<i32>>> {
130
+ let mut polygon = points.to_vec();
131
+ if polygon.len() < 3 {
132
+ return None;
133
+ }
134
+ if polygon.first() == polygon.last() {
135
+ polygon.pop();
136
+ }
137
+ if polygon.len() < 3 {
138
+ return None;
139
+ }
140
+ Some(polygon)
141
+ }
142
+
143
+ fn polygon_bbox(points: &[Point<i32>]) -> Option<Xyxy> {
144
+ let first = points.first()?;
145
+ let mut min_x = first.x;
146
+ let mut min_y = first.y;
147
+ let mut max_x = first.x;
148
+ let mut max_y = first.y;
149
+ for point in points.iter().skip(1) {
150
+ min_x = min_x.min(point.x);
151
+ min_y = min_y.min(point.y);
152
+ max_x = max_x.max(point.x);
153
+ max_y = max_y.max(point.y);
154
+ }
155
+
156
+ Some([
157
+ min_x.max(0) as u32,
158
+ min_y.max(0) as u32,
159
+ max_x.max(min_x).saturating_add(1) as u32,
160
+ max_y.max(min_y).saturating_add(1) as u32,
161
+ ])
162
+ }
163
+
164
+ fn polygon_area(points: &[Point<i32>]) -> f64 {
165
+ let mut area = 0.0;
166
+ for index in 0..points.len() {
167
+ let current = points[index];
168
+ let next = points[(index + 1) % points.len()];
169
+ area += f64::from(current.x) * f64::from(next.y) - f64::from(next.x) * f64::from(current.y);
170
+ }
171
+ area.abs() * 0.5
172
+ }
173
+
174
+ fn draw_binary_border(image: &mut GrayImage) {
175
+ let width = image.width();
176
+ let height = image.height();
177
+ if width == 0 || height == 0 {
178
+ return;
179
+ }
180
+
181
+ for x in 0..width {
182
+ image.put_pixel(x, 0, Luma([255]));
183
+ image.put_pixel(x, height - 1, Luma([255]));
184
+ }
185
+ for y in 0..height {
186
+ image.put_pixel(0, y, Luma([255]));
187
+ image.put_pixel(width - 1, y, Luma([255]));
188
+ }
189
+ }
190
+
191
+ fn subtract_binary_mask(image: &mut GrayImage, mask: &GrayImage) {
192
+ for (x, y, pixel) in image.enumerate_pixels_mut() {
193
+ if mask.get_pixel(x, y).0[0] > 0 {
194
+ pixel.0[0] = 0;
195
+ }
196
+ }
197
+ }
198
+
199
+ fn non_zero_bbox(mask: &GrayImage) -> Option<Xyxy> {
200
+ let (width, height) = mask.dimensions();
201
+ let mut min_x = width;
202
+ let mut min_y = height;
203
+ let mut max_x = 0;
204
+ let mut max_y = 0;
205
+ let mut found = false;
206
+
207
+ for (x, y, pixel) in mask.enumerate_pixels() {
208
+ if pixel.0[0] == 0 {
209
+ continue;
210
+ }
211
+ found = true;
212
+ min_x = min_x.min(x);
213
+ min_y = min_y.min(y);
214
+ max_x = max_x.max(x);
215
+ max_y = max_y.max(y);
216
+ }
217
+
218
+ found.then_some([
219
+ min_x,
220
+ min_y,
221
+ max_x.saturating_add(1),
222
+ max_y.saturating_add(1),
223
+ ])
224
+ }
225
+
226
+ fn count_nonzero(mask: &GrayImage) -> u32 {
227
+ mask.pixels().filter(|pixel| pixel.0[0] > 0).count() as u32
228
+ }
229
+
230
+ fn count_overlap(left: &GrayImage, right: &GrayImage) -> u32 {
231
+ left.pixels()
232
+ .zip(right.pixels())
233
+ .filter(|(l, r)| l.0[0] > 0 && r.0[0] > 0)
234
+ .count() as u32
235
+ }
236
+
237
+ fn median_rgb(image: &RgbImage, mask: &GrayImage) -> Option<[f64; 3]> {
238
+ let mut channels = [Vec::new(), Vec::new(), Vec::new()];
239
+ for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
240
+ if mask_pixel.0[0] == 0 {
241
+ continue;
242
+ }
243
+ channels[0].push(pixel.0[0]);
244
+ channels[1].push(pixel.0[1]);
245
+ channels[2].push(pixel.0[2]);
246
+ }
247
+
248
+ Some([
249
+ median_channel(&channels[0])?,
250
+ median_channel(&channels[1])?,
251
+ median_channel(&channels[2])?,
252
+ ])
253
+ }
254
+
255
/// Median of a slice of `u8` samples, or `None` for an empty slice.
///
/// With an even number of samples the result is the mean of the two middle
/// values, so it may end in `.5`. The input is not modified; a sorted copy
/// is made internally.
fn median_channel(values: &[u8]) -> Option<f64> {
    // `checked_sub` doubles as the emptiness check.
    let last_index = values.len().checked_sub(1)?;

    let mut sorted = values.to_vec();
    sorted.sort_unstable();

    // For an odd length both indices name the single middle element; for an
    // even length they name the two elements around the midpoint.
    let lower = sorted[last_index / 2];
    let upper = sorted[sorted.len() / 2];
    Some((f64::from(lower) + f64::from(upper)) / 2.0)
}
269
+
270
+ fn color_stddev(image: &RgbImage, mask: &GrayImage, median: [f64; 3]) -> [f64; 3] {
271
+ let mut sum_sq = [0.0; 3];
272
+ let mut count = 0.0;
273
+
274
+ for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
275
+ if mask_pixel.0[0] == 0 {
276
+ continue;
277
+ }
278
+ count += 1.0;
279
+ for channel in 0..3 {
280
+ let diff = f64::from(pixel.0[channel]) - median[channel];
281
+ sum_sq[channel] += diff * diff;
282
+ }
283
+ }
284
+
285
+ if count == 0.0 {
286
+ return [f64::INFINITY; 3];
287
+ }
288
+
289
+ [
290
+ (sum_sq[0] / count).sqrt(),
291
+ (sum_sq[1] / count).sqrt(),
292
+ (sum_sq[2] / count).sqrt(),
293
+ ]
294
+ }
295
+
296
/// Population standard deviation (divisor 3) of three values.
fn stddev3(values: [f64; 3]) -> f64 {
    let mean = values.iter().sum::<f64>() / 3.0;

    let mut squared_total = 0.0;
    for value in values.iter() {
        let offset = value - mean;
        squared_total += offset * offset;
    }

    (squared_total / 3.0).sqrt()
}
308
+
309
#[cfg(test)]
mod tests {
    use super::*;
    use imageproc::drawing::draw_hollow_rect_mut;
    use imageproc::rect::Rect;
    use std::ops::Range;

    const BLACK: Rgb<u8> = Rgb([0, 0, 0]);

    /// Square mask of side `size` with the `xs` × `ys` rectangle set to 255.
    fn rect_mask(size: u32, xs: Range<u32>, ys: Range<u32>) -> GrayImage {
        let mut mask = GrayImage::new(size, size);
        for y in ys {
            for x in xs.clone() {
                mask.put_pixel(x, y, Luma([255]));
            }
        }
        mask
    }

    #[test]
    fn extract_balloon_mask_prefers_smallest_covering_contour() {
        // Two nested outlines: the text sits inside the inner one.
        let mut image = RgbImage::from_pixel(80, 80, Rgb([255, 255, 255]));
        draw_hollow_rect_mut(&mut image, Rect::at(4, 4).of_size(72, 72), BLACK);
        draw_hollow_rect_mut(&mut image, Rect::at(20, 20).of_size(28, 20), BLACK);
        let mask = rect_mask(80, 24..44, 24..36);

        let masks = extract_balloon_mask(&image, &mask).expect("balloon should be detected");
        let balloon_pixels = count_nonzero(&masks.balloon_mask);

        assert!(
            balloon_pixels < 900,
            "expected inner contour fill, got {balloon_pixels}"
        );
        assert!(
            balloon_pixels > 250,
            "expected meaningful bubble area, got {balloon_pixels}"
        );
    }

    #[test]
    fn simple_balloon_chooses_fill_but_textured_balloon_does_not() {
        let mut flat = RgbImage::from_pixel(64, 64, Rgb([240, 240, 240]));
        draw_hollow_rect_mut(&mut flat, Rect::at(8, 8).of_size(48, 32), BLACK);
        let mask = rect_mask(64, 18..46, 18..30);

        assert!(try_fill_balloon(&flat, &mask).is_some());

        // Same outline, but the interior carries a repeating colour pattern.
        let mut textured = flat.clone();
        for y in 9..39 {
            for x in 9..55 {
                let noise = ((x + y) % 23) as u8;
                let colour = Rgb([200 + noise, 210 + (noise / 2), 220 - (noise / 3)]);
                textured.put_pixel(x, y, colour);
            }
        }

        assert!(try_fill_balloon(&textured, &mask).is_none());
    }
}
koharu-ml/src/{inpainting.rs → inpainting/mod.rs} RENAMED
@@ -1,3 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  use image::{DynamicImage, GrayImage, Luma, RgbImage, Rgba, RgbaImage};
2
  use imageproc::{distance_transform::Norm, morphology::dilate};
3
 
 
1
+ //! Shared inpainting infrastructure: alpha handling, mask prep, and the
2
+ //! HD-strategy dispatcher used by every erase model (Lama, AoT).
3
+ //!
4
+ //! The strategy dispatcher mirrors IOPaint's `InpaintModel.__call__`: one place
5
+ //! decides between Original / Resize / Crop based on image size and a
6
+ //! per-model config. Concrete models only implement the raw forward pass.
7
+
8
+ pub mod balloon;
9
+ pub mod strategy;
10
+
11
+ pub use balloon::try_fill_balloon;
12
+ pub use strategy::{HdStrategy, HdStrategyConfig, InpaintForward, run_inpaint};
13
+
14
  use image::{DynamicImage, GrayImage, Luma, RgbImage, Rgba, RgbaImage};
15
  use imageproc::{distance_transform::Norm, morphology::dilate};
16
 
koharu-ml/src/inpainting/strategy.rs ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! HD-strategy dispatcher for erase models.
2
+ //!
3
+ //! Mirrors IOPaint's `InpaintModel.__call__` (`iopaint/model/base.py`): one
4
+ //! entry point chooses between Original / Resize / Crop based on image size,
5
+ //! then delegates the raw forward to a model-specific [`InpaintForward`].
6
+ //!
7
+ //! ## Strategies
8
+ //!
9
+ //! - **Original** — pad to `pad_mod`, forward, unpad. Highest VRAM.
10
+ //! - **Resize** — downscale so `max(h,w) <= resize_limit`, pad, forward, unpad,
11
+ //! upscale, then restore pixels outside the mask from the original. Medium
12
+ //! VRAM, preserves quality outside the mask.
13
+ //! - **Crop** — extract one bounding box per connected mask contour, expand by
14
+ //! `crop_margin` on each side, forward each crop independently, paste back.
15
+ //! Lowest VRAM. Default for manga (many small speech bubbles).
16
+ //!
17
+ //! The Crop path uses [`pad_forward_bounded`] per crop, so an oversized crop
18
+ //! (e.g. a brush stroke covering most of a page) falls back to the Resize path
19
+ //! inside that single crop. No `HdStrategy` ever OOMs on a reasonable GPU
20
+ //! provided `resize_limit` is within VRAM budget.
21
+ //!
22
+ //! Mask boxes come from `imageproc::contours::find_contours` on the binarized
23
+ //! mask — equivalent to OpenCV's `cv2.findContours(RETR_EXTERNAL)` that IOPaint
24
+ //! uses. Only `BorderType::Outer` contours become boxes (holes are ignored).
25
+
26
+ use anyhow::Result;
27
+ use image::{
28
+ GrayImage, RgbImage,
29
+ imageops::{FilterType, crop_imm, replace, resize},
30
+ };
31
+ use imageproc::contours::{BorderType, find_contours};
32
+
33
/// Preprocessing strategy selected before the raw model forward.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HdStrategy {
    /// Forward the image at its own size (only padded to `pad_mod`).
    Original,
    /// Downscale images whose longer side exceeds `resize_limit` first.
    Resize,
    /// Forward one crop per mask contour when the image's longer side
    /// exceeds `crop_trigger_size`.
    Crop,
}

/// Settings consumed by [`run_inpaint`].
///
/// The numeric defaults used by the presets below (trigger 800, margin 128,
/// resize limit 1280) are the values the original authors attribute to
/// IOPaint's `iopaint/schema.py`.
#[derive(Debug, Clone, Copy)]
pub struct HdStrategyConfig {
    /// Which strategy [`run_inpaint`] dispatches to.
    pub strategy: HdStrategy,
    /// Crop only engages when `max(image.w, image.h)` is strictly greater
    /// than this; otherwise the image is forwarded whole.
    pub crop_trigger_size: u32,
    /// Pixels of context added on every side of a mask bounding box when
    /// cropping.
    pub crop_margin: u32,
    /// Longer-side ceiling: the Resize strategy downscales to it, and a crop
    /// larger than it is downscaled the same way.
    pub resize_limit: u32,
    /// Spatial divisor the model requires; inputs are padded up to a
    /// multiple of it.
    pub pad_mod: u32,
}

impl HdStrategyConfig {
    const PRESET_CROP_TRIGGER: u32 = 800;
    const PRESET_CROP_MARGIN: u32 = 128;
    const PRESET_RESIZE_LIMIT: u32 = 1280;
    const PRESET_PAD_MOD: u32 = 8;

    /// Preset for Lama: Crop strategy, so each mask contour is forwarded as
    /// its own small crop.
    pub const fn lama_default() -> Self {
        Self {
            strategy: HdStrategy::Crop,
            crop_trigger_size: Self::PRESET_CROP_TRIGGER,
            crop_margin: Self::PRESET_CROP_MARGIN,
            resize_limit: Self::PRESET_RESIZE_LIMIT,
            pad_mod: Self::PRESET_PAD_MOD,
        }
    }

    /// Preset for AoT: whole-image Resize with a caller-supplied ceiling and
    /// padding divisor. The crop fields keep the preset values but are not
    /// consulted by the Resize strategy.
    pub const fn aot_default(resize_limit: u32, pad_mod: u32) -> Self {
        Self {
            strategy: HdStrategy::Resize,
            crop_trigger_size: Self::PRESET_CROP_TRIGGER,
            crop_margin: Self::PRESET_CROP_MARGIN,
            resize_limit,
            pad_mod,
        }
    }
}
86
+
87
+ /// `[x1, y1, x2, y2]` half-open rectangle: `x1,y1` inclusive, `x2,y2` exclusive.
88
+ pub type Xyxy = [u32; 4];
89
+
90
+ /// A raw forward pass on a (padded) image + mask, returning an image of the
91
+ /// same spatial size. Implementors are free to apply fast paths (e.g. Lama's
92
+ /// balloon-fill shortcut) before the model forward.
93
+ pub trait InpaintForward {
94
+ fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage>;
95
+ }
96
+
97
+ /// Entry point: dispatch on `cfg.strategy` and return an RGB image with the
98
+ /// masked region inpainted. `mask` must already be binarized (0 or 255).
99
+ pub fn run_inpaint<F: InpaintForward>(
100
+ model: &F,
101
+ image: &RgbImage,
102
+ mask: &GrayImage,
103
+ cfg: &HdStrategyConfig,
104
+ ) -> Result<RgbImage> {
105
+ assert_eq!(image.dimensions(), mask.dimensions());
106
+ let max_side = image.width().max(image.height());
107
+
108
+ match cfg.strategy {
109
+ HdStrategy::Crop if max_side > cfg.crop_trigger_size => run_crop(model, image, mask, cfg),
110
+ HdStrategy::Resize if max_side > cfg.resize_limit => run_resize(model, image, mask, cfg),
111
+ _ => pad_forward(model, image, mask, cfg.pad_mod),
112
+ }
113
+ }
114
+
115
+ fn run_crop<F: InpaintForward>(
116
+ model: &F,
117
+ image: &RgbImage,
118
+ mask: &GrayImage,
119
+ cfg: &HdStrategyConfig,
120
+ ) -> Result<RgbImage> {
121
+ let boxes = boxes_from_mask(mask);
122
+ if boxes.is_empty() {
123
+ return Ok(image.clone());
124
+ }
125
+
126
+ tracing::debug!(
127
+ count = boxes.len(),
128
+ "inpaint crop strategy: one forward per mask contour"
129
+ );
130
+
131
+ let mut out = image.clone();
132
+ for b in boxes {
133
+ let (crop_img, crop_mask, [l, t, _r, _bt]) = crop_box(image, mask, b, cfg.crop_margin);
134
+ let crop_result = pad_forward_bounded(model, &crop_img, &crop_mask, cfg)?;
135
+ replace(&mut out, &crop_result, i64::from(l), i64::from(t));
136
+ }
137
+ Ok(out)
138
+ }
139
+
140
+ fn run_resize<F: InpaintForward>(
141
+ model: &F,
142
+ image: &RgbImage,
143
+ mask: &GrayImage,
144
+ cfg: &HdStrategyConfig,
145
+ ) -> Result<RgbImage> {
146
+ let (w, h) = image.dimensions();
147
+ let (nw, nh) = scaled_dims(w, h, cfg.resize_limit);
148
+ tracing::debug!(
149
+ from_w = w,
150
+ from_h = h,
151
+ to_w = nw,
152
+ to_h = nh,
153
+ "inpaint resize strategy"
154
+ );
155
+
156
+ let small_img = resize(image, nw, nh, FilterType::Triangle);
157
+ let small_mask = rebinarize(&resize(mask, nw, nh, FilterType::Triangle));
158
+
159
+ let small_out = pad_forward(model, &small_img, &small_mask, cfg.pad_mod)?;
160
+ let full_out = resize(&small_out, w, h, FilterType::CatmullRom);
161
+
162
+ // Restore untouched pixels from the original so Resize only loses quality
163
+ // where we actually inpainted. Matches IOPaint's
164
+ // `original_pixel_indices = mask < 127`.
165
+ let mut out = full_out;
166
+ for y in 0..h {
167
+ for x in 0..w {
168
+ if mask.get_pixel(x, y).0[0] < 127 {
169
+ out.put_pixel(x, y, *image.get_pixel(x, y));
170
+ }
171
+ }
172
+ }
173
+ Ok(out)
174
+ }
175
+
176
+ /// `pad_forward` with a nested Resize fallback when the input exceeds
177
+ /// `resize_limit`. Used inside the Crop loop so oversized crops don't OOM.
178
+ fn pad_forward_bounded<F: InpaintForward>(
179
+ model: &F,
180
+ image: &RgbImage,
181
+ mask: &GrayImage,
182
+ cfg: &HdStrategyConfig,
183
+ ) -> Result<RgbImage> {
184
+ if image.width().max(image.height()) > cfg.resize_limit {
185
+ run_resize(model, image, mask, cfg)
186
+ } else {
187
+ pad_forward(model, image, mask, cfg.pad_mod)
188
+ }
189
+ }
190
+
191
+ /// Pad both tensors to `pad_mod` on right/bottom with symmetric reflection,
192
+ /// forward through the model, then crop the output back to the input size.
193
+ /// Matches IOPaint's `_pad_forward` / `pad_img_to_modulo`.
194
+ fn pad_forward<F: InpaintForward>(
195
+ model: &F,
196
+ image: &RgbImage,
197
+ mask: &GrayImage,
198
+ pad_mod: u32,
199
+ ) -> Result<RgbImage> {
200
+ let (w, h) = image.dimensions();
201
+ let pad_w = ceil_multiple(w, pad_mod);
202
+ let pad_h = ceil_multiple(h, pad_mod);
203
+
204
+ let out = if pad_w == w && pad_h == h {
205
+ model.forward(image, mask)?
206
+ } else {
207
+ let pad_img = symmetric_pad_rgb(image, pad_w, pad_h);
208
+ let pad_msk = symmetric_pad_gray(mask, pad_w, pad_h);
209
+ let padded_out = model.forward(&pad_img, &pad_msk)?;
210
+ crop_imm(&padded_out, 0, 0, w, h).to_image()
211
+ };
212
+ Ok(out)
213
+ }
214
+
215
+ /// External-contour bounding boxes of a binarized mask. Equivalent to
216
+ /// IOPaint's `boxes_from_mask` (`cv2.findContours(RETR_EXTERNAL)` +
217
+ /// `cv2.boundingRect`). Hole borders are discarded.
218
+ pub fn boxes_from_mask(mask: &GrayImage) -> Vec<Xyxy> {
219
+ let contours = find_contours::<i32>(mask);
220
+ let (mw, mh) = mask.dimensions();
221
+ let mut boxes = Vec::new();
222
+ for contour in contours {
223
+ if contour.border_type != BorderType::Outer || contour.points.is_empty() {
224
+ continue;
225
+ }
226
+ let mut min_x = i32::MAX;
227
+ let mut min_y = i32::MAX;
228
+ let mut max_x = i32::MIN;
229
+ let mut max_y = i32::MIN;
230
+ for p in &contour.points {
231
+ min_x = min_x.min(p.x);
232
+ min_y = min_y.min(p.y);
233
+ max_x = max_x.max(p.x);
234
+ max_y = max_y.max(p.y);
235
+ }
236
+ let x1 = (min_x.max(0) as u32).min(mw);
237
+ let y1 = (min_y.max(0) as u32).min(mh);
238
+ let x2 = (max_x.saturating_add(1).max(0) as u32).min(mw);
239
+ let y2 = (max_y.saturating_add(1).max(0) as u32).min(mh);
240
+ if x2 > x1 && y2 > y1 {
241
+ boxes.push([x1, y1, x2, y2]);
242
+ }
243
+ }
244
+ boxes
245
+ }
246
+
247
+ /// Expand `box_xyxy` by `margin` pixels on each side, clamped to the image.
248
+ /// When the expanded rect would overflow one edge, shift inward so the full
249
+ /// `(box + margin*2)` footprint still fits when possible — matches IOPaint's
250
+ /// `_crop_box` (`iopaint/model/base.py`).
251
+ pub fn crop_box(
252
+ image: &RgbImage,
253
+ mask: &GrayImage,
254
+ box_xyxy: Xyxy,
255
+ margin: u32,
256
+ ) -> (RgbImage, GrayImage, Xyxy) {
257
+ let [bx1, by1, bx2, by2] = box_xyxy;
258
+ let (img_w, img_h) = image.dimensions();
259
+ let cx = (bx1 + bx2) / 2;
260
+ let cy = (by1 + by2) / 2;
261
+ let want_w = (bx2 - bx1) + margin * 2;
262
+ let want_h = (by2 - by1) + margin * 2;
263
+ let half_w = want_w / 2;
264
+ let half_h = want_h / 2;
265
+
266
+ // Signed desired bounds before clamping (i64 to preserve negatives).
267
+ let desire_l = cx as i64 - half_w as i64;
268
+ let desire_r = cx as i64 + half_w as i64;
269
+ let desire_t = cy as i64 - half_h as i64;
270
+ let desire_b = cy as i64 + half_h as i64;
271
+
272
+ let img_w_i = img_w as i64;
273
+ let img_h_i = img_h as i64;
274
+
275
+ let mut l = desire_l.max(0);
276
+ let mut r = desire_r.min(img_w_i);
277
+ let mut t = desire_t.max(0);
278
+ let mut b = desire_b.min(img_h_i);
279
+
280
+ if desire_l < 0 {
281
+ r = (r - desire_l).min(img_w_i);
282
+ }
283
+ if desire_r > img_w_i {
284
+ l = (l - (desire_r - img_w_i)).max(0);
285
+ }
286
+ if desire_t < 0 {
287
+ b = (b - desire_t).min(img_h_i);
288
+ }
289
+ if desire_b > img_h_i {
290
+ t = (t - (desire_b - img_h_i)).max(0);
291
+ }
292
+
293
+ let l = l.clamp(0, img_w_i) as u32;
294
+ let r = r.clamp(0, img_w_i) as u32;
295
+ let t = t.clamp(0, img_h_i) as u32;
296
+ let b = b.clamp(0, img_h_i) as u32;
297
+ let r = r.max(l + 1).min(img_w);
298
+ let b = b.max(t + 1).min(img_h);
299
+
300
+ let cw = r - l;
301
+ let ch = b - t;
302
+ let crop_img = crop_imm(image, l, t, cw, ch).to_image();
303
+ let crop_mask = crop_imm(mask, l, t, cw, ch).to_image();
304
+ (crop_img, crop_mask, [l, t, r, b])
305
+ }
306
+
307
/// Shrink `(w, h)` so that its longer side equals `max_side`, keeping the
/// aspect ratio. Dimensions that already fit are returned unchanged, and a
/// shrunken side is never allowed to round down to zero. Mirrors IOPaint's
/// `resize_max_size`.
pub fn scaled_dims(w: u32, h: u32, max_side: u32) -> (u32, u32) {
    let longest = u32::max(w, h);
    if longest > max_side {
        let factor = f64::from(max_side) / f64::from(longest);
        // Round to the nearest pixel, but keep at least one.
        let shrink = |side: u32| -> u32 {
            let scaled = (f64::from(side) * factor).round() as u32;
            scaled.max(1)
        };
        (shrink(w), shrink(h))
    } else {
        (w, h)
    }
}
319
+
320
/// Round `v` up to the nearest multiple of `m`. A modulus of zero leaves `v`
/// untouched.
fn ceil_multiple(v: u32, m: u32) -> u32 {
    // `checked_rem` yields `None` exactly when `m == 0`.
    match v.checked_rem(m) {
        None | Some(0) => v,
        Some(rem) => v + (m - rem),
    }
}
327
+
328
+ fn rebinarize(mask: &GrayImage) -> GrayImage {
329
+ let mut out = mask.clone();
330
+ for p in out.pixels_mut() {
331
+ p.0[0] = if p.0[0] > 127 { 255 } else { 0 };
332
+ }
333
+ out
334
+ }
335
+
336
+ /// Numpy-style `mode="symmetric"` padding, but only on the right/bottom edges
337
+ /// (we only ever pad up to `pad_mod - 1` pixels to reach a modulo boundary).
338
+ fn symmetric_pad_rgb(img: &RgbImage, new_w: u32, new_h: u32) -> RgbImage {
339
+ let (w, h) = img.dimensions();
340
+ if new_w == w && new_h == h {
341
+ return img.clone();
342
+ }
343
+ let mut out = RgbImage::new(new_w, new_h);
344
+ for y in 0..new_h {
345
+ let sy = reflect_index(y, h);
346
+ for x in 0..new_w {
347
+ let sx = reflect_index(x, w);
348
+ out.put_pixel(x, y, *img.get_pixel(sx, sy));
349
+ }
350
+ }
351
+ out
352
+ }
353
+
354
+ fn symmetric_pad_gray(img: &GrayImage, new_w: u32, new_h: u32) -> GrayImage {
355
+ let (w, h) = img.dimensions();
356
+ if new_w == w && new_h == h {
357
+ return img.clone();
358
+ }
359
+ let mut out = GrayImage::new(new_w, new_h);
360
+ for y in 0..new_h {
361
+ let sy = reflect_index(y, h);
362
+ for x in 0..new_w {
363
+ let sx = reflect_index(x, w);
364
+ out.put_pixel(x, y, *img.get_pixel(sx, sy));
365
+ }
366
+ }
367
+ out
368
+ }
369
+
370
/// Map a coordinate `i` of a right/bottom-padded axis back to a source index
/// in `0..len`, following numpy's `mode="symmetric"` rule.
///
/// Indices inside `0..len` map to themselves. Beyond that the source is
/// mirrored with the edge sample repeated (`… len-2, len-1, len-1, len-2 …`),
/// and the pattern repeats with period `2 * len`. This stays a true
/// reflection for any amount of padding, including padding longer than
/// `2 * len` (e.g. a 2-pixel-wide input padded up to a multiple of 8).
///
/// Returns 0 when `len` is 0, since there is no valid index to return.
fn reflect_index(i: u32, len: u32) -> u32 {
    if len == 0 {
        return 0;
    }
    // Widen first: `2 * len` does not fit in u32 for very large `len`.
    let len = u64::from(len);
    let period = 2 * len;
    let phase = u64::from(i) % period;
    let idx = if phase < len {
        // Forward half of the period: plain copy.
        phase
    } else {
        // Backward half of the period: mirrored copy.
        period - 1 - phase
    };
    // `idx < len`, and `len` originated as a u32, so this cannot truncate.
    idx as u32
}
387
+
388
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Luma, Rgb};

    /// Model stand-in that hands the input image back untouched.
    struct IdentityForward;

    impl InpaintForward for IdentityForward {
        fn forward(&self, image: &RgbImage, _mask: &GrayImage) -> Result<RgbImage> {
            Ok(image.clone())
        }
    }

    /// Uniformly coloured RGB image of the given size.
    fn solid_rgb(w: u32, h: u32, rgb: [u8; 3]) -> RgbImage {
        RgbImage::from_pixel(w, h, Rgb(rgb))
    }

    /// Set every mask pixel in the half-open rectangle `[x0, x1) x [y0, y1)`
    /// to `value`.
    fn paint(mask: &mut GrayImage, [x0, y0, x1, y1]: [u32; 4], value: u8) {
        for y in y0..y1 {
            for x in x0..x1 {
                mask.put_pixel(x, y, Luma([value]));
            }
        }
    }

    #[test]
    fn ceil_multiple_rounds_up() {
        for (v, m, want) in [(8, 8, 8), (9, 8, 16), (0, 8, 0)] {
            assert_eq!(ceil_multiple(v, m), want);
        }
    }

    #[test]
    fn reflect_index_mirrors_beyond_boundary() {
        // len=5, padded on the right only: source indices run
        // 0, 1, 2, 3, 4 | 4, 3, 2, 1, 0 — the edge sample is repeated.
        let expected = [(0, 0), (4, 4), (5, 4), (6, 3), (9, 0)];
        for (i, want) in expected {
            assert_eq!(reflect_index(i, 5), want);
        }
    }

    #[test]
    fn scaled_dims_preserves_aspect() {
        assert_eq!(scaled_dims(1600, 900, 1280), (1280, 720));
        assert_eq!(scaled_dims(800, 600, 1280), (800, 600));
        assert_eq!(scaled_dims(1000, 2000, 1280), (640, 1280));
    }

    #[test]
    fn boxes_from_mask_finds_each_contour() {
        let mut mask = GrayImage::new(100, 100);
        paint(&mut mask, [10, 10, 25, 20], 255);
        paint(&mut mask, [70, 50, 80, 60], 255);

        let mut boxes = boxes_from_mask(&mask);
        assert_eq!(boxes.len(), 2);

        // Detection order is not part of the contract; order by left edge.
        boxes.sort_by_key(|b| b[0]);
        assert_eq!(boxes[0], [10, 10, 25, 20]);
        assert_eq!(boxes[1], [70, 50, 80, 60]);
    }

    #[test]
    fn boxes_from_mask_ignores_holes() {
        // Filled rectangle with a hole punched in the middle.
        let mut mask = GrayImage::new(50, 50);
        paint(&mut mask, [5, 5, 45, 45], 255);
        paint(&mut mask, [20, 20, 30, 30], 0);

        let boxes = boxes_from_mask(&mask);
        assert_eq!(boxes.len(), 1, "hole must not produce a second box");
    }

    #[test]
    fn crop_box_expands_by_margin_additively() {
        let img = solid_rgb(200, 200, [255, 255, 255]);
        let mask = GrayImage::new(200, 200);

        let (cropped, _cropped_mask, rect) = crop_box(&img, &mask, [80, 80, 120, 120], 20);

        assert_eq!(rect, [60, 60, 140, 140]);
        assert_eq!(cropped.dimensions(), (80, 80));
    }

    #[test]
    fn crop_box_shifts_inward_at_edges() {
        let img = solid_rgb(100, 100, [255, 255, 255]);
        let mask = GrayImage::new(100, 100);

        // The box hugs the left edge, so the desired crop would start at -10;
        // the right edge is pushed outward instead to keep the full
        // (box + margin * 2) width.
        let (_cropped, _cropped_mask, [l, t, r, b]) = crop_box(&img, &mask, [0, 40, 20, 60], 10);

        assert_eq!(l, 0);
        assert_eq!(r, 40);
        assert_eq!(t, 30);
        assert_eq!(b, 70);
    }

    #[test]
    fn crop_strategy_skips_when_mask_empty() {
        let img = solid_rgb(900, 900, [50, 60, 70]);
        let mask = GrayImage::new(900, 900);
        let cfg = HdStrategyConfig::lama_default();

        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();

        assert_eq!(out.get_pixel(0, 0).0, [50, 60, 70]);
    }

    #[test]
    fn resize_strategy_restores_unmasked_pixels() {
        // The image exceeds `resize_limit`, so the Resize strategy is expected
        // to downscale it; unmasked pixels must nevertheless come back
        // identical to the input.
        let mut img = solid_rgb(1600, 1200, [10, 20, 30]);
        // A single masked pixel carrying a different value.
        img.put_pixel(500, 500, Rgb([200, 200, 200]));
        let mut mask = GrayImage::new(1600, 1200);
        mask.put_pixel(500, 500, Luma([255]));

        let cfg = HdStrategyConfig {
            strategy: HdStrategy::Resize,
            resize_limit: 640,
            ..HdStrategyConfig::lama_default()
        };
        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();

        assert_eq!(out.get_pixel(0, 0).0, [10, 20, 30]);
        assert_eq!(out.get_pixel(1599, 1199).0, [10, 20, 30]);
    }

    #[test]
    fn crop_strategy_paste_bounds() {
        // Two masked blobs → two crops → the image outside them is untouched.
        let img = solid_rgb(1200, 1200, [100, 100, 100]);
        let mut mask = GrayImage::new(1200, 1200);
        paint(&mut mask, [100, 100, 120, 120], 255);
        paint(&mut mask, [900, 900, 920, 920], 255);

        let cfg = HdStrategyConfig::lama_default();
        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();

        // IdentityForward is a no-op, so output == input everywhere.
        assert_eq!(out.get_pixel(0, 0).0, [100, 100, 100]);
        assert_eq!(out.get_pixel(500, 500).0, [100, 100, 100]);
    }
}
koharu-ml/src/lama/mod.rs CHANGED
@@ -1,23 +1,18 @@
1
  mod fft;
2
  mod model;
3
 
4
- use crate::types::TextRegion;
5
  use anyhow::{Result, bail};
6
  use candle_core::{DType, Device, Tensor};
7
- use image::{
8
- DynamicImage, GenericImageView, GrayImage, Luma, Rgb, RgbImage,
9
- imageops::{crop_imm, replace},
10
- };
11
- use imageproc::{
12
- contours::find_contours, distance_transform::Norm, drawing::draw_polygon_mut, edges::canny,
13
- filter::gaussian_blur_f32, morphology::dilate, point::Point,
14
- };
15
  use koharu_runtime::RuntimeManager;
16
  use tracing::instrument;
17
 
18
  use crate::{
19
  device,
20
- inpainting::{binarize_mask, extract_alpha, restore_alpha_channel},
 
 
 
21
  loading,
22
  };
23
 
@@ -31,20 +26,6 @@ koharu_runtime::declare_hf_model_package!(
31
  order: 130,
32
  );
33
 
34
- const BALLOON_CANNY_LOW: f32 = 70.0;
35
- const BALLOON_CANNY_HIGH: f32 = 140.0;
36
- const BALLOON_WINDOW_RATIO: f64 = 1.7;
37
- const BALLOON_WINDOW_ASPECT_RATIO: f64 = 1.0;
38
- const SIMPLE_BG_THRESHOLD_LOW_VARIANCE: f64 = 10.0;
39
- const SIMPLE_BG_THRESHOLD_HIGH_VARIANCE: f64 = 7.0;
40
- const SIMPLE_BG_CHANNEL_STD_SWITCH: f64 = 1.0;
41
- type Xyxy = [u32; 4];
42
-
43
- struct BalloonMasks {
44
- balloon_mask: GrayImage,
45
- non_text_mask: GrayImage,
46
- }
47
-
48
  pub struct Lama {
49
  model: model::Lama,
50
  device: Device,
@@ -64,33 +45,21 @@ impl Lama {
64
  Ok(Self { model, device })
65
  }
66
 
67
- #[instrument(level = "debug", skip_all)]
68
- fn forward(&self, image: &Tensor, mask: &Tensor) -> Result<Tensor> {
69
- self.model.forward(image, mask)
70
- }
71
-
72
- #[instrument(level = "debug", skip_all)]
73
- pub fn inference_model(
74
- &self,
75
- image: &DynamicImage,
76
- mask: &DynamicImage,
77
- ) -> Result<DynamicImage> {
78
- let (image_tensor, mask_tensor) = self.preprocess(image, mask)?;
79
- let output = self.forward(&image_tensor, &mask_tensor)?;
80
- self.postprocess(&output)
81
- }
82
-
83
  #[instrument(level = "debug", skip_all)]
84
  pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
85
- self.inference_with_blocks(image, mask, None)
86
  }
87
 
 
 
 
88
  #[instrument(level = "debug", skip_all)]
89
- pub fn inference_with_blocks(
90
  &self,
91
  image: &DynamicImage,
92
  mask: &DynamicImage,
93
- text_blocks: Option<&[TextRegion]>,
94
  ) -> Result<DynamicImage> {
95
  if image.dimensions() != mask.dimensions() {
96
  bail!(
@@ -101,12 +70,9 @@ impl Lama {
101
  }
102
 
103
  let binary_mask = binarize_mask(mask);
104
- let output_rgb = if let Some(blocks) = text_blocks.filter(|blocks| !blocks.is_empty()) {
105
- let image_rgb = image.to_rgb8();
106
- self.inference_blockwise(&image_rgb, &binary_mask, blocks)?
107
- } else {
108
- self.inference_crop(&image.to_rgb8(), &binary_mask)?
109
- };
110
 
111
  if image.color().has_alpha() {
112
  let original_alpha = image.to_rgba8();
@@ -119,90 +85,22 @@ impl Lama {
119
  }
120
 
121
  #[instrument(level = "debug", skip_all)]
122
- fn inference_crop(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
123
- if let Some(filled) = try_fill_balloon(image, mask) {
124
- return Ok(filled);
125
- }
126
-
127
- self.inference_model_rgb(image, mask)
128
- }
129
-
130
- #[instrument(level = "debug", skip_all)]
131
- fn inference_blockwise(
132
- &self,
133
- image: &RgbImage,
134
- mask: &GrayImage,
135
- text_blocks: &[TextRegion],
136
- ) -> Result<RgbImage> {
137
- let (im_w, im_h) = image.dimensions();
138
- let mut inpainted = image.clone();
139
- let mut working_mask = mask.clone();
140
-
141
- for block in text_blocks {
142
- let Some(xyxy) = block_xyxy(block, im_w, im_h) else {
143
- continue;
144
- };
145
- let xyxy_e = enlarge_window(
146
- xyxy,
147
- im_w,
148
- im_h,
149
- BALLOON_WINDOW_RATIO,
150
- BALLOON_WINDOW_ASPECT_RATIO,
151
- );
152
- let crop_width = xyxy_e[2].saturating_sub(xyxy_e[0]);
153
- let crop_height = xyxy_e[3].saturating_sub(xyxy_e[1]);
154
- if crop_width == 0 || crop_height == 0 {
155
- continue;
156
- }
157
-
158
- let crop_image =
159
- crop_imm(&inpainted, xyxy_e[0], xyxy_e[1], crop_width, crop_height).to_image();
160
- let crop_mask =
161
- crop_imm(&working_mask, xyxy_e[0], xyxy_e[1], crop_width, crop_height).to_image();
162
-
163
- let output = if count_nonzero(&crop_mask) == 0 {
164
- crop_image
165
- } else if let Some(filled) = try_fill_balloon(&crop_image, &crop_mask) {
166
- filled
167
- } else {
168
- self.inference_model_rgb(&crop_image, &crop_mask)?
169
- };
170
-
171
- replace(
172
- &mut inpainted,
173
- &output,
174
- i64::from(xyxy_e[0]),
175
- i64::from(xyxy_e[1]),
176
- );
177
- clear_mask_bbox(&mut working_mask, xyxy);
178
- }
179
-
180
- Ok(inpainted)
181
  }
182
 
183
  #[instrument(level = "debug", skip_all)]
184
- fn inference_model_rgb(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
185
- Ok(self
186
- .inference_model(
187
- &DynamicImage::ImageRgb8(image.clone()),
188
- &DynamicImage::ImageLuma8(mask.clone()),
189
- )?
190
- .to_rgb8())
191
  }
192
 
193
  #[instrument(level = "debug", skip_all)]
194
- fn preprocess(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<(Tensor, Tensor)> {
195
- if image.dimensions() != mask.dimensions() {
196
- bail!(
197
- "image and mask dimensions dismatch: image is {:?}, mask is {:?}",
198
- image.dimensions(),
199
- mask.dimensions()
200
- );
201
- }
202
  let (w, h) = (image.width() as usize, image.height() as usize);
203
-
204
- let rgb = image.to_rgb8().into_raw();
205
- let luma = mask.to_luma8().into_raw();
206
 
207
  let image_tensor = (Tensor::from_vec(rgb, (1, h, w, 3), &self.device)?
208
  .permute((0, 3, 1, 2))?
@@ -218,7 +116,7 @@ impl Lama {
218
  }
219
 
220
  #[instrument(level = "debug", skip_all)]
221
- fn postprocess(&self, output: &Tensor) -> Result<DynamicImage> {
222
  let output = output.squeeze(0)?;
223
  let (channels, height, width) = output.dims3()?;
224
  if channels != 3 {
@@ -229,439 +127,37 @@ impl Lama {
229
  .permute((1, 2, 0))?
230
  .to_dtype(DType::U8)?;
231
  let raw: Vec<u8> = output.flatten_all()?.to_vec1()?;
232
- let image = RgbImage::from_raw(width as u32, height as u32, raw)
233
- .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))?;
234
- Ok(DynamicImage::ImageRgb8(image))
235
- }
236
- }
237
-
238
- fn block_xyxy(block: &TextRegion, width: u32, height: u32) -> Option<Xyxy> {
239
- let x1 = block.x.floor().max(0.0) as u32;
240
- let y1 = block.y.floor().max(0.0) as u32;
241
- let x2 = (block.x + block.width).ceil().max(block.x.floor()) as u32;
242
- let y2 = (block.y + block.height).ceil().max(block.y.floor()) as u32;
243
-
244
- let x1 = x1.min(width);
245
- let y1 = y1.min(height);
246
- let x2 = x2.min(width);
247
- let y2 = y2.min(height);
248
-
249
- if x2 <= x1 || y2 <= y1 {
250
- return None;
251
- }
252
-
253
- Some([x1, y1, x2, y2])
254
- }
255
-
256
- fn enlarge_window(rect: Xyxy, im_w: u32, im_h: u32, ratio: f64, aspect_ratio: f64) -> Xyxy {
257
- debug_assert!(ratio > 1.0);
258
-
259
- let [x1, y1, x2, y2] = rect;
260
- let w = f64::from(x2.saturating_sub(x1));
261
- let h = f64::from(y2.saturating_sub(y1));
262
- if w <= 0.0 || h <= 0.0 || aspect_ratio <= 0.0 {
263
- return [0, 0, 0, 0];
264
- }
265
-
266
- let a = aspect_ratio;
267
- let b = w + h * aspect_ratio;
268
- let c = (1.0 - ratio) * w * h;
269
- let discriminant = (b * b - 4.0 * a * c).max(0.0);
270
- let delta = ((-b + discriminant.sqrt()) / (2.0 * a) / 2.0).round();
271
- let mut delta_h = delta.max(0.0) as u32;
272
- let mut delta_w = (delta * aspect_ratio).round().max(0.0) as u32;
273
-
274
- delta_w = delta_w.min(x1).min(im_w.saturating_sub(x2));
275
- delta_h = delta_h.min(y1).min(im_h.saturating_sub(y2));
276
-
277
- [
278
- x1.saturating_sub(delta_w),
279
- y1.saturating_sub(delta_h),
280
- (x2 + delta_w).min(im_w),
281
- (y2 + delta_h).min(im_h),
282
- ]
283
- }
284
-
285
- fn try_fill_balloon(image: &RgbImage, mask: &GrayImage) -> Option<RgbImage> {
286
- let masks = extract_balloon_mask(image, mask)?;
287
- let average_bg_color = median_rgb(image, &masks.non_text_mask)?;
288
- let std_rgb = color_stddev(image, &masks.non_text_mask, average_bg_color);
289
- let inpaint_thresh = if stddev3(std_rgb) > SIMPLE_BG_CHANNEL_STD_SWITCH {
290
- SIMPLE_BG_THRESHOLD_HIGH_VARIANCE
291
- } else {
292
- SIMPLE_BG_THRESHOLD_LOW_VARIANCE
293
- };
294
- let std_max = std_rgb.into_iter().fold(0.0, f64::max);
295
-
296
- if std_max >= inpaint_thresh {
297
- return None;
298
- }
299
-
300
- let mut result = image.clone();
301
- let fill = [
302
- average_bg_color[0] as u8,
303
- average_bg_color[1] as u8,
304
- average_bg_color[2] as u8,
305
- ];
306
- for (x, y, pixel) in masks.balloon_mask.enumerate_pixels() {
307
- if pixel.0[0] > 0 {
308
- result.put_pixel(x, y, Rgb(fill));
309
- }
310
- }
311
-
312
- Some(result)
313
- }
314
-
315
- fn extract_balloon_mask(image: &RgbImage, mask: &GrayImage) -> Option<BalloonMasks> {
316
- if image.dimensions() != mask.dimensions() {
317
- return None;
318
- }
319
-
320
- let text_bbox = non_zero_bbox(mask)?;
321
- let text_sum = count_nonzero(mask);
322
- if text_sum == 0 {
323
- return None;
324
- }
325
-
326
- let gray = DynamicImage::ImageRgb8(image.clone()).to_luma8();
327
- let blurred = gaussian_blur_f32(&gray, 1.0);
328
- let mut cannyed = canny(&blurred, BALLOON_CANNY_LOW, BALLOON_CANNY_HIGH);
329
- cannyed = dilate(&cannyed, Norm::LInf, 1);
330
- draw_binary_border(&mut cannyed);
331
- subtract_binary_mask(&mut cannyed, mask);
332
-
333
- let contours = find_contours::<i32>(&cannyed);
334
- let (width, height) = cannyed.dimensions();
335
- let mut best_mask = None;
336
- let mut best_area = f64::INFINITY;
337
-
338
- for contour in contours {
339
- let Some(polygon) = contour_polygon(&contour.points) else {
340
- continue;
341
- };
342
- let bbox = polygon_bbox(&polygon)?;
343
- if bbox[0] > text_bbox[0]
344
- || bbox[1] > text_bbox[1]
345
- || bbox[2] < text_bbox[2]
346
- || bbox[3] < text_bbox[3]
347
- {
348
- continue;
349
- }
350
-
351
- let mut candidate = GrayImage::new(width, height);
352
- draw_polygon_mut(&mut candidate, &polygon, Luma([255u8]));
353
- if count_overlap(&candidate, mask) < text_sum {
354
- continue;
355
- }
356
-
357
- let area = polygon_area(&polygon);
358
- if area < best_area {
359
- best_area = area;
360
- best_mask = Some(candidate);
361
- }
362
- }
363
-
364
- let balloon_mask = best_mask?;
365
- let mut non_text_mask = balloon_mask.clone();
366
- for (x, y, pixel) in mask.enumerate_pixels() {
367
- if pixel.0[0] > 0 {
368
- non_text_mask.put_pixel(x, y, Luma([0]));
369
- }
370
  }
371
-
372
- Some(BalloonMasks {
373
- balloon_mask,
374
- non_text_mask,
375
- })
376
- }
377
-
378
- fn contour_polygon(points: &[Point<i32>]) -> Option<Vec<Point<i32>>> {
379
- let mut polygon = points.to_vec();
380
- if polygon.len() < 3 {
381
- return None;
382
- }
383
- if polygon.first() == polygon.last() {
384
- polygon.pop();
385
- }
386
- if polygon.len() < 3 {
387
- return None;
388
- }
389
- Some(polygon)
390
  }
391
 
392
- fn polygon_bbox(points: &[Point<i32>]) -> Option<Xyxy> {
393
- let first = points.first()?;
394
- let mut min_x = first.x;
395
- let mut min_y = first.y;
396
- let mut max_x = first.x;
397
- let mut max_y = first.y;
398
- for point in points.iter().skip(1) {
399
- min_x = min_x.min(point.x);
400
- min_y = min_y.min(point.y);
401
- max_x = max_x.max(point.x);
402
- max_y = max_y.max(point.y);
403
- }
404
-
405
- Some([
406
- min_x.max(0) as u32,
407
- min_y.max(0) as u32,
408
- max_x.max(min_x).saturating_add(1) as u32,
409
- max_y.max(min_y).saturating_add(1) as u32,
410
- ])
411
- }
412
-
413
- fn polygon_area(points: &[Point<i32>]) -> f64 {
414
- let mut area = 0.0;
415
- for index in 0..points.len() {
416
- let current = points[index];
417
- let next = points[(index + 1) % points.len()];
418
- area += f64::from(current.x) * f64::from(next.y) - f64::from(next.x) * f64::from(current.y);
419
- }
420
- area.abs() * 0.5
421
  }
422
 
423
- fn draw_binary_border(image: &mut GrayImage) {
424
- let width = image.width();
425
- let height = image.height();
426
- if width == 0 || height == 0 {
427
- return;
428
- }
429
-
430
- for x in 0..width {
431
- image.put_pixel(x, 0, Luma([255]));
432
- image.put_pixel(x, height - 1, Luma([255]));
433
- }
434
- for y in 0..height {
435
- image.put_pixel(0, y, Luma([255]));
436
- image.put_pixel(width - 1, y, Luma([255]));
437
- }
438
- }
439
-
440
- fn subtract_binary_mask(image: &mut GrayImage, mask: &GrayImage) {
441
- for (x, y, pixel) in image.enumerate_pixels_mut() {
442
- if mask.get_pixel(x, y).0[0] > 0 {
443
- pixel.0[0] = 0;
444
  }
445
- }
446
- }
447
-
448
- fn non_zero_bbox(mask: &GrayImage) -> Option<Xyxy> {
449
- let (width, height) = mask.dimensions();
450
- let mut min_x = width;
451
- let mut min_y = height;
452
- let mut max_x = 0;
453
- let mut max_y = 0;
454
- let mut found = false;
455
-
456
- for (x, y, pixel) in mask.enumerate_pixels() {
457
- if pixel.0[0] == 0 {
458
- continue;
459
- }
460
- found = true;
461
- min_x = min_x.min(x);
462
- min_y = min_y.min(y);
463
- max_x = max_x.max(x);
464
- max_y = max_y.max(y);
465
- }
466
-
467
- found.then_some([
468
- min_x,
469
- min_y,
470
- max_x.saturating_add(1),
471
- max_y.saturating_add(1),
472
- ])
473
- }
474
-
475
- fn clear_mask_bbox(mask: &mut GrayImage, bbox: Xyxy) {
476
- for y in bbox[1]..bbox[3] {
477
- for x in bbox[0]..bbox[2] {
478
- mask.put_pixel(x, y, Luma([0]));
479
- }
480
- }
481
- }
482
-
483
- fn count_nonzero(mask: &GrayImage) -> u32 {
484
- mask.pixels().filter(|pixel| pixel.0[0] > 0).count() as u32
485
- }
486
-
487
- fn count_overlap(left: &GrayImage, right: &GrayImage) -> u32 {
488
- left.pixels()
489
- .zip(right.pixels())
490
- .filter(|(l, r)| l.0[0] > 0 && r.0[0] > 0)
491
- .count() as u32
492
- }
493
-
494
- fn median_rgb(image: &RgbImage, mask: &GrayImage) -> Option<[f64; 3]> {
495
- let mut channels = [Vec::new(), Vec::new(), Vec::new()];
496
- for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
497
- if mask_pixel.0[0] == 0 {
498
- continue;
499
- }
500
- channels[0].push(pixel.0[0]);
501
- channels[1].push(pixel.0[1]);
502
- channels[2].push(pixel.0[2]);
503
- }
504
-
505
- Some([
506
- median_channel(&channels[0])?,
507
- median_channel(&channels[1])?,
508
- median_channel(&channels[2])?,
509
- ])
510
- }
511
-
512
- fn median_channel(values: &[u8]) -> Option<f64> {
513
- if values.is_empty() {
514
- return None;
515
- }
516
-
517
- let mut values = values.to_vec();
518
- values.sort_unstable();
519
- let mid = values.len() / 2;
520
- if values.len().is_multiple_of(2) {
521
- Some((f64::from(values[mid - 1]) + f64::from(values[mid])) / 2.0)
522
- } else {
523
- Some(f64::from(values[mid]))
524
- }
525
- }
526
-
527
- fn color_stddev(image: &RgbImage, mask: &GrayImage, median: [f64; 3]) -> [f64; 3] {
528
- let mut sum_sq = [0.0; 3];
529
- let mut count = 0.0;
530
-
531
- for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
532
- if mask_pixel.0[0] == 0 {
533
- continue;
534
- }
535
- count += 1.0;
536
- for channel in 0..3 {
537
- let diff = f64::from(pixel.0[channel]) - median[channel];
538
- sum_sq[channel] += diff * diff;
539
  }
 
540
  }
541
-
542
- if count == 0.0 {
543
- return [f64::INFINITY; 3];
544
- }
545
-
546
- [
547
- (sum_sq[0] / count).sqrt(),
548
- (sum_sq[1] / count).sqrt(),
549
- (sum_sq[2] / count).sqrt(),
550
- ]
551
- }
552
-
553
- fn stddev3(values: [f64; 3]) -> f64 {
554
- let mean = values.iter().sum::<f64>() / 3.0;
555
- let variance = values
556
- .iter()
557
- .map(|value| {
558
- let diff = value - mean;
559
- diff * diff
560
- })
561
- .sum::<f64>()
562
- / 3.0;
563
- variance.sqrt()
564
  }
565
 
566
  #[cfg(test)]
567
  mod tests {
568
- use super::{
569
- BALLOON_WINDOW_ASPECT_RATIO, BALLOON_WINDOW_RATIO, clear_mask_bbox, count_nonzero,
570
- enlarge_window, extract_balloon_mask, try_fill_balloon,
571
- };
572
  use crate::inpainting::restore_alpha_channel;
573
- use crate::types::TextRegion;
574
  use image::{GrayImage, Luma, Rgb, RgbImage};
575
- use imageproc::drawing::draw_hollow_rect_mut;
576
- use imageproc::rect::Rect;
577
 
578
  const ALPHA_RING_RADIUS: u8 = 7;
579
 
580
- #[test]
581
- fn enlarge_window_matches_ratio_1_7_reference() {
582
- let enlarged = enlarge_window(
583
- [10, 20, 50, 60],
584
- 200,
585
- 150,
586
- BALLOON_WINDOW_RATIO,
587
- BALLOON_WINDOW_ASPECT_RATIO,
588
- );
589
-
590
- assert_eq!(enlarged, [4, 14, 56, 66]);
591
- }
592
-
593
- #[test]
594
- fn extract_balloon_mask_prefers_smallest_covering_contour() {
595
- let mut image = RgbImage::from_pixel(80, 80, Rgb([255, 255, 255]));
596
- draw_hollow_rect_mut(&mut image, Rect::at(4, 4).of_size(72, 72), Rgb([0, 0, 0]));
597
- draw_hollow_rect_mut(&mut image, Rect::at(20, 20).of_size(28, 20), Rgb([0, 0, 0]));
598
-
599
- let mut mask = GrayImage::new(80, 80);
600
- for y in 24..36 {
601
- for x in 24..44 {
602
- mask.put_pixel(x, y, Luma([255]));
603
- }
604
- }
605
-
606
- let masks = extract_balloon_mask(&image, &mask).expect("balloon should be detected");
607
- let balloon_pixels = count_nonzero(&masks.balloon_mask);
608
-
609
- assert!(
610
- balloon_pixels < 900,
611
- "expected inner contour fill, got {balloon_pixels}"
612
- );
613
- assert!(
614
- balloon_pixels > 250,
615
- "expected meaningful bubble area, got {balloon_pixels}"
616
- );
617
- }
618
-
619
- #[test]
620
- fn simple_balloon_chooses_fill_but_textured_balloon_does_not() {
621
- let mut flat = RgbImage::from_pixel(64, 64, Rgb([240, 240, 240]));
622
- draw_hollow_rect_mut(&mut flat, Rect::at(8, 8).of_size(48, 32), Rgb([0, 0, 0]));
623
-
624
- let mut mask = GrayImage::new(64, 64);
625
- for y in 18..30 {
626
- for x in 18..46 {
627
- mask.put_pixel(x, y, Luma([255]));
628
- }
629
- }
630
-
631
- assert!(try_fill_balloon(&flat, &mask).is_some());
632
-
633
- let mut textured = flat.clone();
634
- for y in 9..39 {
635
- for x in 9..55 {
636
- let noise = ((x + y) % 23) as u8;
637
- textured.put_pixel(
638
- x,
639
- y,
640
- Rgb([200 + noise, 210 + (noise / 2), 220 - (noise / 3)]),
641
- );
642
- }
643
- }
644
-
645
- assert!(try_fill_balloon(&textured, &mask).is_none());
646
- }
647
-
648
- #[test]
649
- fn clearing_mask_consumes_only_original_bbox() {
650
- let mut mask = GrayImage::from_pixel(32, 32, Luma([255]));
651
- clear_mask_bbox(&mut mask, [8, 10, 16, 18]);
652
-
653
- for y in 10..18 {
654
- for x in 8..16 {
655
- assert_eq!(mask.get_pixel(x, y).0[0], 0);
656
- }
657
- }
658
-
659
- assert_eq!(mask.get_pixel(7, 10).0[0], 255);
660
- assert_eq!(mask.get_pixel(16, 17).0[0], 255);
661
- assert_eq!(mask.get_pixel(8, 9).0[0], 255);
662
- assert_eq!(mask.get_pixel(15, 18).0[0], 255);
663
- }
664
-
665
  #[test]
666
  fn rgba_alpha_restore_uses_surrounding_ring() {
667
  let image = RgbImage::from_pixel(32, 32, Rgb([20, 30, 40]));
@@ -685,18 +181,4 @@ mod tests {
685
  assert_eq!(restored.get_pixel(15, 15).0[3], 64);
686
  assert_eq!(restored.get_pixel(2, 2).0[3], 255);
687
  }
688
-
689
- #[test]
690
- fn block_xyxy_rounds_and_clamps_document_coords() {
691
- let block = TextRegion {
692
- x: 10.2,
693
- y: 20.7,
694
- width: 15.1,
695
- height: 9.4,
696
- ..Default::default()
697
- };
698
-
699
- let bbox = super::block_xyxy(&block, 100, 100).expect("bbox");
700
- assert_eq!(bbox, [10, 20, 26, 31]);
701
- }
702
  }
 
1
  mod fft;
2
  mod model;
3
 
 
4
  use anyhow::{Result, bail};
5
  use candle_core::{DType, Device, Tensor};
6
+ use image::{DynamicImage, GenericImageView, GrayImage, RgbImage};
 
 
 
 
 
 
 
7
  use koharu_runtime::RuntimeManager;
8
  use tracing::instrument;
9
 
10
  use crate::{
11
  device,
12
+ inpainting::{
13
+ HdStrategyConfig, InpaintForward, binarize_mask, extract_alpha, restore_alpha_channel,
14
+ run_inpaint, try_fill_balloon,
15
+ },
16
  loading,
17
  };
18
 
 
26
  order: 130,
27
  );
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  pub struct Lama {
30
  model: model::Lama,
31
  device: Device,
 
45
  Ok(Self { model, device })
46
  }
47
 
48
+ /// Run inpainting with the manga-tuned default strategy (Crop, 800/128/1280).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  #[instrument(level = "debug", skip_all)]
50
  pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
51
+ self.inference_with_config(image, mask, &HdStrategyConfig::lama_default())
52
  }
53
 
54
+ /// Run inpainting with a caller-supplied [`HdStrategyConfig`]. Use this to
55
+ /// pick a different strategy (Original / Resize) or tune the trigger /
56
+ /// margin / resize-limit for GPUs with less VRAM.
57
  #[instrument(level = "debug", skip_all)]
58
+ pub fn inference_with_config(
59
  &self,
60
  image: &DynamicImage,
61
  mask: &DynamicImage,
62
+ cfg: &HdStrategyConfig,
63
  ) -> Result<DynamicImage> {
64
  if image.dimensions() != mask.dimensions() {
65
  bail!(
 
70
  }
71
 
72
  let binary_mask = binarize_mask(mask);
73
+ let image_rgb = image.to_rgb8();
74
+ let forward = LamaForward { lama: self };
75
+ let output_rgb = run_inpaint(&forward, &image_rgb, &binary_mask, cfg)?;
 
 
 
76
 
77
  if image.color().has_alpha() {
78
  let original_alpha = image.to_rgba8();
 
85
  }
86
 
87
  #[instrument(level = "debug", skip_all)]
88
+ fn forward(&self, image: &Tensor, mask: &Tensor) -> Result<Tensor> {
89
+ self.model.forward(image, mask)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  }
91
 
92
  #[instrument(level = "debug", skip_all)]
93
+ fn inference_model(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
94
+ let (image_tensor, mask_tensor) = self.preprocess(image, mask)?;
95
+ let output = self.forward(&image_tensor, &mask_tensor)?;
96
+ self.postprocess(&output)
 
 
 
97
  }
98
 
99
  #[instrument(level = "debug", skip_all)]
100
+ fn preprocess(&self, image: &RgbImage, mask: &GrayImage) -> Result<(Tensor, Tensor)> {
 
 
 
 
 
 
 
101
  let (w, h) = (image.width() as usize, image.height() as usize);
102
+ let rgb = image.clone().into_raw();
103
+ let luma = mask.clone().into_raw();
 
104
 
105
  let image_tensor = (Tensor::from_vec(rgb, (1, h, w, 3), &self.device)?
106
  .permute((0, 3, 1, 2))?
 
116
  }
117
 
118
  #[instrument(level = "debug", skip_all)]
119
+ fn postprocess(&self, output: &Tensor) -> Result<RgbImage> {
120
  let output = output.squeeze(0)?;
121
  let (channels, height, width) = output.dims3()?;
122
  if channels != 3 {
 
127
  .permute((1, 2, 0))?
128
  .to_dtype(DType::U8)?;
129
  let raw: Vec<u8> = output.flatten_all()?.to_vec1()?;
130
+ RgbImage::from_raw(width as u32, height as u32, raw)
131
+ .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
 
135
+ /// [`InpaintForward`] impl used by the HD-strategy dispatcher. Applies the
136
+ /// balloon-fill fast path on a per-crop basis before falling back to the
137
+ /// model forward — flat-background speech bubbles skip the model entirely.
138
+ struct LamaForward<'a> {
139
+ lama: &'a Lama,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  }
141
 
142
+ impl InpaintForward for LamaForward<'_> {
143
+ fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
144
+ if mask.pixels().all(|p| p.0[0] == 0) {
145
+ return Ok(image.clone());
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  }
147
+ if let Some(filled) = try_fill_balloon(image, mask) {
148
+ return Ok(filled);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  }
150
+ self.lama.inference_model(image, mask)
151
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  }
153
 
154
  #[cfg(test)]
155
  mod tests {
 
 
 
 
156
  use crate::inpainting::restore_alpha_channel;
 
157
  use image::{GrayImage, Luma, Rgb, RgbImage};
 
 
158
 
159
  const ALPHA_RING_RADIUS: u8 = 7;
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  #[test]
162
  fn rgba_alpha_restore_uses_surrounding_ring() {
163
  let image = RgbImage::from_pixel(32, 32, Rgb([20, 30, 40]));
 
181
  assert_eq!(restored.get_pixel(15, 15).0[3], 64);
182
  assert_eq!(restored.get_pixel(2, 2).0[3], 255);
183
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  }
koharu-ml/src/lib.rs CHANGED
@@ -1,10 +1,10 @@
1
  mod hf_hub;
2
- mod inpainting;
3
 
4
  pub mod aot_inpainting;
5
  pub mod comic_text_bubble_detector;
6
  pub mod comic_text_detector;
7
  pub mod font_detector;
 
8
  pub mod lama;
9
  pub mod loading;
10
  pub mod manga_ocr;
 
1
  mod hf_hub;
 
2
 
3
  pub mod aot_inpainting;
4
  pub mod comic_text_bubble_detector;
5
  pub mod comic_text_detector;
6
  pub mod font_detector;
7
+ pub mod inpainting;
8
  pub mod lama;
9
  pub mod loading;
10
  pub mod manga_ocr;
koharu-ml/tests/inpaint.rs CHANGED
@@ -1,7 +1,6 @@
1
  use std::path::Path;
2
 
3
  use image::GenericImageView;
4
- use koharu_ml::TextRegion;
5
  use koharu_ml::aot_inpainting::AotInpainting;
6
  use koharu_ml::lama::Lama;
7
 
@@ -40,48 +39,6 @@ async fn lama_inpainting_updates_masked_region() -> anyhow::Result<()> {
40
  Ok(())
41
  }
42
 
43
- #[tokio::test]
44
- #[ignore]
45
- async fn lama_block_aware_inpainting_returns_same_size() -> anyhow::Result<()> {
46
- let fixtures = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures");
47
-
48
- let runtime = support::cpu_runtime();
49
- let lama = Lama::load(&runtime, false).await?;
50
- let base = image::open(fixtures.join("image.jpg"))?;
51
- let mask = image::open(fixtures.join("mask.png"))?;
52
- let mask_luma = mask.to_luma8();
53
-
54
- let mut min_x = mask_luma.width();
55
- let mut min_y = mask_luma.height();
56
- let mut max_x = 0;
57
- let mut max_y = 0;
58
- let mut found = false;
59
- for (x, y, pixel) in mask_luma.enumerate_pixels() {
60
- if pixel.0[0] == 0 {
61
- continue;
62
- }
63
- found = true;
64
- min_x = min_x.min(x);
65
- min_y = min_y.min(y);
66
- max_x = max_x.max(x);
67
- max_y = max_y.max(y);
68
- }
69
-
70
- assert!(found, "mask fixture should contain a non-empty region");
71
-
72
- let block = TextRegion {
73
- x: min_x as f32,
74
- y: min_y as f32,
75
- width: max_x.saturating_sub(min_x).saturating_add(1) as f32,
76
- height: max_y.saturating_sub(min_y).saturating_add(1) as f32,
77
- ..Default::default()
78
- };
79
-
80
- let output = lama.inference_with_blocks(&base, &mask, Some(&[block]))?;
81
- assert_eq!(output.dimensions(), base.dimensions());
82
- Ok(())
83
- }
84
-
85
  #[tokio::test]
86
  #[ignore]
87
  async fn aot_inpainting_updates_masked_region() -> anyhow::Result<()> {
 
1
  use std::path::Path;
2
 
3
  use image::GenericImageView;
 
4
  use koharu_ml::aot_inpainting::AotInpainting;
5
  use koharu_ml::lama::Lama;
6
 
 
39
  Ok(())
40
  }
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  #[tokio::test]
43
  #[ignore]
44
  async fn aot_inpainting_updates_masked_region() -> anyhow::Result<()> {
koharu-rpc/src/mcp/mod.rs CHANGED
@@ -199,7 +199,7 @@ impl KoharuServer {
199
  let cpu = app.cpu_only();
200
  tokio::spawn(async move {
201
  let _ = koharu_app::pipeline::run(
202
- session, registry, runtime, cpu, llm, renderer, spec, cancel, None,
203
  )
204
  .await;
205
  });
 
199
  let cpu = app.cpu_only();
200
  tokio::spawn(async move {
201
  let _ = koharu_app::pipeline::run(
202
+ session, registry, runtime, cpu, llm, renderer, spec, cancel, None, None,
203
  )
204
  .await;
205
  });
koharu-rpc/src/routes/pipelines.rs CHANGED
@@ -9,10 +9,12 @@ use std::sync::atomic::AtomicBool;
9
 
10
  use axum::Json;
11
  use axum::extract::State;
12
- use koharu_app::pipeline::{self, PipelineRunOptions, PipelineSpec, ProgressTick, Scope};
 
 
13
  use koharu_core::{
14
- AppEvent, JobFinishedEvent, JobStatus, JobSummary, PageId, PipelineProgress, PipelineStatus,
15
- Region,
16
  };
17
  use serde::{Deserialize, Serialize};
18
  use utoipa_axum::{router::OpenApiRouter, routes};
@@ -123,6 +125,17 @@ async fn start_pipeline(
123
  overall_percent: tick.overall_percent,
124
  }));
125
  });
 
 
 
 
 
 
 
 
 
 
 
126
  tokio::spawn(async move {
127
  let result = pipeline::run(
128
  session_c,
@@ -134,12 +147,23 @@ async fn start_pipeline(
134
  spec,
135
  cancel,
136
  Some(progress_sink),
 
137
  )
138
  .await;
139
  let (status, error) = match &result {
140
- Ok(()) => (JobStatus::Completed, None),
 
 
 
 
 
 
 
141
  Err(e) if e.to_string().contains("cancelled") => (JobStatus::Cancelled, None),
142
- Err(e) => (JobStatus::Failed, Some(format!("{e:#}"))),
 
 
 
143
  };
144
  app_c.jobs.insert(
145
  op_id_c.clone(),
 
9
 
10
  use axum::Json;
11
  use axum::extract::State;
12
+ use koharu_app::pipeline::{
13
+ self, PipelineRunOptions, PipelineSpec, ProgressTick, Scope, WarningTick,
14
+ };
15
  use koharu_core::{
16
+ AppEvent, JobFinishedEvent, JobStatus, JobSummary, JobWarningEvent, PageId, PipelineProgress,
17
+ PipelineStatus, Region,
18
  };
19
  use serde::{Deserialize, Serialize};
20
  use utoipa_axum::{router::OpenApiRouter, routes};
 
125
  overall_percent: tick.overall_percent,
126
  }));
127
  });
128
+ let warning_bus = app.bus.clone();
129
+ let warning_op_id = operation_id.clone();
130
+ let warning_sink: pipeline::WarningSink = Arc::new(move |tick: WarningTick| {
131
+ warning_bus.publish(AppEvent::JobWarning(JobWarningEvent {
132
+ job_id: warning_op_id.clone(),
133
+ page_index: tick.page_index,
134
+ total_pages: tick.total_pages,
135
+ step_id: tick.step_id,
136
+ message: tick.message,
137
+ }));
138
+ });
139
  tokio::spawn(async move {
140
  let result = pipeline::run(
141
  session_c,
 
147
  spec,
148
  cancel,
149
  Some(progress_sink),
150
+ Some(warning_sink),
151
  )
152
  .await;
153
  let (status, error) = match &result {
154
+ Ok(outcome) if outcome.warning_count == 0 => (JobStatus::Completed, None),
155
+ Ok(outcome) => (
156
+ JobStatus::CompletedWithErrors,
157
+ Some(format!(
158
+ "{} step(s) failed; see warnings for details",
159
+ outcome.warning_count
160
+ )),
161
+ ),
162
  Err(e) if e.to_string().contains("cancelled") => (JobStatus::Cancelled, None),
163
+ Err(e) => {
164
+ tracing::warn!(operation_id = %op_id_c, "pipeline run failed: {e:#}");
165
+ (JobStatus::Failed, Some(format!("{e:#}")))
166
+ }
167
  };
168
  app_c.jobs.insert(
169
  op_id_c.clone(),
ui/components/ActivityBubble.tsx CHANGED
@@ -1,12 +1,17 @@
1
  'use client'
2
 
3
- import { CircleXIcon } from 'lucide-react'
4
  import { type ReactNode } from 'react'
5
  import { useTranslation } from 'react-i18next'
6
 
7
  import { Button } from '@/components/ui/button'
8
  import { cancelOperation } from '@/lib/api/default/default'
9
- import type { DownloadProgress, JobSummary, PipelineProgress } from '@/lib/api/schemas'
 
 
 
 
 
10
  import { useDownloadsStore } from '@/lib/stores/downloadsStore'
11
  import { useEditorUiStore } from '@/lib/stores/editorUiStore'
12
  import { type JobEntry, useJobsStore } from '@/lib/stores/jobsStore'
@@ -112,6 +117,39 @@ function ErrorCard({
112
  )
113
  }
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t: TranslateFunc }) {
116
  const progress: PipelineProgress | undefined = job.progress
117
  const percent = clampProgress(progress?.overallPercent)
@@ -133,6 +171,7 @@ function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t:
133
  : undefined
134
  const subtitle =
135
  [pageText, stepLabel].filter(Boolean).join(' \u00b7 ') || t('operations.inProgress')
 
136
 
137
  return (
138
  <BubbleCard>
@@ -148,6 +187,7 @@ function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t:
148
  </div>
149
  </div>
150
  <ProgressBar percent={percent} />
 
151
  <div className='mt-3 flex justify-end'>
152
  <Button
153
  data-testid='operation-cancel'
 
1
  'use client'
2
 
3
+ import { AlertTriangleIcon, CircleXIcon } from 'lucide-react'
4
  import { type ReactNode } from 'react'
5
  import { useTranslation } from 'react-i18next'
6
 
7
  import { Button } from '@/components/ui/button'
8
  import { cancelOperation } from '@/lib/api/default/default'
9
+ import type {
10
+ DownloadProgress,
11
+ JobSummary,
12
+ JobWarningEvent,
13
+ PipelineProgress,
14
+ } from '@/lib/api/schemas'
15
  import { useDownloadsStore } from '@/lib/stores/downloadsStore'
16
  import { useEditorUiStore } from '@/lib/stores/editorUiStore'
17
  import { type JobEntry, useJobsStore } from '@/lib/stores/jobsStore'
 
117
  )
118
  }
119
 
120
+ function JobWarnings({ warnings, t }: { warnings: JobWarningEvent[]; t: TranslateFunc }) {
121
+ const latest = warnings[warnings.length - 1]
122
+ const count = warnings.length
123
+ const pageLabel =
124
+ typeof latest.totalPages === 'number' && latest.totalPages > 1
125
+ ? t('operations.imageProgress', {
126
+ current: latest.pageIndex + 1,
127
+ total: latest.totalPages,
128
+ })
129
+ : undefined
130
+ const header =
131
+ count === 1 ? t('operations.warningsOne') : t('operations.warningsOther', { count })
132
+ return (
133
+ <div
134
+ data-testid='operation-warnings'
135
+ className='mt-3 rounded-lg border border-amber-200/70 bg-amber-50/80 p-2.5 dark:border-amber-900/70 dark:bg-amber-950/40'
136
+ >
137
+ <div className='flex items-start gap-2 text-amber-900 dark:text-amber-200'>
138
+ <AlertTriangleIcon className='mt-0.5 size-3.5 shrink-0' />
139
+ <div className='min-w-0 flex-1'>
140
+ <div className='text-[11px] font-semibold'>{header}</div>
141
+ <div className='mt-0.5 truncate text-[11px] text-amber-800/90 dark:text-amber-200/80'>
142
+ {[latest.stepId, pageLabel].filter(Boolean).join(' \u00b7 ')}
143
+ </div>
144
+ <div className='mt-1 line-clamp-2 text-[11px] break-words text-amber-900/80 dark:text-amber-100/80'>
145
+ {latest.message}
146
+ </div>
147
+ </div>
148
+ </div>
149
+ </div>
150
+ )
151
+ }
152
+
153
  function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t: TranslateFunc }) {
154
  const progress: PipelineProgress | undefined = job.progress
155
  const percent = clampProgress(progress?.overallPercent)
 
171
  : undefined
172
  const subtitle =
173
  [pageText, stepLabel].filter(Boolean).join(' \u00b7 ') || t('operations.inProgress')
174
+ const warnings = job.warnings ?? []
175
 
176
  return (
177
  <BubbleCard>
 
187
  </div>
188
  </div>
189
  <ProgressBar percent={percent} />
190
+ {warnings.length > 0 && <JobWarnings warnings={warnings} t={t} />}
191
  <div className='mt-3 flex justify-end'>
192
  <Button
193
  data-testid='operation-cancel'
ui/lib/api/default/default.msw.ts CHANGED
@@ -323,6 +323,16 @@ export const getEventsResponseMock = (): AppEvent =>
323
  },
324
  ...{ event: faker.helpers.arrayElement(['jobProgress'] as const) },
325
  },
 
 
 
 
 
 
 
 
 
 
326
  {
327
  ...{
328
  error: faker.helpers.arrayElement([
 
323
  },
324
  ...{ event: faker.helpers.arrayElement(['jobProgress'] as const) },
325
  },
326
+ {
327
+ ...{
328
+ jobId: faker.string.alpha({ length: { min: 10, max: 20 } }),
329
+ message: faker.string.alpha({ length: { min: 10, max: 20 } }),
330
+ pageIndex: faker.number.int({ min: 0 }),
331
+ stepId: faker.string.alpha({ length: { min: 10, max: 20 } }),
332
+ totalPages: faker.number.int({ min: 0 }),
333
+ },
334
+ ...{ event: faker.helpers.arrayElement(['jobWarning'] as const) },
335
+ },
336
  {
337
  ...{
338
  error: faker.helpers.arrayElement([
ui/lib/api/schemas/appEvent.ts CHANGED
@@ -5,6 +5,7 @@
5
  */
6
  import type { DownloadProgress } from './downloadProgress'
7
  import type { JobFinishedEvent } from './jobFinishedEvent'
 
8
  import type { LlmTarget } from './llmTarget'
9
  import type { PipelineProgress } from './pipelineProgress'
10
  import type { SnapshotEvent } from './snapshotEvent'
@@ -18,6 +19,9 @@ export type AppEvent =
18
  | (PipelineProgress & {
19
  event: 'jobProgress'
20
  })
 
 
 
21
  | (JobFinishedEvent & {
22
  event: 'jobFinished'
23
  })
 
5
  */
6
  import type { DownloadProgress } from './downloadProgress'
7
  import type { JobFinishedEvent } from './jobFinishedEvent'
8
+ import type { JobWarningEvent } from './jobWarningEvent'
9
  import type { LlmTarget } from './llmTarget'
10
  import type { PipelineProgress } from './pipelineProgress'
11
  import type { SnapshotEvent } from './snapshotEvent'
 
19
  | (PipelineProgress & {
20
  event: 'jobProgress'
21
  })
22
+ | (JobWarningEvent & {
23
+ event: 'jobWarning'
24
+ })
25
  | (JobFinishedEvent & {
26
  event: 'jobFinished'
27
  })
ui/lib/api/schemas/index.ts CHANGED
@@ -35,6 +35,7 @@ export * from './imageRole'
35
  export * from './jobFinishedEvent'
36
  export * from './jobStatus'
37
  export * from './jobSummary'
 
38
  export * from './listDownloadsResponse'
39
  export * from './listOperationsResponse'
40
  export * from './listProjectsResponse'
 
35
  export * from './jobFinishedEvent'
36
  export * from './jobStatus'
37
  export * from './jobSummary'
38
+ export * from './jobWarningEvent'
39
  export * from './listDownloadsResponse'
40
  export * from './listOperationsResponse'
41
  export * from './listProjectsResponse'
ui/lib/api/schemas/jobWarningEvent.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Generated by orval v8.8.0 🍺
3
+ * Do not edit manually.
4
+ * OpenAPI spec version: 0.0.1
5
+ */
6
+
7
+ /**
8
+ * A non-fatal step failure during a pipeline run. The pipeline recovers by
9
+ skipping the rest of the current page's steps and moving on to the next
10
+ page; the UI accumulates these into a list during the job.
11
+ */
12
+ export interface JobWarningEvent {
13
+ jobId: string
14
+ message: string
15
+ /**
16
+ * 0-based page index where the failure happened.
17
+ * @minimum 0
18
+ */
19
+ pageIndex: number
20
+ /** Engine id (e.g. `"lama-manga"`) of the step that failed. */
21
+ stepId: string
22
+ /** @minimum 0 */
23
+ totalPages: number
24
+ }
ui/lib/events.ts CHANGED
@@ -151,6 +151,10 @@ function dispatch(event: AppEvent): void {
151
  }
152
  return
153
 
 
 
 
 
154
  case 'jobFinished':
155
  useJobsStore.getState().finished(event.id, event.status, event.error)
156
  lastPageByJob.delete(event.id)
 
151
  }
152
  return
153
 
154
+ case 'jobWarning':
155
+ useJobsStore.getState().warning(event)
156
+ return
157
+
158
  case 'jobFinished':
159
  useJobsStore.getState().finished(event.id, event.status, event.error)
160
  lastPageByJob.delete(event.id)
ui/lib/stores/jobsStore.ts CHANGED
@@ -3,14 +3,17 @@
3
  import { create } from 'zustand'
4
  import { immer } from 'zustand/middleware/immer'
5
 
6
- import type { JobSummary, PipelineProgress } from '@/lib/api/schemas'
7
 
8
  /**
9
  * Live job registry, fed by SSE. Keyed by id. `progress` is attached when
10
- * the backend streams `JobProgress` for a running pipeline job.
 
 
11
  */
12
  export type JobEntry = JobSummary & {
13
  progress?: PipelineProgress
 
14
  }
15
 
16
  type JobsState = {
@@ -18,6 +21,7 @@ type JobsState = {
18
  setSnapshot: (jobs: JobSummary[]) => void
19
  started: (id: string, kind: string) => void
20
  progress: (p: PipelineProgress) => void
 
21
  finished: (id: string, status: JobSummary['status'], error: string | null | undefined) => void
22
  clear: () => void
23
  byStatus: (status: JobSummary['status']) => JobEntry[]
@@ -44,6 +48,16 @@ export const useJobsStore = create<JobsState>()(
44
  }
45
  s.jobs[p.jobId] = { ...existing, progress: p }
46
  }),
 
 
 
 
 
 
 
 
 
 
47
  finished: (id, status, error) =>
48
  set((s) => {
49
  const existing = s.jobs[id] ?? { id, kind: 'pipeline', status }
 
3
  import { create } from 'zustand'
4
  import { immer } from 'zustand/middleware/immer'
5
 
6
+ import type { JobSummary, JobWarningEvent, PipelineProgress } from '@/lib/api/schemas'
7
 
8
  /**
9
  * Live job registry, fed by SSE. Keyed by id. `progress` is attached when
10
+ * the backend streams `JobProgress` for a running pipeline job. `warnings`
11
+ * accumulates non-fatal step failures as they arrive; the pipeline keeps
12
+ * running past them.
13
  */
14
  export type JobEntry = JobSummary & {
15
  progress?: PipelineProgress
16
+ warnings?: JobWarningEvent[]
17
  }
18
 
19
  type JobsState = {
 
21
  setSnapshot: (jobs: JobSummary[]) => void
22
  started: (id: string, kind: string) => void
23
  progress: (p: PipelineProgress) => void
24
+ warning: (w: JobWarningEvent) => void
25
  finished: (id: string, status: JobSummary['status'], error: string | null | undefined) => void
26
  clear: () => void
27
  byStatus: (status: JobSummary['status']) => JobEntry[]
 
48
  }
49
  s.jobs[p.jobId] = { ...existing, progress: p }
50
  }),
51
+ warning: (w) =>
52
+ set((s) => {
53
+ const existing = s.jobs[w.jobId] ?? {
54
+ id: w.jobId,
55
+ kind: 'pipeline',
56
+ status: 'running' as JobSummary['status'],
57
+ }
58
+ const warnings = existing.warnings ?? []
59
+ s.jobs[w.jobId] = { ...existing, warnings: [...warnings, w] }
60
+ }),
61
  finished: (id, status, error) =>
62
  set((s) => {
63
  const existing = s.jobs[id] ?? { id, kind: 'pipeline', status }
ui/openapi.json CHANGED
@@ -894,6 +894,25 @@
894
  }
895
  ]
896
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
  {
898
  "allOf": [
899
  {
@@ -1574,6 +1593,32 @@
1574
  }
1575
  }
1576
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1577
  "ListDownloadsResponse": {
1578
  "type": "object",
1579
  "required": ["downloads"],
 
894
  }
895
  ]
896
  },
897
+ {
898
+ "allOf": [
899
+ {
900
+ "$ref": "#/components/schemas/JobWarningEvent",
901
+ "description": "A single step on one page failed but the pipeline kept running.\nEmitted per failed step so clients can show a non-fatal warning while\nthe job continues with the next page."
902
+ },
903
+ {
904
+ "type": "object",
905
+ "required": ["event"],
906
+ "properties": {
907
+ "event": {
908
+ "type": "string",
909
+ "enum": ["jobWarning"]
910
+ }
911
+ }
912
+ }
913
+ ],
914
+ "description": "A single step on one page failed but the pipeline kept running.\nEmitted per failed step so clients can show a non-fatal warning while\nthe job continues with the next page."
915
+ },
916
  {
917
  "allOf": [
918
  {
 
1593
  }
1594
  }
1595
  },
1596
+ "JobWarningEvent": {
1597
+ "type": "object",
1598
+ "description": "A non-fatal step failure during a pipeline run. The pipeline recovers by\nskipping the rest of the current page's steps and moving on to the next\npage; the UI accumulates these into a list during the job.",
1599
+ "required": ["jobId", "pageIndex", "totalPages", "stepId", "message"],
1600
+ "properties": {
1601
+ "jobId": {
1602
+ "type": "string"
1603
+ },
1604
+ "message": {
1605
+ "type": "string"
1606
+ },
1607
+ "pageIndex": {
1608
+ "type": "integer",
1609
+ "description": "0-based page index where the failure happened.",
1610
+ "minimum": 0
1611
+ },
1612
+ "stepId": {
1613
+ "type": "string",
1614
+ "description": "Engine id (e.g. `\"lama-manga\"`) of the step that failed."
1615
+ },
1616
+ "totalPages": {
1617
+ "type": "integer",
1618
+ "minimum": 0
1619
+ }
1620
+ }
1621
+ },
1622
  "ListDownloadsResponse": {
1623
  "type": "object",
1624
  "required": ["downloads"],
ui/public/locales/en-US/translation.json CHANGED
@@ -75,6 +75,8 @@
75
  "processAll": "Processing all images",
76
  "imageProgress": "Image {{current}} / {{total}}",
77
  "stepProgress": "Step {{current}} / {{total}}: {{step}}",
 
 
78
  "cancel": "Stop",
79
  "cancelling": "Stopping..."
80
  },
 
75
  "processAll": "Processing all images",
76
  "imageProgress": "Image {{current}} / {{total}}",
77
  "stepProgress": "Step {{current}} / {{total}}: {{step}}",
78
+ "warningsOne": "1 step failed, continuing",
79
+ "warningsOther": "{{count}} steps failed, continuing",
80
  "cancel": "Stop",
81
  "cancelling": "Stopping..."
82
  },
ui/public/locales/es-ES/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "Procesando todas las imágenes",
62
  "imageProgress": "Imagen {{current}} / {{total}}",
63
  "stepProgress": "Paso {{current}} / {{total}}: {{step}}",
 
 
64
  "cancel": "Detener",
65
  "cancelling": "Deteniendo..."
66
  },
 
61
  "processAll": "Procesando todas las imágenes",
62
  "imageProgress": "Imagen {{current}} / {{total}}",
63
  "stepProgress": "Paso {{current}} / {{total}}: {{step}}",
64
+ "warningsOne": "1 paso falló, continuando",
65
+ "warningsOther": "{{count}} pasos fallaron, continuando",
66
  "cancel": "Detener",
67
  "cancelling": "Deteniendo..."
68
  },
ui/public/locales/ja-JP/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "すべての画像を一括処理中",
62
  "imageProgress": "画像 {{current}} / {{total}}",
63
  "stepProgress": "ステップ {{current}} / {{total}}:{{step}}",
 
 
64
  "cancel": "停止",
65
  "cancelling": "停止中..."
66
  },
 
61
  "processAll": "すべての画像を一括処理中",
62
  "imageProgress": "画像 {{current}} / {{total}}",
63
  "stepProgress": "ステップ {{current}} / {{total}}:{{step}}",
64
+ "warningsOne": "1 つのステップが失敗しましたが、続行します",
65
+ "warningsOther": "{{count}} 個のステップが失敗しましたが、続行します",
66
  "cancel": "停止",
67
  "cancelling": "停止中..."
68
  },
ui/public/locales/ko-KR/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "모든 이미지 처리 중",
62
  "imageProgress": "이미지 {{current}} / {{total}}",
63
  "stepProgress": "단계 {{current}} / {{total}}: {{step}}",
 
 
64
  "cancel": "취소",
65
  "cancelling": "취소 중..."
66
  },
 
61
  "processAll": "모든 이미지 처리 중",
62
  "imageProgress": "이미지 {{current}} / {{total}}",
63
  "stepProgress": "단계 {{current}} / {{total}}: {{step}}",
64
+ "warningsOne": "1개 단계 실패, 계속 진행",
65
+ "warningsOther": "{{count}}개 단계 실패, 계속 진행",
66
  "cancel": "취소",
67
  "cancelling": "취소 중..."
68
  },
ui/public/locales/pt-BR/translation.json CHANGED
@@ -62,6 +62,8 @@
62
  "processAll": "Processando todas as imagens",
63
  "imageProgress": "Imagem {{current}} / {{total}}",
64
  "stepProgress": "Etapa {{current}} / {{total}}: {{step}}",
 
 
65
  "cancel": "Parar",
66
  "cancelling": "Parando..."
67
  },
 
62
  "processAll": "Processando todas as imagens",
63
  "imageProgress": "Imagem {{current}} / {{total}}",
64
  "stepProgress": "Etapa {{current}} / {{total}}: {{step}}",
65
+ "warningsOne": "1 etapa falhou, continuando",
66
+ "warningsOther": "{{count}} etapas falharam, continuando",
67
  "cancel": "Parar",
68
  "cancelling": "Parando..."
69
  },
ui/public/locales/ru-RU/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "Обработка всех изображений",
62
  "imageProgress": "Изображение {{current}} / {{total}}",
63
  "stepProgress": "Шаг {{current}} / {{total}}: {{step}}",
 
 
64
  "cancel": "Остановить",
65
  "cancelling": "Остановка..."
66
  },
 
61
  "processAll": "Обработка всех изображений",
62
  "imageProgress": "Изображение {{current}} / {{total}}",
63
  "stepProgress": "Шаг {{current}} / {{total}}: {{step}}",
64
+ "warningsOne": "1 шаг не выполнен, продолжаем",
65
+ "warningsOther": "{{count}} шагов не выполнены, продолжаем",
66
  "cancel": "Остановить",
67
  "cancelling": "Остановка..."
68
  },
ui/public/locales/tr-TR/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "Tüm görseller işleniyor",
62
  "imageProgress": "Görsel {{current}} / {{total}}",
63
  "stepProgress": "Adım {{current}} / {{total}}: {{step}}",
 
 
64
  "cancel": "Durdur",
65
  "cancelling": "Durduruluyor..."
66
  },
 
61
  "processAll": "Tüm görseller işleniyor",
62
  "imageProgress": "Görsel {{current}} / {{total}}",
63
  "stepProgress": "Adım {{current}} / {{total}}: {{step}}",
64
+ "warningsOne": "1 adım başarısız oldu, devam ediliyor",
65
+ "warningsOther": "{{count}} adım başarısız oldu, devam ediliyor",
66
  "cancel": "Durdur",
67
  "cancelling": "Durduruluyor..."
68
  },
ui/public/locales/zh-CN/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "正在批量处理所有图片",
62
  "imageProgress": "图片 {{current}} / {{total}}",
63
  "stepProgress": "步骤 {{current}} / {{total}}:{{step}}",
 
 
64
  "cancel": "停止",
65
  "cancelling": "正在停止..."
66
  },
 
61
  "processAll": "正在批量处理所有图片",
62
  "imageProgress": "图片 {{current}} / {{total}}",
63
  "stepProgress": "步骤 {{current}} / {{total}}:{{step}}",
64
+ "warningsOne": "1 个步骤失败,继续处理",
65
+ "warningsOther": "{{count}} 个步骤失败,继续处理",
66
  "cancel": "停止",
67
  "cancelling": "正在停止..."
68
  },
ui/public/locales/zh-TW/translation.json CHANGED
@@ -61,6 +61,8 @@
61
  "processAll": "正在批次處理所有圖片",
62
  "imageProgress": "圖片 {{current}} / {{total}}",
63
  "stepProgress": "步驟 {{current}} / {{total}}:{{step}}",
 
 
64
  "cancel": "停止",
65
  "cancelling": "停止中..."
66
  },
 
61
  "processAll": "正在批次處理所有圖片",
62
  "imageProgress": "圖片 {{current}} / {{total}}",
63
  "stepProgress": "步驟 {{current}} / {{total}}:{{step}}",
64
+ "warningsOne": "1 個步驟失敗,繼續處理",
65
+ "warningsOther": "{{count}} 個步驟失敗,繼續處理",
66
  "cancel": "停止",
67
  "cancelling": "停止中..."
68
  },