trans2

Sleeping

App Files Files Community

Mayo committed on Apr 20

Commit

f05100a

unverified ·

1 Parent(s): f3a6ee8

fix: inpainting OOM & pipeline error handling

Browse files

Files changed (34) hide show

koharu-app/bin/pipeline.rs +17 -2
koharu-app/src/pipeline/engines/aot.rs +33 -24
koharu-app/src/pipeline/engines/lama.rs +37 -18
koharu-app/src/pipeline/engines/support.rs +1 -18
koharu-app/src/pipeline/mod.rs +119 -9
koharu-core/src/events.rs +32 -4
koharu-core/src/lib.rs +1 -1
koharu-ml/bin/aot-inpainting.rs +5 -1
koharu-ml/src/aot_inpainting/mod.rs +56 -161
koharu-ml/src/inpainting/balloon.rs +369 -0
koharu-ml/src/{inpainting.rs → inpainting/mod.rs} +13 -0
koharu-ml/src/inpainting/strategy.rs +539 -0
koharu-ml/src/lama/mod.rs +39 -557
koharu-ml/src/lib.rs +1 -1
koharu-ml/tests/inpaint.rs +0 -43
koharu-rpc/src/mcp/mod.rs +1 -1
koharu-rpc/src/routes/pipelines.rs +29 -5
ui/components/ActivityBubble.tsx +42 -2
ui/lib/api/default/default.msw.ts +10 -0
ui/lib/api/schemas/appEvent.ts +4 -0
ui/lib/api/schemas/index.ts +1 -0
ui/lib/api/schemas/jobWarningEvent.ts +24 -0
ui/lib/events.ts +4 -0
ui/lib/stores/jobsStore.ts +16 -2
ui/openapi.json +45 -0
ui/public/locales/en-US/translation.json +2 -0
ui/public/locales/es-ES/translation.json +2 -0
ui/public/locales/ja-JP/translation.json +2 -0
ui/public/locales/ko-KR/translation.json +2 -0
ui/public/locales/pt-BR/translation.json +2 -0
ui/public/locales/ru-RU/translation.json +2 -0
ui/public/locales/tr-TR/translation.json +2 -0
ui/public/locales/zh-CN/translation.json +2 -0
ui/public/locales/zh-TW/translation.json +2 -0

koharu-app/bin/pipeline.rs CHANGED Viewed

@@ -235,6 +235,16 @@ async fn run() -> Result<()> {
     let ensure_translation_fallback = !cli.with_translate;
     let cancel = Arc::new(AtomicBool::new(false));
     let result = koharu_app::pipeline::run(
         session.clone(),
         app.registry.clone(),
@@ -245,11 +255,16 @@ async fn run() -> Result<()> {
         spec,
         cancel,
         Some(progress_sink),
     )
     .await;
     match &result {
-        Ok(()) => eprintln!("=> pipeline succeeded"),
         Err(e) => eprintln!("=> pipeline failed: {e:#}"),
     }
@@ -261,7 +276,7 @@ async fn run() -> Result<()> {
         .with_context(|| format!("dump artifacts to {}", cli.output_dir.display()))?;
     app.close_project().await.ok();
-    result
 }
 /// Load `AppConfig` from TOML at `path` or default.

     let ensure_translation_fallback = !cli.with_translate;
     let cancel = Arc::new(AtomicBool::new(false));
+    let warning_sink: koharu_app::pipeline::WarningSink =
+        Arc::new(|tick: koharu_app::pipeline::WarningTick| {
+            eprintln!(
+                "warn: step '{}' failed on page {}/{}: {}",
+                tick.step_id,
+                tick.page_index + 1,
+                tick.total_pages,
+                tick.message
+            );
+        });
     let result = koharu_app::pipeline::run(
         session.clone(),
         app.registry.clone(),
         spec,
         cancel,
         Some(progress_sink),
+        Some(warning_sink),
     )
     .await;
     match &result {
+        Ok(outcome) if outcome.warning_count == 0 => eprintln!("=> pipeline succeeded"),
+        Ok(outcome) => eprintln!(
+            "=> pipeline finished with {} failed step(s)",
+            outcome.warning_count
+        ),
         Err(e) => eprintln!("=> pipeline failed: {e:#}"),
     }
         .with_context(|| format!("dump artifacts to {}", cli.output_dir.display()))?;
     app.close_project().await.ok();
+    result.map(|_| ())
 }
 /// Load `AppConfig` from TOML at `path` or default.

koharu-app/src/pipeline/engines/aot.rs CHANGED Viewed

@@ -1,14 +1,15 @@
-//! AOT inpainting. Simpler than Lama: direct source + segment → result.
 //!
-//! With `ctx.options.region`, composites onto the existing `Image { Inpainted }`
-//! (falling back to Source) so repair-brush strokes only affect the touched
-//! area. AOT inference has no blockwise overload, so we crop the base image
-//! and mask to the region, inpaint the crop, and paste back.
 use anyhow::{Result, anyhow};
 use async_trait::async_trait;
-use image::{DynamicImage, GenericImage, GenericImageView};
-use koharu_core::{ImageRole, MaskRole, Op};
 use koharu_ml::aot_inpainting::AotInpainting;
 use crate::pipeline::artifacts::Artifact;
@@ -26,31 +27,22 @@ impl Engine for Model {
             .ok_or_else(|| anyhow!("no Segment mask on page"))?;
         let mask = ctx.blobs.load_image(&mask_ref)?;
-        let result = match ctx.options.region {
-            None => {
-                let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
-                self.0.inference(&image, &mask)?
-            }
             Some(r) => {
                 let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
                     Some((_, blob)) => ctx.blobs.load_image(&blob)?,
                     None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
                 };
-                let (w, h) = base.dimensions();
-                let x0 = r.x.min(w.saturating_sub(1));
-                let y0 = r.y.min(h.saturating_sub(1));
-                let rw = r.width.min(w - x0).max(1);
-                let rh = r.height.min(h - y0).max(1);
-                let image_crop = DynamicImage::ImageRgba8(base.view(x0, y0, rw, rh).to_image());
-                let mask_crop =
-                    DynamicImage::ImageLuma8(mask.to_luma8().view(x0, y0, rw, rh).to_image());
-                let patched = self.0.inference(&image_crop, &mask_crop)?;
-                let mut out = base;
-                out.copy_from(&patched, x0, y0)?;
-                out
             }
         };
         let (w, h) = image_dimensions(&result);
         let blob = ctx.blobs.put_webp(&result)?;
         Ok(vec![upsert_image_blob(
@@ -64,6 +56,23 @@ impl Engine for Model {
     }
 }
 inventory::submit! {
     EngineInfo {
         id: "aot-inpainting",

+//! AOT inpainting. Direct source + segment → result. Subdivision is handled
+//! by [`koharu_ml::inpainting::run_inpaint`] (shared with Lama) — this engine
+//! only wires up the scene I/O.
 //!
+//! For repair-brush (`ctx.options.region`), composite onto the existing
+//! `Image { Inpainted }` if present (fallback Source) and zero out mask
+//! pixels outside the region so only that area is reprocessed.
 use anyhow::{Result, anyhow};
 use async_trait::async_trait;
+use image::{DynamicImage, GrayImage, Luma};
+use koharu_core::{ImageRole, MaskRole, Op, Region};
 use koharu_ml::aot_inpainting::AotInpainting;
 use crate::pipeline::artifacts::Artifact;
             .ok_or_else(|| anyhow!("no Segment mask on page"))?;
         let mask = ctx.blobs.load_image(&mask_ref)?;
+        let (image, mask) = match ctx.options.region {
             Some(r) => {
                 let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
                     Some((_, blob)) => ctx.blobs.load_image(&blob)?,
                     None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
                 };
+                let clipped = clip_mask_to_region(&mask, &r);
+                (base, clipped)
+            }
+            None => {
+                let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
+                (image, mask)
             }
         };
+        let result = self.0.inference(&image, &mask)?;
         let (w, h) = image_dimensions(&result);
         let blob = ctx.blobs.put_webp(&result)?;
         Ok(vec![upsert_image_blob(
     }
 }
+fn clip_mask_to_region(mask: &DynamicImage, region: &Region) -> DynamicImage { // returns a same-sized luma mask that keeps `mask`'s values only inside `region`
+    let src = mask.to_luma8();
+    let (w, h) = src.dimensions();
+    let x0 = region.x.min(w); // clamp the region's left edge to the mask width
+    let y0 = region.y.min(h); // clamp the region's top edge to the mask height
+    let x1 = region.x.saturating_add(region.width).min(w); // saturating_add guards against overflow before clamping the right edge
+    let y1 = region.y.saturating_add(region.height).min(h); // same for the bottom edge
+    let mut clipped = GrayImage::new(w, h); // fresh buffer; `ImageBuffer::new` is expected to start all-zero, so pixels outside the region stay 0
+    for y in y0..y1 { // empty range (no copy) when the region lies entirely outside the mask
+        for x in x0..x1 {
+            clipped.put_pixel(x, y, Luma([src.get_pixel(x, y).0[0]])); // copy the original mask value unchanged
+        }
+    }
+    DynamicImage::ImageLuma8(clipped)
+}
 inventory::submit! {
     EngineInfo {
         id: "aot-inpainting",

koharu-app/src/pipeline/engines/lama.rs CHANGED Viewed

@@ -1,23 +1,26 @@
 //! Lama Manga inpainter. Reads source + segmentation mask from the page,
 //! runs the model, writes the output as `Image { role: Inpainted }`.
 //!
-//! When `ctx.options.region` is set (e.g. repair-brush re-inpaint), the
-//! engine composites onto the existing `Image { Inpainted }` if present
-//! (falling back to `Source`) and processes just that one block. Without
-//! a region, behaves as a full-page pass using the scene's text nodes
-//! as block hints.
 use anyhow::{Result, anyhow};
 use async_trait::async_trait;
-use koharu_core::{ImageRole, MaskRole, Op};
 use koharu_ml::lama::Lama;
-use koharu_ml::types::TextRegion;
 use crate::pipeline::artifacts::Artifact;
 use crate::pipeline::engine::{Engine, EngineCtx, EngineInfo};
 use crate::pipeline::engines::support::{
-    find_image_node, find_mask_node, image_dimensions, load_source_image, region_to_text_region,
-    text_node_to_region, text_nodes, upsert_image_blob,
 };
 pub struct Model(Lama);
@@ -29,26 +32,22 @@ impl Engine for Model {
             .ok_or_else(|| anyhow!("no Segment mask on page"))?;
         let mask = ctx.blobs.load_image(&mask_ref)?;
-        let (image, text_regions): (_, Vec<TextRegion>) = match ctx.options.region {
             Some(r) => {
                 let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
                     Some((_, blob)) => ctx.blobs.load_image(&blob)?,
                     None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
                 };
-                (base, vec![region_to_text_region(&r)])
             }
             None => {
                 let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
-                let regions = text_nodes(ctx.scene, ctx.page)
-                    .iter()
-                    .map(|(_, transform, t)| text_node_to_region(transform, t))
-                    .collect();
-                (image, regions)
             }
         };
-        let regions_ref = (!text_regions.is_empty()).then_some(text_regions.as_slice());
-        let result = self.0.inference_with_blocks(&image, &mask, regions_ref)?;
         let (w, h) = image_dimensions(&result);
         let blob = ctx.blobs.put_webp(&result)?;
         Ok(vec![upsert_image_blob(
@@ -62,6 +61,26 @@ impl Engine for Model {
     }
 }
 inventory::submit! {
     EngineInfo {
         id: "lama-manga",

 //! Lama Manga inpainter. Reads source + segmentation mask from the page,
 //! runs the model, writes the output as `Image { role: Inpainted }`.
 //!
+//! Box subdivision (the "which regions to run the model on" question) is
+//! driven by the **mask itself** via `boxes_from_mask` — mirrors IOPaint's
+//! `InpaintModel.__call__`. Text detections are no longer consulted; the
+//! segmentation mask already encodes which pixels to remove.
+//!
+//! When `ctx.options.region` is set (repair-brush re-inpaint), we composite
+//! onto the existing `Image { Inpainted }` if present (falling back to
+//! `Source`) and zero out mask pixels outside the region before dispatch —
+//! so only that region is reprocessed.
 use anyhow::{Result, anyhow};
 use async_trait::async_trait;
+use image::{DynamicImage, GrayImage, Luma};
+use koharu_core::{ImageRole, MaskRole, Op, Region};
 use koharu_ml::lama::Lama;
 use crate::pipeline::artifacts::Artifact;
 use crate::pipeline::engine::{Engine, EngineCtx, EngineInfo};
 use crate::pipeline::engines::support::{
+    find_image_node, find_mask_node, image_dimensions, load_source_image, upsert_image_blob,
 };
 pub struct Model(Lama);
             .ok_or_else(|| anyhow!("no Segment mask on page"))?;
         let mask = ctx.blobs.load_image(&mask_ref)?;
+        let (image, mask) = match ctx.options.region {
             Some(r) => {
                 let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
                     Some((_, blob)) => ctx.blobs.load_image(&blob)?,
                     None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
                 };
+                let clipped = clip_mask_to_region(&mask, &r);
+                (base, clipped)
             }
             None => {
                 let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
+                (image, mask)
             }
         };
+        let result = self.0.inference(&image, &mask)?;
         let (w, h) = image_dimensions(&result);
         let blob = ctx.blobs.put_webp(&result)?;
         Ok(vec![upsert_image_blob(
     }
 }
+/// Zero out every pixel of `mask` that falls outside `region`. The Crop
+/// strategy's `boxes_from_mask` then only finds contours inside the region,
+/// so the inpainter only touches that area.
+fn clip_mask_to_region(mask: &DynamicImage, region: &Region) -> DynamicImage {
+    let src = mask.to_luma8();
+    let (w, h) = src.dimensions();
+    let x0 = region.x.min(w); // region origin clamped into the mask bounds
+    let y0 = region.y.min(h);
+    let x1 = region.x.saturating_add(region.width).min(w); // exclusive end; saturating_add avoids overflow, `.min` clamps to the mask
+    let y1 = region.y.saturating_add(region.height).min(h);
+    let mut clipped = GrayImage::new(w, h); // output has the full mask size; untouched pixels are expected to remain 0
+    for y in y0..y1 { // only the clamped rectangle is copied over
+        for x in x0..x1 {
+            clipped.put_pixel(x, y, Luma([src.get_pixel(x, y).0[0]]));
+        }
+    }
+    DynamicImage::ImageLuma8(clipped)
+}
 inventory::submit! {
     EngineInfo {
         id: "lama-manga",

koharu-app/src/pipeline/engines/support.rs CHANGED Viewed

@@ -7,7 +7,7 @@ use anyhow::{Context, Result};
 use image::{DynamicImage, GenericImageView};
 use koharu_core::{
     BlobRef, ImageData, ImageRole, MaskData, MaskRole, Node, NodeDataPatch, NodeId, NodeKind, Op,
-    PageId, Region, Scene, TextData, Transform,
 };
 use crate::blobs::BlobStore;
@@ -89,23 +89,6 @@ pub fn text_node_to_region(transform: &Transform, text: &TextData) -> koharu_ml:
     }
 }
-/// Wrap a raw pixel `Region` as a `TextRegion` with no text hints. Used when
-/// an inpainter engine receives a region override (repair-brush path).
-pub fn region_to_text_region(r: &Region) -> koharu_ml::types::TextRegion {
-    koharu_ml::types::TextRegion {
-        x: r.x as f32,
-        y: r.y as f32,
-        width: r.width as f32,
-        height: r.height as f32,
-        confidence: 1.0,
-        line_polygons: None,
-        source_direction: None,
-        rotation_deg: None,
-        detected_font_size_px: None,
-        detector: None,
-    }
-}
 /// Inverse of `ml_text_direction_to_core`.
 pub fn core_text_direction_to_ml(d: koharu_core::TextDirection) -> koharu_ml::types::TextDirection {
     match d {

 use image::{DynamicImage, GenericImageView};
 use koharu_core::{
     BlobRef, ImageData, ImageRole, MaskData, MaskRole, Node, NodeDataPatch, NodeId, NodeKind, Op,
+    PageId, Scene, TextData, Transform,
 };
 use crate::blobs::BlobStore;
     }
 }
 /// Inverse of `ml_text_direction_to_core`.
 pub fn core_text_direction_to_ml(d: koharu_core::TextDirection) -> koharu_ml::types::TextDirection {
     match d {

koharu-app/src/pipeline/mod.rs CHANGED Viewed

@@ -27,6 +27,11 @@ use tracing::Instrument;
 /// about to run (or just finished); step_index / page_index are 0-based.
 pub type ProgressSink = Arc<dyn Fn(ProgressTick) + Send + Sync>;
 #[derive(Debug, Clone)]
 pub struct ProgressTick {
     /// Coarse UI-facing step tag derived from the engine's primary
@@ -42,6 +47,20 @@ pub struct ProgressTick {
     pub overall_percent: u8,
 }
 /// Map an engine's produced artifact to its UI step category. Stays
 /// co-located with the engine metadata so adding a new engine can't
 /// silently bypass the toolbar spinner — only the registered artifact
@@ -89,6 +108,12 @@ pub enum Scope {
 /// Execute `spec` against `session`. Each engine step becomes one `Op::Batch`
 /// applied via the session's history (one undo step per step per page).
 #[allow(clippy::too_many_arguments)]
 #[tracing::instrument(level = "info", skip_all)]
 pub async fn run(
@@ -101,7 +126,8 @@ pub async fn run(
     spec: PipelineSpec,
     cancel: Arc<AtomicBool>,
     progress: Option<ProgressSink>,
-) -> Result<()> {
     let infos: Vec<&EngineInfo> = spec
         .steps
         .iter()
@@ -124,8 +150,9 @@ pub async fn run(
     let total_steps = order.len().max(1);
     let total_units = (total_pages * total_steps) as u64;
     let mut completed: u64 = 0;
-    for (page_index, page_id) in pages.iter().enumerate() {
         for (seq, &i) in order.iter().enumerate() {
             if cancel.load(Ordering::Relaxed) {
                 bail!("cancelled");
@@ -147,11 +174,31 @@ pub async fn run(
             // The page must still exist (user may have deleted it mid-run).
             if !session.scene.read().pages.contains_key(page_id) {
-                completed += 1;
-                continue;
             }
-            let engine = registry.get(info.id, &runtime, cpu).await?;
             let scene_snap = session.scene_snapshot();
             let ctx = EngineCtx {
                 scene: &scene_snap,
@@ -163,9 +210,29 @@ pub async fn run(
                 llm: &llm,
                 renderer: &renderer,
             };
-            let ops = async { engine.run(ctx).await }
                 .instrument(tracing::info_span!("step", engine = info.id, page = %page_id))
-                .await?;
             completed += 1;
             if ops.is_empty() {
                 continue;
@@ -174,7 +241,20 @@ pub async fn run(
                 ops,
                 label: format!("{}: page {}", info.id, page_id),
             };
-            session.apply(batch)?;
         }
     }
@@ -189,7 +269,37 @@ pub async fn run(
             overall_percent: 100,
         });
     }
-    Ok(())
 }
 // ---------------------------------------------------------------------------

 /// about to run (or just finished); step_index / page_index are 0-based.
 pub type ProgressSink = Arc<dyn Fn(ProgressTick) + Send + Sync>;
+/// Observer for non-fatal step failures. Called once per failed step; the
+/// pipeline skips the rest of that page's steps and moves on to the next
+/// page.
+pub type WarningSink = Arc<dyn Fn(WarningTick) + Send + Sync>;
 #[derive(Debug, Clone)]
 pub struct ProgressTick {
     /// Coarse UI-facing step tag derived from the engine's primary
     pub overall_percent: u8,
 }
+#[derive(Debug, Clone)]
+pub struct WarningTick { // payload passed to a `WarningSink` for one failed step
+    pub step_id: String, // id of the engine whose step failed
+    pub page_index: usize, // 0-based; the CLI sink prints it as `page_index + 1`
+    pub total_pages: usize, // number of pages in the run, for "page i/N" style messages
+    pub message: String, // the error rendered with `{err:#}`
+}
+/// Returned by [`run`]. `warning_count == 0` means the run finished cleanly.
+#[derive(Debug, Clone, Default)]
+pub struct RunOutcome {
+    pub warning_count: usize, // bumped once per step failure reported during the run
+}
 /// Map an engine's produced artifact to its UI step category. Stays
 /// co-located with the engine metadata so adding a new engine can't
 /// silently bypass the toolbar spinner — only the registered artifact
 /// Execute `spec` against `session`. Each engine step becomes one `Op::Batch`
 /// applied via the session's history (one undo step per step per page).
+///
+/// A failed step on a given page is non-fatal: the rest of that page's steps
+/// are skipped (they typically depend on the failed step's output), one
+/// [`WarningTick`] is emitted via `warnings`, and the driver moves on to the
+/// next page. The function returns the total number of per-step warnings
+/// that fired, letting callers flag the run as `CompletedWithErrors`.
 #[allow(clippy::too_many_arguments)]
 #[tracing::instrument(level = "info", skip_all)]
 pub async fn run(
     spec: PipelineSpec,
     cancel: Arc<AtomicBool>,
     progress: Option<ProgressSink>,
+    warnings: Option<WarningSink>,
+) -> Result<RunOutcome> {
     let infos: Vec<&EngineInfo> = spec
         .steps
         .iter()
     let total_steps = order.len().max(1);
     let total_units = (total_pages * total_steps) as u64;
     let mut completed: u64 = 0;
+    let mut warning_count: usize = 0;
+    'pages: for (page_index, page_id) in pages.iter().enumerate() {
         for (seq, &i) in order.iter().enumerate() {
             if cancel.load(Ordering::Relaxed) {
                 bail!("cancelled");
             // The page must still exist (user may have deleted it mid-run).
             if !session.scene.read().pages.contains_key(page_id) {
+                // Skip the remaining steps for a deleted page and credit all
+                // of them against total_units so progress still reaches 100%.
+                completed += (total_steps - seq) as u64;
+                continue 'pages;
             }
+            let engine = match registry.get(info.id, &runtime, cpu).await {
+                Ok(e) => e,
+                Err(err) => {
+                    // Engine *load* failure: same recovery as a run failure.
+                    report_step_failure(
+                        info.id,
+                        page_id,
+                        seq,
+                        page_index,
+                        total_pages,
+                        total_steps,
+                        &err,
+                        &mut warning_count,
+                        warnings.as_ref(),
+                    );
+                    completed += (total_steps - seq) as u64;
+                    continue 'pages;
+                }
+            };
             let scene_snap = session.scene_snapshot();
             let ctx = EngineCtx {
                 scene: &scene_snap,
                 llm: &llm,
                 renderer: &renderer,
             };
+            let step_result = async { engine.run(ctx).await }
                 .instrument(tracing::info_span!("step", engine = info.id, page = %page_id))
+                .await;
+            let ops = match step_result {
+                Ok(ops) => ops,
+                Err(err) => {
+                    report_step_failure(
+                        info.id,
+                        page_id,
+                        seq,
+                        page_index,
+                        total_pages,
+                        total_steps,
+                        &err,
+                        &mut warning_count,
+                        warnings.as_ref(),
+                    );
+                    // Subsequent steps on this page almost always consume the
+                    // failed step's artifact; skip the rest and move on.
+                    completed += (total_steps - seq) as u64;
+                    continue 'pages;
+                }
+            };
             completed += 1;
             if ops.is_empty() {
                 continue;
                 ops,
                 label: format!("{}: page {}", info.id, page_id),
             };
+            if let Err(err) = session.apply(batch) {
+                report_step_failure(
+                    info.id,
+                    page_id,
+                    seq,
+                    page_index,
+                    total_pages,
+                    total_steps,
+                    &err,
+                    &mut warning_count,
+                    warnings.as_ref(),
+                );
+                // This step was already counted by `completed += 1` above, so
+                // credit only the page's remaining (skipped) steps. Without
+                // this, progress accounting falls behind after an apply
+                // failure, unlike the engine-load and engine-run failure paths.
+                completed += (total_steps - seq - 1) as u64;
+                continue 'pages;
+            }
         }
     }
             overall_percent: 100,
         });
     }
+    Ok(RunOutcome { warning_count })
+}
+#[allow(clippy::too_many_arguments)]
+fn report_step_failure( // logs one failed step, bumps the warning counter, and notifies the optional sink
+    engine_id: &str,
+    page_id: &PageId,
+    step_index: usize,
+    page_index: usize,
+    total_pages: usize,
+    total_steps: usize,
+    err: &anyhow::Error,
+    warning_count: &mut usize,
+    sink: Option<&WarningSink>,
+) {
+    let _ = total_steps; // accepted but not used in this body; the binding just discards it
+    tracing::warn!(
+        engine = engine_id,
+        page = %page_id,
+        step_index,
+        "pipeline step failed: {err:#}"
+    );
+    *warning_count += 1; // counted whether or not a sink is attached
+    if let Some(sink) = sink {
+        sink(WarningTick {
+            step_id: engine_id.to_string(),
+            page_index,
+            total_pages,
+            message: format!("{err:#}"), // alternate Display; for anyhow errors this should include the cause chain
+        });
+    }
 }
 // ---------------------------------------------------------------------------

koharu-core/src/events.rs CHANGED Viewed

@@ -21,8 +21,15 @@ use crate::protocol::LlmTarget;
 #[serde(tag = "event", rename_all = "camelCase")]
 pub enum AppEvent {
     // Pipeline jobs.
-    JobStarted { id: String, kind: String },
     JobProgress(PipelineProgress),
     JobFinished(JobFinishedEvent),
     // Runtime library / model downloads.
@@ -36,9 +43,15 @@ pub enum AppEvent {
     // - `LlmLoaded`   — model is on the GPU and ready for inference.
     // - `LlmFailed`   — load failed; see `GET /llm/current` for the reason.
     // - `LlmUnloaded` — model released.
-    LlmLoading { target: LlmTarget },
-    LlmLoaded { target: LlmTarget },
-    LlmFailed { target: Option<LlmTarget> },
     LlmUnloaded,
     // (Re)connect replay so the client can seed in-flight state.
@@ -122,6 +135,21 @@ pub struct JobFinishedEvent {
     pub error: Option<String>,
 }
 // ---------------------------------------------------------------------------
 // Downloads
 // ---------------------------------------------------------------------------

 #[serde(tag = "event", rename_all = "camelCase")]
 pub enum AppEvent {
     // Pipeline jobs.
+    JobStarted {
+        id: String,
+        kind: String,
+    },
     JobProgress(PipelineProgress),
+    /// A single step on one page failed but the pipeline kept running.
+    /// Emitted per failed step so clients can show a non-fatal warning while
+    /// the job continues with the next page.
+    JobWarning(JobWarningEvent),
     JobFinished(JobFinishedEvent),
     // Runtime library / model downloads.
     // - `LlmLoaded`   — model is on the GPU and ready for inference.
     // - `LlmFailed`   — load failed; see `GET /llm/current` for the reason.
     // - `LlmUnloaded` — model released.
+    LlmLoading {
+        target: LlmTarget,
+    },
+    LlmLoaded {
+        target: LlmTarget,
+    },
+    LlmFailed {
+        target: Option<LlmTarget>,
+    },
     LlmUnloaded,
     // (Re)connect replay so the client can seed in-flight state.
     pub error: Option<String>,
 }
+/// A non-fatal step failure during a pipeline run. The pipeline recovers by
+/// skipping the rest of the current page's steps and moving on to the next
+/// page; the UI accumulates these into a list during the job.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct JobWarningEvent {
+    pub job_id: String,
+    /// 0-based page index where the failure happened.
+    pub page_index: usize,
+    pub total_pages: usize,
+    /// Engine id (e.g. `"lama-manga"`) of the step that failed.
+    pub step_id: String,
+    pub message: String,
+}
 // ---------------------------------------------------------------------------
 // Downloads
 // ---------------------------------------------------------------------------

koharu-core/src/lib.rs CHANGED Viewed

@@ -19,7 +19,7 @@ pub mod style;
 pub use blob::BlobRef;
 pub use events::{
     AppEvent, DownloadProgress, DownloadStatus, JobFinishedEvent, JobStatus, JobSummary,
-    PipelineProgress, PipelineStatus, PipelineStep, ProjectSummary, SnapshotEvent,
 };
 pub use font::{FontPrediction, NamedFontPrediction, TextDirection, TopFont};
 pub use google_fonts::{FontSource, GoogleFontCatalog, GoogleFontEntry, GoogleFontVariant};

 pub use blob::BlobRef;
 pub use events::{
     AppEvent, DownloadProgress, DownloadStatus, JobFinishedEvent, JobStatus, JobSummary,
+    JobWarningEvent, PipelineProgress, PipelineStatus, PipelineStep, ProjectSummary, SnapshotEvent,
 };
 pub use font::{FontPrediction, NamedFontPrediction, TextDirection, TopFont};
 pub use google_fonts::{FontSource, GoogleFontCatalog, GoogleFontEntry, GoogleFontVariant};

koharu-ml/bin/aot-inpainting.rs CHANGED Viewed

@@ -57,7 +57,11 @@ async fn main() -> Result<()> {
     let mask = image::open(&cli.mask)?;
     let started = std::time::Instant::now();
     let output = if let Some(max_side) = cli.max_side {
-        model.inference_with_max_side(&image, &mask, max_side)?
     } else {
         model.inference(&image, &mask)?
     };

     let mask = image::open(&cli.mask)?;
     let started = std::time::Instant::now();
     let output = if let Some(max_side) = cli.max_side {
+        let cfg = koharu_ml::inpainting::HdStrategyConfig {
+            resize_limit: max_side,
+            ..model.default_config()
+        };
+        model.inference_with_config(&image, &mask, &cfg)?
     } else {
         model.inference(&image, &mask)?
     };

koharu-ml/src/aot_inpainting/mod.rs CHANGED Viewed

@@ -7,17 +7,17 @@ use std::{
 use anyhow::{Context, Result, bail};
 use candle_core::{DType, Device, Tensor};
-use image::{
-    DynamicImage, GenericImageView, GrayImage, RgbImage,
-    imageops::{FilterType, resize},
-};
 use koharu_runtime::RuntimeManager;
 use serde::Deserialize;
 use tracing::instrument;
 use crate::{
     device,
-    inpainting::{binarize_mask, extract_alpha, restore_alpha_channel},
     loading,
 };
@@ -49,16 +49,6 @@ pub struct AotInpainting {
     device: Device,
 }
-#[derive(Debug, Clone)]
-struct PreparedInput {
-    pixel_values: Tensor,
-    mask_values: Tensor,
-    original_rgb: RgbImage,
-    original_mask: GrayImage,
-    model_width: u32,
-    model_height: u32,
-}
 #[derive(Debug, Clone, Deserialize)]
 struct AotInpaintingConfig {
     model_type: String,
@@ -137,21 +127,27 @@ impl AotInpainting {
         })
     }
     #[instrument(level = "debug", skip_all)]
     pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
-        self.inference_with_max_side(image, mask, self.config.default_max_side)
     }
     #[instrument(level = "debug", skip_all)]
-    pub fn inference_with_max_side(
         &self,
         image: &DynamicImage,
         mask: &DynamicImage,
-        max_side: u32,
     ) -> Result<DynamicImage> {
-        if max_side == 0 {
-            bail!("max_side must be positive");
-        }
         if image.dimensions() != mask.dimensions() {
             bail!(
                 "image and mask dimensions dismatch: image is {:?}, mask is {:?}",
@@ -161,84 +157,36 @@ impl AotInpainting {
         }
         let started = Instant::now();
-        let prepared = self.preprocess(image, mask, max_side)?;
-        let output = self
-            .model
-            .forward(&prepared.pixel_values, &prepared.mask_values)?;
-        let composited = self.postprocess(&output, &prepared)?;
         tracing::info!(
             width = image.width(),
             height = image.height(),
-            model_width = prepared.model_width,
-            model_height = prepared.model_height,
-            max_side,
             total_ms = started.elapsed().as_millis(),
             "aot inpainting timings"
         );
         if image.color().has_alpha() {
             let alpha = extract_alpha(&image.to_rgba8());
-            let rgba = restore_alpha_channel(&composited, &alpha, &prepared.original_mask);
             Ok(DynamicImage::ImageRgba8(rgba))
         } else {
-            Ok(DynamicImage::ImageRgb8(composited))
         }
     }
-    fn preprocess(
-        &self,
-        image: &DynamicImage,
-        mask: &DynamicImage,
-        max_side: u32,
-    ) -> Result<PreparedInput> {
-        let original_rgb = image.to_rgb8();
-        let original_mask = binarize_mask(mask);
-        let mut working_rgb = original_rgb.clone();
-        let mut working_mask = original_mask.clone();
-        if working_rgb.width().max(working_rgb.height()) > max_side {
-            let (resized_width, resized_height) =
-                resize_keep_aspect_dims(working_rgb.width(), working_rgb.height(), max_side);
-            working_rgb = resize(
-                &working_rgb,
-                resized_width,
-                resized_height,
-                FilterType::Triangle,
-            );
-            working_mask = resize(
-                &working_mask,
-                resized_width,
-                resized_height,
-                FilterType::Triangle,
-            );
-        }
-        let model_width = round_up_multiple(working_rgb.width(), self.config.pad_multiple as u32);
-        let model_height = round_up_multiple(working_rgb.height(), self.config.pad_multiple as u32);
-        if model_width != working_rgb.width() || model_height != working_rgb.height() {
-            working_rgb = resize(
-                &working_rgb,
-                model_width,
-                model_height,
-                FilterType::Triangle,
-            );
-            working_mask = resize(
-                &working_mask,
-                model_width,
-                model_height,
-                FilterType::Triangle,
-            );
-        }
-        let mut binary_model_mask = working_mask;
-        for pixel in binary_model_mask.pixels_mut() {
-            pixel.0[0] = if pixel.0[0] >= 127 { 255 } else { 0 };
-        }
         let image_tensor = (Tensor::from_vec(
-            working_rgb.into_raw(),
-            (1, model_height as usize, model_width as usize, 3),
             &self.device,
         )?
         .permute((0, 3, 1, 2))?
@@ -247,29 +195,22 @@ impl AotInpainting {
         let image_tensor = (image_tensor - 1.0)?;
         let mask_tensor = Tensor::from_vec(
-            binary_model_mask.clone().into_raw(),
-            (1, model_height as usize, model_width as usize, 1),
             &self.device,
         )?
         .permute((0, 3, 1, 2))?
         .to_dtype(DType::F32)?;
         let mask_tensor = (mask_tensor / 255.0)?;
         let mask_inv = (Tensor::ones_like(&mask_tensor)? - &mask_tensor)?;
-        let mask_inv_rgb =
-            mask_inv.broadcast_as((1, 3, model_height as usize, model_width as usize))?;
         let masked_image = (&image_tensor * &mask_inv_rgb)?;
-        Ok(PreparedInput {
-            pixel_values: masked_image,
-            mask_values: mask_tensor,
-            original_rgb,
-            original_mask,
-            model_width,
-            model_height,
-        })
     }
-    fn postprocess(&self, output: &Tensor, prepared: &PreparedInput) -> Result<RgbImage> {
         let output = output.to_device(&Device::Cpu)?.squeeze(0)?;
         let (channels, height, width) = output.dims3()?;
         if channels != 3 {
@@ -282,27 +223,27 @@ impl AotInpainting {
             .to_dtype(DType::U8)?
             .flatten_all()?
             .to_vec1::<u8>()?;
-        let predicted = RgbImage::from_raw(width as u32, height as u32, raw)
-            .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))?;
-        let predicted = if width as u32 != prepared.original_rgb.width()
-            || height as u32 != prepared.original_rgb.height()
-        {
-            resize(
-                &predicted,
-                prepared.original_rgb.width(),
-                prepared.original_rgb.height(),
-                FilterType::Triangle,
-            )
-        } else {
-            predicted
-        };
-        Ok(composite_rgb(
-            &prepared.original_rgb,
-            &predicted,
-            &prepared.original_mask,
-        ))
     }
 }
@@ -323,49 +264,3 @@ async fn resolve_model_paths(runtime: &RuntimeManager) -> Result<(PathBuf, PathB
         .with_context(|| format!("failed to download {SAFETENSORS_FILENAME} from {HF_REPO}"))?;
     Ok((config, weights))
 }
-fn resize_keep_aspect_dims(width: u32, height: u32, max_side: u32) -> (u32, u32) {
-    let ratio = max_side as f32 / width.max(height) as f32;
-    (
-        ((width as f32 * ratio).round() as u32).max(1),
-        ((height as f32 * ratio).round() as u32).max(1),
-    )
-}
-fn round_up_multiple(value: u32, multiple: u32) -> u32 {
-    if value.is_multiple_of(multiple) {
-        value
-    } else {
-        value + (multiple - value % multiple)
-    }
-}
-fn composite_rgb(original: &RgbImage, predicted: &RgbImage, mask: &GrayImage) -> RgbImage {
-    let mut composited = original.clone();
-    for y in 0..original.height() {
-        for x in 0..original.width() {
-            if mask.get_pixel(x, y).0[0] > 0 {
-                composited.put_pixel(x, y, *predicted.get_pixel(x, y));
-            }
-        }
-    }
-    composited
-}
-#[cfg(test)]
-mod tests {
-    use super::{resize_keep_aspect_dims, round_up_multiple};
-    #[test]
-    fn resize_keep_aspect_matches_upstream_rounding() {
-        assert_eq!(resize_keep_aspect_dims(1600, 900, 1024), (1024, 576));
-        assert_eq!(resize_keep_aspect_dims(900, 1600, 1024), (576, 1024));
-    }
-    #[test]
-    fn round_up_multiple_expands_to_next_valid_shape() {
-        assert_eq!(round_up_multiple(1024, 8), 1024);
-        assert_eq!(round_up_multiple(1025, 8), 1032);
-        assert_eq!(round_up_multiple(7, 8), 8);
-    }
-}

 use anyhow::{Context, Result, bail};
 use candle_core::{DType, Device, Tensor};
+use image::{DynamicImage, GenericImageView, GrayImage, RgbImage};
 use koharu_runtime::RuntimeManager;
 use serde::Deserialize;
 use tracing::instrument;
 use crate::{
     device,
+    inpainting::{
+        HdStrategyConfig, InpaintForward, binarize_mask, extract_alpha, restore_alpha_channel,
+        run_inpaint, try_fill_balloon,
+    },
     loading,
 };
     device: Device,
 }
 #[derive(Debug, Clone, Deserialize)]
 struct AotInpaintingConfig {
     model_type: String,
         })
     }
+    /// Strategy configuration used by [`Self::inference`]: whatever
+    /// `HdStrategyConfig::aot_default` yields for this model's
+    /// `default_max_side` (resize limit) and `pad_multiple` (padding modulus).
+    pub fn default_config(&self) -> HdStrategyConfig {
+        let resize_limit = self.config.default_max_side;
+        let pad_mod = self.config.pad_multiple as u32;
+        HdStrategyConfig::aot_default(resize_limit, pad_mod)
+    }
     #[instrument(level = "debug", skip_all)]
     pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
+        self.inference_with_config(image, mask, &self.default_config())
     }
     #[instrument(level = "debug", skip_all)]
+    pub fn inference_with_config(
         &self,
         image: &DynamicImage,
         mask: &DynamicImage,
+        cfg: &HdStrategyConfig,
     ) -> Result<DynamicImage> {
         if image.dimensions() != mask.dimensions() {
             bail!(
                 "image and mask dimensions dismatch: image is {:?}, mask is {:?}",
         }
         let started = Instant::now();
+        let binary_mask = binarize_mask(mask);
+        let image_rgb = image.to_rgb8();
+        let forward = AotForward { aot: self };
+        let output_rgb = run_inpaint(&forward, &image_rgb, &binary_mask, cfg)?;
         tracing::info!(
             width = image.width(),
             height = image.height(),
+            resize_limit = cfg.resize_limit,
             total_ms = started.elapsed().as_millis(),
             "aot inpainting timings"
         );
         if image.color().has_alpha() {
             let alpha = extract_alpha(&image.to_rgba8());
+            let rgba = restore_alpha_channel(&output_rgb, &alpha, &binary_mask);
             Ok(DynamicImage::ImageRgba8(rgba))
         } else {
+            Ok(DynamicImage::ImageRgb8(output_rgb))
         }
     }
+    /// Raw model forward on a pre-padded RGB image + mask. Input spatial dims
+    /// must already be multiples of `pad_multiple` — the HD-strategy dispatcher
+    /// handles this.
+    fn forward_rgb(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
+        let (w, h) = image.dimensions();
         let image_tensor = (Tensor::from_vec(
+            image.clone().into_raw(),
+            (1, h as usize, w as usize, 3),
             &self.device,
         )?
         .permute((0, 3, 1, 2))?
         let image_tensor = (image_tensor - 1.0)?;
         let mask_tensor = Tensor::from_vec(
+            mask.clone().into_raw(),
+            (1, h as usize, w as usize, 1),
             &self.device,
         )?
         .permute((0, 3, 1, 2))?
         .to_dtype(DType::F32)?;
         let mask_tensor = (mask_tensor / 255.0)?;
         let mask_inv = (Tensor::ones_like(&mask_tensor)? - &mask_tensor)?;
+        let mask_inv_rgb = mask_inv.broadcast_as((1, 3, h as usize, w as usize))?;
         let masked_image = (&image_tensor * &mask_inv_rgb)?;
+        let output = self.model.forward(&masked_image, &mask_tensor)?;
+        self.postprocess(&output)
     }
+    fn postprocess(&self, output: &Tensor) -> Result<RgbImage> {
         let output = output.to_device(&Device::Cpu)?.squeeze(0)?;
         let (channels, height, width) = output.dims3()?;
         if channels != 3 {
             .to_dtype(DType::U8)?
             .flatten_all()?
             .to_vec1::<u8>()?;
+        RgbImage::from_raw(width as u32, height as u32, raw)
+            .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
+    }
+}
+/// Adapter that exposes an [`AotInpainting`] model through the
+/// [`InpaintForward`] trait so `run_inpaint` can drive it.
+struct AotForward<'a> {
+    /// Borrowed model; its `forward_rgb` performs the network pass.
+    aot: &'a AotInpainting,
+}
+impl InpaintForward for AotForward<'_> {
+    /// Inpaint one tile, trying the cheap paths before the network.
+    ///
+    /// 1. A mask without any non-zero pixel returns a copy of `image`.
+    /// 2. If [`try_fill_balloon`] accepts the tile, its result is returned and
+    ///    the model is skipped.
+    /// 3. Otherwise the tile goes through `AotInpainting::forward_rgb`.
+    fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
+        let has_masked_pixel = mask.pixels().any(|px| px.0[0] != 0);
+        if !has_masked_pixel {
+            return Ok(image.clone());
+        }
+        // Same flat-balloon shortcut as Lama. It mostly fires on per-bubble
+        // crops (Crop strategy) and generally declines on whole-image
+        // forwards (Resize strategy).
+        match try_fill_balloon(image, mask) {
+            Some(filled) => Ok(filled),
+            None => self.aot.forward_rgb(image, mask),
+        }
     }
 }
         .with_context(|| format!("failed to download {SAFETENSORS_FILENAME} from {HF_REPO}"))?;
     Ok((config, weights))
 }

koharu-ml/src/inpainting/balloon.rs ADDED Viewed

	@@ -0,0 +1,369 @@

+//! Balloon-fill fast path for inpainting.
+//!
+//! When a mask sits inside a speech bubble with a near-uniform background,
+//! the model can be skipped entirely: fill the masked pixels with the median
+//! background colour of the balloon. This is purely image processing, so
+//! every erase model (Lama, AoT) can use it as a pre-model pass.
+//!
+//! Effectiveness depends on the caller handing us one bubble at a time —
+//! which is exactly what the Crop strategy does, since each crop corresponds
+//! to a connected mask contour. On a whole-image forward (Resize strategy),
+//! `extract_balloon_mask` usually fails to find a single containing contour
+//! and we fall through to the model.
+use image::{DynamicImage, GrayImage, Luma, Rgb, RgbImage};
+use imageproc::{
+    contours::find_contours, distance_transform::Norm, drawing::draw_polygon_mut, edges::canny,
+    filter::gaussian_blur_f32, morphology::dilate, point::Point,
+};
+const BALLOON_CANNY_LOW: f32 = 70.0;
+const BALLOON_CANNY_HIGH: f32 = 140.0;
+const SIMPLE_BG_THRESHOLD_LOW_VARIANCE: f64 = 10.0;
+const SIMPLE_BG_THRESHOLD_HIGH_VARIANCE: f64 = 7.0;
+const SIMPLE_BG_CHANNEL_STD_SWITCH: f64 = 1.0;
+type Xyxy = [u32; 4];
+/// The pair of masks produced by [`extract_balloon_mask`]. Both have the
+/// dimensions of the analysed image.
+pub(crate) struct BalloonMasks {
+    /// Filled polygon of the selected balloon contour (255 inside, 0 outside).
+    pub balloon_mask: GrayImage,
+    /// Copy of `balloon_mask` with every non-zero pixel of the text mask
+    /// cleared, i.e. the balloon background only.
+    pub non_text_mask: GrayImage,
+}
+/// Flat-background shortcut: try to erase the masked text without a model.
+///
+/// Looks for a balloon contour enclosing `mask` (see [`extract_balloon_mask`]),
+/// takes the per-channel median of the balloon pixels that are not masked and
+/// measures how far those pixels spread around it. When the spread is below
+/// the threshold, a copy of `image` is returned in which every pixel of the
+/// balloon polygon, masked or not, is set to that median colour.
+///
+/// Returns `None` ("no confident fast path; call the model") when no balloon
+/// is found, when the balloon has no unmasked pixel, or when the background
+/// varies too much.
+pub fn try_fill_balloon(image: &RgbImage, mask: &GrayImage) -> Option<RgbImage> {
+    let BalloonMasks {
+        balloon_mask,
+        non_text_mask,
+    } = extract_balloon_mask(image, mask)?;
+    let background = median_rgb(image, &non_text_mask)?;
+    let spread = color_stddev(image, &non_text_mask, background);
+    // When the three channel spreads disagree, the stricter limit applies.
+    let channels_disagree = stddev3(spread) > SIMPLE_BG_CHANNEL_STD_SWITCH;
+    let limit = if channels_disagree {
+        SIMPLE_BG_THRESHOLD_HIGH_VARIANCE
+    } else {
+        SIMPLE_BG_THRESHOLD_LOW_VARIANCE
+    };
+    let worst_channel = spread.into_iter().fold(0.0, f64::max);
+    if worst_channel >= limit {
+        return None;
+    }
+    // The float-to-u8 `as` cast truncates, so an x.5 median rounds down.
+    let fill = Rgb(background.map(|channel| channel as u8));
+    let mut filled = image.clone();
+    balloon_mask
+        .enumerate_pixels()
+        .filter(|(_, _, px)| px.0[0] > 0)
+        .for_each(|(x, y, _)| filled.put_pixel(x, y, fill));
+    Some(filled)
+}
+/// Find the smallest edge contour that fully encloses the text `mask`.
+///
+/// Returns the filled contour (`balloon_mask`) together with the same mask
+/// minus the text pixels (`non_text_mask`). Returns `None` when the image and
+/// mask sizes differ, when the mask is empty, or when no contour covers every
+/// masked pixel.
+pub(crate) fn extract_balloon_mask(image: &RgbImage, mask: &GrayImage) -> Option<BalloonMasks> {
+    if image.dimensions() != mask.dimensions() {
+        return None;
+    }
+    let text_bbox = non_zero_bbox(mask)?;
+    let text_sum = count_nonzero(mask);
+    if text_sum == 0 {
+        return None;
+    }
+    // Edge map: grayscale -> blur -> Canny -> 1px dilation.
+    let gray = DynamicImage::ImageRgb8(image.clone()).to_luma8();
+    let blurred = gaussian_blur_f32(&gray, 1.0);
+    let mut cannyed = canny(&blurred, BALLOON_CANNY_LOW, BALLOON_CANNY_HIGH);
+    cannyed = dilate(&cannyed, Norm::LInf, 1);
+    // Paint the image frame as an edge, then erase edges that lie on the text
+    // itself so the glyph outlines do not produce contours of their own.
+    draw_binary_border(&mut cannyed);
+    subtract_binary_mask(&mut cannyed, mask);
+    let contours = find_contours::<i32>(&cannyed);
+    let (width, height) = cannyed.dimensions();
+    let mut best_mask = None;
+    let mut best_area = f64::INFINITY;
+    for contour in contours {
+        let Some(polygon) = contour_polygon(&contour.points) else {
+            continue;
+        };
+        // `polygon` has at least three points here, so this `?` never fires.
+        let bbox = polygon_bbox(&polygon)?;
+        // Cheap rejection: the contour's bounding box must contain the text's.
+        if bbox[0] > text_bbox[0]
+            || bbox[1] > text_bbox[1]
+            || bbox[2] < text_bbox[2]
+            || bbox[3] < text_bbox[3]
+        {
+            continue;
+        }
+        // Exact test: the filled polygon must cover every masked pixel.
+        let mut candidate = GrayImage::new(width, height);
+        draw_polygon_mut(&mut candidate, &polygon, Luma([255u8]));
+        if count_overlap(&candidate, mask) < text_sum {
+            continue;
+        }
+        // Keep the smallest covering contour (strict `<`: first one wins ties).
+        let area = polygon_area(&polygon);
+        if area < best_area {
+            best_area = area;
+            best_mask = Some(candidate);
+        }
+    }
+    let balloon_mask = best_mask?;
+    // Background-only mask: the balloon with the text pixels removed.
+    let mut non_text_mask = balloon_mask.clone();
+    for (x, y, pixel) in mask.enumerate_pixels() {
+        if pixel.0[0] > 0 {
+            non_text_mask.put_pixel(x, y, Luma([0]));
+        }
+    }
+    Some(BalloonMasks {
+        balloon_mask,
+        non_text_mask,
+    })
+}
+/// Turn a raw contour into an open polygon.
+///
+/// A trailing point that repeats the first one is dropped. Returns `None`
+/// when fewer than three points are available before or after that step.
+fn contour_polygon(points: &[Point<i32>]) -> Option<Vec<Point<i32>>> {
+    if points.len() < 3 {
+        return None;
+    }
+    let is_closed = points.first() == points.last();
+    let vertices = if is_closed {
+        &points[..points.len() - 1]
+    } else {
+        points
+    };
+    (vertices.len() >= 3).then(|| vertices.to_vec())
+}
+/// Half-open bounding box `[x1, y1, x2, y2]` of `points`, or `None` for an
+/// empty slice.
+///
+/// All four coordinates are clamped to zero before the unsigned conversion,
+/// so a polygon lying entirely at negative coordinates yields an empty box at
+/// the origin instead of a wrapped-around huge value.
+fn polygon_bbox(points: &[Point<i32>]) -> Option<Xyxy> {
+    let first = points.first()?;
+    let seed = (first.x, first.y, first.x, first.y);
+    let (min_x, min_y, max_x, max_y) =
+        points
+            .iter()
+            .skip(1)
+            .fold(seed, |(min_x, min_y, max_x, max_y), point| {
+                (
+                    min_x.min(point.x),
+                    min_y.min(point.y),
+                    max_x.max(point.x),
+                    max_y.max(point.y),
+                )
+            });
+    // Non-negative after `max(0)`, so the cast to u32 is lossless.
+    let clamp = |value: i32| value.max(0) as u32;
+    Some([
+        clamp(min_x),
+        clamp(min_y),
+        // Exclusive upper bounds; saturate so i32::MAX cannot overflow.
+        clamp(max_x.saturating_add(1)),
+        clamp(max_y.saturating_add(1)),
+    ])
+}
+/// Unsigned area of the closed polygon through `points` (shoelace formula).
+/// The last vertex is implicitly joined back to the first; an empty slice
+/// has area zero.
+fn polygon_area(points: &[Point<i32>]) -> f64 {
+    // Pair every vertex with its successor, wrapping around at the end.
+    let successors = points.iter().cycle().skip(1);
+    let mut twice_signed_area = 0.0;
+    for (here, there) in points.iter().zip(successors) {
+        twice_signed_area +=
+            f64::from(here.x) * f64::from(there.y) - f64::from(there.x) * f64::from(here.y);
+    }
+    twice_signed_area.abs() * 0.5
+}
+/// Set the outermost rows and columns of `image` to 255. Images with a zero
+/// dimension are left untouched.
+fn draw_binary_border(image: &mut GrayImage) {
+    let (width, height) = image.dimensions();
+    if width == 0 || height == 0 {
+        return;
+    }
+    let white = Luma([255u8]);
+    let (last_col, last_row) = (width - 1, height - 1);
+    for x in 0..width {
+        for y in [0, last_row] {
+            image.put_pixel(x, y, white);
+        }
+    }
+    for y in 0..height {
+        for x in [0, last_col] {
+            image.put_pixel(x, y, white);
+        }
+    }
+}
+/// Zero every pixel of `image` whose counterpart in `mask` is non-zero.
+/// `mask` is read at each coordinate of `image`, so it must be at least as
+/// large.
+fn subtract_binary_mask(image: &mut GrayImage, mask: &GrayImage) {
+    for (x, y, px) in image.enumerate_pixels_mut() {
+        let Luma([covered]) = *mask.get_pixel(x, y);
+        if covered > 0 {
+            *px = Luma([0]);
+        }
+    }
+}
+/// Half-open bounding box `[x1, y1, x2, y2]` around the non-zero pixels of
+/// `mask`, or `None` when every pixel is zero.
+fn non_zero_bbox(mask: &GrayImage) -> Option<Xyxy> {
+    // Inclusive (left, top, right, bottom) extent of the pixels seen so far.
+    let mut extent: Option<(u32, u32, u32, u32)> = None;
+    for (x, y, px) in mask.enumerate_pixels() {
+        if px.0[0] == 0 {
+            continue;
+        }
+        extent = Some(match extent {
+            None => (x, y, x, y),
+            Some((left, top, right, bottom)) => {
+                (left.min(x), top.min(y), right.max(x), bottom.max(y))
+            }
+        });
+    }
+    extent.map(|(left, top, right, bottom)| {
+        [
+            left,
+            top,
+            right.saturating_add(1),
+            bottom.saturating_add(1),
+        ]
+    })
+}
+/// Number of pixels in `mask` whose value is greater than zero.
+fn count_nonzero(mask: &GrayImage) -> u32 {
+    let lit = mask
+        .pixels()
+        .fold(0usize, |total, px| total + usize::from(px.0[0] > 0));
+    lit as u32
+}
+/// Number of positions at which both `left` and `right` are non-zero.
+/// Pixels are paired in storage order, up to the shorter of the two images.
+fn count_overlap(left: &GrayImage, right: &GrayImage) -> u32 {
+    let mut shared = 0usize;
+    for (a, b) in left.pixels().zip(right.pixels()) {
+        if a.0[0] > 0 && b.0[0] > 0 {
+            shared += 1;
+        }
+    }
+    shared as u32
+}
+/// Per-channel median of the pixels of `image` selected by non-zero `mask`
+/// pixels. Returns `None` when the mask selects nothing.
+fn median_rgb(image: &RgbImage, mask: &GrayImage) -> Option<[f64; 3]> {
+    let selected: Vec<_> = image
+        .pixels()
+        .zip(mask.pixels())
+        .filter(|(_, gate)| gate.0[0] != 0)
+        .map(|(px, _)| px)
+        .collect();
+    let channel_median = |index: usize| {
+        let values: Vec<u8> = selected.iter().map(|px| px.0[index]).collect();
+        median_channel(&values)
+    };
+    Some([channel_median(0)?, channel_median(1)?, channel_median(2)?])
+}
+/// Median of `values`: the middle element of the sorted data, or the mean of
+/// the two middle elements for an even count. `None` for an empty slice.
+fn median_channel(values: &[u8]) -> Option<f64> {
+    let mut sorted = values.to_vec();
+    sorted.sort_unstable();
+    let upper = sorted.len() / 2;
+    // `get` fails only for an empty input, which has no median.
+    let high = f64::from(*sorted.get(upper)?);
+    if sorted.len().is_multiple_of(2) {
+        let low = f64::from(sorted[upper - 1]);
+        Some((low + high) / 2.0)
+    } else {
+        Some(high)
+    }
+}
+/// Per-channel root-mean-square deviation from `median` over the pixels of
+/// `image` selected by non-zero `mask` pixels. An empty selection yields
+/// infinity in every channel.
+fn color_stddev(image: &RgbImage, mask: &GrayImage, median: [f64; 3]) -> [f64; 3] {
+    let mut squared = [0.0f64; 3];
+    let mut samples = 0.0f64;
+    let selected = image
+        .pixels()
+        .zip(mask.pixels())
+        .filter(|(_, gate)| gate.0[0] != 0);
+    for (px, _) in selected {
+        samples += 1.0;
+        for ((total, &value), &centre) in squared.iter_mut().zip(&px.0).zip(&median) {
+            let delta = f64::from(value) - centre;
+            *total += delta * delta;
+        }
+    }
+    if samples == 0.0 {
+        return [f64::INFINITY; 3];
+    }
+    squared.map(|total| (total / samples).sqrt())
+}
+/// Population standard deviation of three values.
+fn stddev3(values: [f64; 3]) -> f64 {
+    let mean = values.iter().sum::<f64>() / 3.0;
+    let squared = values.map(|value| {
+        let delta = value - mean;
+        delta * delta
+    });
+    let variance = squared.iter().sum::<f64>() / 3.0;
+    variance.sqrt()
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use imageproc::drawing::draw_hollow_rect_mut;
+    use imageproc::rect::Rect;
+    /// Two nested outlines surround the text; the inner one must be chosen.
+    #[test]
+    fn extract_balloon_mask_prefers_smallest_covering_contour() {
+        let mut image = RgbImage::from_pixel(80, 80, Rgb([255, 255, 255]));
+        // Outer 72x72 frame and inner 28x20 frame, both enclosing the mask.
+        draw_hollow_rect_mut(&mut image, Rect::at(4, 4).of_size(72, 72), Rgb([0, 0, 0]));
+        draw_hollow_rect_mut(&mut image, Rect::at(20, 20).of_size(28, 20), Rgb([0, 0, 0]));
+        let mut mask = GrayImage::new(80, 80);
+        for y in 24..36 {
+            for x in 24..44 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        let masks = extract_balloon_mask(&image, &mask).expect("balloon should be detected");
+        let balloon_pixels = count_nonzero(&masks.balloon_mask);
+        // Upper bound excludes the outer frame, lower bound rules out a
+        // degenerate sliver.
+        assert!(
+            balloon_pixels < 900,
+            "expected inner contour fill, got {balloon_pixels}"
+        );
+        assert!(
+            balloon_pixels > 250,
+            "expected meaningful bubble area, got {balloon_pixels}"
+        );
+    }
+    /// A flat balloon takes the fast path; a noisy one is left to the model.
+    #[test]
+    fn simple_balloon_chooses_fill_but_textured_balloon_does_not() {
+        let mut flat = RgbImage::from_pixel(64, 64, Rgb([240, 240, 240]));
+        draw_hollow_rect_mut(&mut flat, Rect::at(8, 8).of_size(48, 32), Rgb([0, 0, 0]));
+        let mut mask = GrayImage::new(64, 64);
+        for y in 18..30 {
+            for x in 18..46 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        assert!(try_fill_balloon(&flat, &mask).is_some());
+        // Repaint the balloon interior with a deterministic colour pattern.
+        let mut textured = flat.clone();
+        for y in 9..39 {
+            for x in 9..55 {
+                let noise = ((x + y) % 23) as u8;
+                textured.put_pixel(
+                    x,
+                    y,
+                    Rgb([200 + noise, 210 + (noise / 2), 220 - (noise / 3)]),
+                );
+            }
+        }
+        assert!(try_fill_balloon(&textured, &mask).is_none());
+    }
+}

koharu-ml/src/{inpainting.rs → inpainting/mod.rs} RENAMED Viewed

@@ -1,3 +1,16 @@
 use image::{DynamicImage, GrayImage, Luma, RgbImage, Rgba, RgbaImage};
 use imageproc::{distance_transform::Norm, morphology::dilate};

+//! Shared inpainting infrastructure: alpha handling, mask prep, and the
+//! HD-strategy dispatcher used by every erase model (Lama, AoT).
+//!
+//! The strategy dispatcher mirrors IOPaint's `InpaintModel.__call__`: one place
+//! decides between Original / Resize / Crop based on image size and a
+//! per-model config. Concrete models only implement the raw forward pass.
+pub mod balloon;
+pub mod strategy;
+pub use balloon::try_fill_balloon;
+pub use strategy::{HdStrategy, HdStrategyConfig, InpaintForward, run_inpaint};
 use image::{DynamicImage, GrayImage, Luma, RgbImage, Rgba, RgbaImage};
 use imageproc::{distance_transform::Norm, morphology::dilate};

koharu-ml/src/inpainting/strategy.rs ADDED Viewed

	@@ -0,0 +1,539 @@

+//! HD-strategy dispatcher for erase models.
+//!
+//! Mirrors IOPaint's `InpaintModel.__call__` (`iopaint/model/base.py`): one
+//! entry point chooses between Original / Resize / Crop based on image size,
+//! then delegates the raw forward to a model-specific [`InpaintForward`].
+//!
+//! ## Strategies
+//!
+//! - **Original** — pad to `pad_mod`, forward, unpad. Highest VRAM.
+//! - **Resize** — downscale so `max(h,w) <= resize_limit`, pad, forward, unpad,
+//!   upscale, then restore pixels outside the mask from the original. Medium
+//!   VRAM, preserves quality outside the mask.
+//! - **Crop** — extract one bounding box per connected mask contour, expand by
+//!   `crop_margin` on each side, forward each crop independently, paste back.
+//!   Lowest VRAM. Default for manga (many small speech bubbles).
+//!
+//! The Crop path uses [`pad_forward_bounded`] per crop, so an oversized crop
+//! (e.g. a brush stroke covering most of a page) falls back to the Resize path
+//! inside that single crop. Crop and Resize therefore never forward an input
+//! whose longer side exceeds `max(crop_trigger_size, resize_limit)` (plus up to
+//! `pad_mod - 1` pixels of padding); only Original is unbounded.
+//!
+//! Mask boxes come from `imageproc::contours::find_contours` on the binarized
+//! mask — equivalent to OpenCV's `cv2.findContours(RETR_EXTERNAL)` that IOPaint
+//! uses. Only `BorderType::Outer` contours become boxes (holes are ignored).
+use anyhow::Result;
+use image::{
+    GrayImage, RgbImage,
+    imageops::{FilterType, crop_imm, replace, resize},
+};
+use imageproc::contours::{BorderType, find_contours};
+/// Which preprocessing strategy to apply before the raw forward. See the
+/// module docs for the semantics of each variant.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum HdStrategy {
+    /// Pad to `pad_mod` and forward the image at its full size.
+    Original,
+    /// Downscale first when the longer side exceeds `resize_limit`; smaller
+    /// images are handled like `Original`.
+    Resize,
+    /// Forward one margin-expanded crop per mask contour when the longer side
+    /// exceeds `crop_trigger_size`; smaller images are handled like `Original`.
+    Crop,
+}
+/// Tunable knobs for [`run_inpaint`]. Defaults match IOPaint
+/// (`iopaint/schema.py` — trigger 800, margin 128, resize limit 1280).
+#[derive(Debug, Clone, Copy)]
+pub struct HdStrategyConfig {
+    /// Strategy that [`run_inpaint`] dispatches on.
+    pub strategy: HdStrategy,
+    /// Crop strategy only activates when `max(image.w, image.h) >
+    /// crop_trigger_size`. Smaller images fall through to Original.
+    pub crop_trigger_size: u32,
+    /// Additive margin (pixels) added to each side of a mask bounding box when
+    /// cropping. Controls how much context the model sees around the mask.
+    pub crop_margin: u32,
+    /// Hard ceiling on the forward's longer side. Applied by Resize strategy at
+    /// the top level, and as a nested fallback inside oversized crops.
+    pub resize_limit: u32,
+    /// Model-required spatial divisor. LaMa / AoT both need 8; larger for
+    /// models with deeper downsampling. A value of 0 disables padding.
+    pub pad_mod: u32,
+}
+impl HdStrategyConfig {
+    /// Preset for Lama on manga pages: the Crop strategy with IOPaint's
+    /// numbers (trigger 800, margin 128, resize limit 1280) and a padding
+    /// modulus of 8. Pages with many small bubbles become many small forwards.
+    pub const fn lama_default() -> Self {
+        Self {
+            pad_mod: 8,
+            resize_limit: 1280,
+            crop_margin: 128,
+            crop_trigger_size: 800,
+            strategy: HdStrategy::Crop,
+        }
+    }
+    /// Preset for AoT: whole-image Resize bounded by the caller-supplied
+    /// `resize_limit` (AoT's upstream config calls it `default_max_side`) and
+    /// padded to `pad_mod`. The crop fields keep IOPaint's values but are not
+    /// consulted by the Resize strategy.
+    pub const fn aot_default(resize_limit: u32, pad_mod: u32) -> Self {
+        Self {
+            pad_mod,
+            resize_limit,
+            crop_margin: 128,
+            crop_trigger_size: 800,
+            strategy: HdStrategy::Resize,
+        }
+    }
+}
+/// `[x1, y1, x2, y2]` half-open rectangle: `x1,y1` inclusive, `x2,y2` exclusive.
+pub type Xyxy = [u32; 4];
+/// A raw forward pass on a (padded) image + mask, returning an image of the
+/// same spatial size. Implementors are free to apply fast paths (e.g. Lama's
+/// balloon-fill shortcut) before the model forward.
+pub trait InpaintForward {
+    /// Inpaint `image` where `mask` is set. The dispatcher crops the result
+    /// back to the unpadded size, so the output must be at least as large as
+    /// the input.
+    fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage>;
+}
+/// Entry point: dispatch on `cfg.strategy` and return an RGB image with the
+/// masked region inpainted. `mask` must already be binarized (0 or 255).
+///
+/// - `Crop` runs per-contour crops when the longer side exceeds
+///   `cfg.crop_trigger_size`.
+/// - `Resize` downscales when the longer side exceeds `cfg.resize_limit`.
+/// - Everything else (including `Original`) is padded and forwarded as is.
+///
+/// # Errors
+///
+/// Returns an error when `image` and `mask` have different dimensions, or
+/// when the model forward fails.
+pub fn run_inpaint<F: InpaintForward>(
+    model: &F,
+    image: &RgbImage,
+    mask: &GrayImage,
+    cfg: &HdStrategyConfig,
+) -> Result<RgbImage> {
+    // A mismatch is a caller error, not a broken invariant: report it through
+    // the `Result` instead of panicking inside the pipeline.
+    anyhow::ensure!(
+        image.dimensions() == mask.dimensions(),
+        "image and mask dimensions mismatch: image is {:?}, mask is {:?}",
+        image.dimensions(),
+        mask.dimensions()
+    );
+    let max_side = image.width().max(image.height());
+    match cfg.strategy {
+        HdStrategy::Crop if max_side > cfg.crop_trigger_size => run_crop(model, image, mask, cfg),
+        HdStrategy::Resize if max_side > cfg.resize_limit => run_resize(model, image, mask, cfg),
+        _ => pad_forward(model, image, mask, cfg.pad_mod),
+    }
+}
+/// Crop strategy: inpaint one margin-expanded tile per outer mask contour and
+/// paste each result back at the tile's position. Tiles are always cut from
+/// the untouched input. A mask without contours returns a copy of `image`.
+fn run_crop<F: InpaintForward>(
+    model: &F,
+    image: &RgbImage,
+    mask: &GrayImage,
+    cfg: &HdStrategyConfig,
+) -> Result<RgbImage> {
+    let contour_boxes = boxes_from_mask(mask);
+    if contour_boxes.is_empty() {
+        return Ok(image.clone());
+    }
+    tracing::debug!(
+        count = contour_boxes.len(),
+        "inpaint crop strategy: one forward per mask contour"
+    );
+    contour_boxes.into_iter().try_fold(
+        image.clone(),
+        |mut canvas, contour_box| -> Result<RgbImage> {
+            let (tile, tile_mask, placed) = crop_box(image, mask, contour_box, cfg.crop_margin);
+            let inpainted = pad_forward_bounded(model, &tile, &tile_mask, cfg)?;
+            let [left, top, ..] = placed;
+            replace(&mut canvas, &inpainted, i64::from(left), i64::from(top));
+            Ok(canvas)
+        },
+    )
+}
+/// Resize strategy: shrink image and mask so the longer side fits
+/// `cfg.resize_limit`, inpaint the small version, scale the result back up
+/// and copy every unmasked pixel from the original on top of it.
+fn run_resize<F: InpaintForward>(
+    model: &F,
+    image: &RgbImage,
+    mask: &GrayImage,
+    cfg: &HdStrategyConfig,
+) -> Result<RgbImage> {
+    let (w, h) = image.dimensions();
+    let (nw, nh) = scaled_dims(w, h, cfg.resize_limit);
+    tracing::debug!(
+        from_w = w,
+        from_h = h,
+        to_w = nw,
+        to_h = nh,
+        "inpaint resize strategy"
+    );
+    let shrunk_image = resize(image, nw, nh, FilterType::Triangle);
+    // Interpolation leaves grey values on mask edges; snap them to 0/255.
+    let shrunk_mask = rebinarize(&resize(mask, nw, nh, FilterType::Triangle));
+    let shrunk_result = pad_forward(model, &shrunk_image, &shrunk_mask, cfg.pad_mod)?;
+    let mut restored = resize(&shrunk_result, w, h, FilterType::CatmullRom);
+    // Only inpainted pixels should pay the resampling cost. Matches IOPaint's
+    // `original_pixel_indices = mask < 127`.
+    for (x, y, px) in restored.enumerate_pixels_mut() {
+        if mask.get_pixel(x, y).0[0] < 127 {
+            *px = *image.get_pixel(x, y);
+        }
+    }
+    Ok(restored)
+}
+/// Forward a tile, downscaling it first when needed: inputs whose longer side
+/// exceeds `cfg.resize_limit` go through [`run_resize`], everything else
+/// through [`pad_forward`]. Used inside the Crop loop so oversized crops
+/// don't OOM.
+fn pad_forward_bounded<F: InpaintForward>(
+    model: &F,
+    image: &RgbImage,
+    mask: &GrayImage,
+    cfg: &HdStrategyConfig,
+) -> Result<RgbImage> {
+    let longer_side = image.width().max(image.height());
+    if longer_side <= cfg.resize_limit {
+        return pad_forward(model, image, mask, cfg.pad_mod);
+    }
+    run_resize(model, image, mask, cfg)
+}
+/// Grow image and mask on the right/bottom to the next multiple of `pad_mod`
+/// using symmetric reflection, run the model, then cut the output back to the
+/// input size. Inputs that already fit are forwarded without copying.
+/// Matches IOPaint's `_pad_forward` / `pad_img_to_modulo`.
+fn pad_forward<F: InpaintForward>(
+    model: &F,
+    image: &RgbImage,
+    mask: &GrayImage,
+    pad_mod: u32,
+) -> Result<RgbImage> {
+    let (w, h) = image.dimensions();
+    let padded = (ceil_multiple(w, pad_mod), ceil_multiple(h, pad_mod));
+    if padded == (w, h) {
+        return model.forward(image, mask);
+    }
+    let (padded_w, padded_h) = padded;
+    let padded_image = symmetric_pad_rgb(image, padded_w, padded_h);
+    let padded_mask = symmetric_pad_gray(mask, padded_w, padded_h);
+    let padded_result = model.forward(&padded_image, &padded_mask)?;
+    Ok(crop_imm(&padded_result, 0, 0, w, h).to_image())
+}
+/// Bounding boxes of the outer contours of a binarized mask, one half-open
+/// `[x1, y1, x2, y2]` per contour, clamped to the mask. Hole borders and
+/// empty boxes are skipped. Equivalent to IOPaint's `boxes_from_mask`
+/// (`cv2.findContours(RETR_EXTERNAL)` + `cv2.boundingRect`).
+pub fn boxes_from_mask(mask: &GrayImage) -> Vec<Xyxy> {
+    let (mask_w, mask_h) = mask.dimensions();
+    // Negative coordinates clamp to 0, oversized ones to the mask edge.
+    let clamp_to = |value: i32, limit: u32| (value.max(0) as u32).min(limit);
+    find_contours::<i32>(mask)
+        .into_iter()
+        .filter(|contour| contour.border_type == BorderType::Outer)
+        .filter_map(|contour| {
+            let mut points = contour.points.iter();
+            let first = points.next()?;
+            let seed = (first.x, first.y, first.x, first.y);
+            let (min_x, min_y, max_x, max_y) =
+                points.fold(seed, |(min_x, min_y, max_x, max_y), p| {
+                    (
+                        min_x.min(p.x),
+                        min_y.min(p.y),
+                        max_x.max(p.x),
+                        max_y.max(p.y),
+                    )
+                });
+            let x1 = clamp_to(min_x, mask_w);
+            let y1 = clamp_to(min_y, mask_h);
+            let x2 = clamp_to(max_x.saturating_add(1), mask_w);
+            let y2 = clamp_to(max_y.saturating_add(1), mask_h);
+            (x2 > x1 && y2 > y1).then_some([x1, y1, x2, y2])
+        })
+        .collect()
+}
+/// Expand `box_xyxy` by `margin` pixels on each side, clamped to the image.
+/// When the expanded rect would overflow one edge, shift inward so the full
+/// `(box + margin*2)` footprint still fits when possible — matches IOPaint's
+/// `_crop_box` (`iopaint/model/base.py`).
+///
+/// Returns the cropped image, the cropped mask and the half-open rectangle
+/// `[l, t, r, b]` that was cut out, in coordinates of `image`. The window is
+/// always at least one pixel wide and high.
+///
+/// The half extents use integer division, so an odd `want_w` / `want_h`
+/// yields a window one pixel smaller than requested.
+pub fn crop_box(
+    image: &RgbImage,
+    mask: &GrayImage,
+    box_xyxy: Xyxy,
+    margin: u32,
+) -> (RgbImage, GrayImage, Xyxy) {
+    let [bx1, by1, bx2, by2] = box_xyxy;
+    let (img_w, img_h) = image.dimensions();
+    // Window centre and requested size (box plus margin on both sides).
+    let cx = (bx1 + bx2) / 2;
+    let cy = (by1 + by2) / 2;
+    let want_w = (bx2 - bx1) + margin * 2;
+    let want_h = (by2 - by1) + margin * 2;
+    let half_w = want_w / 2;
+    let half_h = want_h / 2;
+    // Signed desired bounds before clamping (i64 to preserve negatives).
+    let desire_l = cx as i64 - half_w as i64;
+    let desire_r = cx as i64 + half_w as i64;
+    let desire_t = cy as i64 - half_h as i64;
+    let desire_b = cy as i64 + half_h as i64;
+    let img_w_i = img_w as i64;
+    let img_h_i = img_h as i64;
+    // First clamp the window to the image...
+    let mut l = desire_l.max(0);
+    let mut r = desire_r.min(img_w_i);
+    let mut t = desire_t.max(0);
+    let mut b = desire_b.min(img_h_i);
+    // ...then give whatever was cut off on one side back on the opposite
+    // side, as far as the image allows.
+    if desire_l < 0 {
+        r = (r - desire_l).min(img_w_i);
+    }
+    if desire_r > img_w_i {
+        l = (l - (desire_r - img_w_i)).max(0);
+    }
+    if desire_t < 0 {
+        b = (b - desire_t).min(img_h_i);
+    }
+    if desire_b > img_h_i {
+        t = (t - (desire_b - img_h_i)).max(0);
+    }
+    let l = l.clamp(0, img_w_i) as u32;
+    let r = r.clamp(0, img_w_i) as u32;
+    let t = t.clamp(0, img_h_i) as u32;
+    let b = b.clamp(0, img_h_i) as u32;
+    // Guarantee a non-empty window where the image has room for one.
+    let r = r.max(l + 1).min(img_w);
+    let b = b.max(t + 1).min(img_h);
+    let cw = r - l;
+    let ch = b - t;
+    let crop_img = crop_imm(image, l, t, cw, ch).to_image();
+    let crop_mask = crop_imm(mask, l, t, cw, ch).to_image();
+    (crop_img, crop_mask, [l, t, r, b])
+}
+/// Shrink `(w, h)` so that the longer side equals `max_side`, keeping the
+/// aspect ratio. Sizes that already fit are returned unchanged, and neither
+/// side ever drops below one pixel. Mirrors IOPaint's `resize_max_size`.
+pub fn scaled_dims(w: u32, h: u32, max_side: u32) -> (u32, u32) {
+    let longer = w.max(h);
+    if longer <= max_side {
+        return (w, h);
+    }
+    let ratio = f64::from(max_side) / f64::from(longer);
+    let shrink = |side: u32| ((f64::from(side) * ratio).round() as u32).max(1);
+    (shrink(w), shrink(h))
+}
+/// Round `v` up to the nearest multiple of `m`.
+///
+/// A modulus of `0` is treated as "no alignment requested" and returns `v`
+/// unchanged; values that are already a multiple of `m` are returned as-is.
+fn ceil_multiple(v: u32, m: u32) -> u32 {
+    match m {
+        // `next_multiple_of` panics on a zero modulus, so handle it up front.
+        0 => v,
+        modulus => v.next_multiple_of(modulus),
+    }
+}
+/// Return a copy of `mask` snapped to pure black/white: every pixel with a
+/// value above 127 becomes 255, everything else becomes 0. The input mask is
+/// left untouched.
+fn rebinarize(mask: &GrayImage) -> GrayImage {
+    let mut binary = mask.clone();
+    binary.pixels_mut().for_each(|px| {
+        px.0[0] = match px.0[0] {
+            128..=255 => 255,
+            _ => 0,
+        };
+    });
+    binary
+}
+/// Grow an RGB image to `new_w` x `new_h` in the manner of numpy's
+/// `mode="symmetric"` padding, applied to the right and bottom edges only
+/// (callers pad by at most `pad_mod - 1` pixels to reach a modulo boundary).
+///
+/// Each output pixel is copied from the source pixel at the reflected
+/// coordinates; when the requested size equals the current size the image is
+/// simply cloned.
+fn symmetric_pad_rgb(img: &RgbImage, new_w: u32, new_h: u32) -> RgbImage {
+    let (src_w, src_h) = img.dimensions();
+    if (new_w, new_h) == (src_w, src_h) {
+        return img.clone();
+    }
+    RgbImage::from_fn(new_w, new_h, |x, y| {
+        *img.get_pixel(reflect_index(x, src_w), reflect_index(y, src_h))
+    })
+}
+/// Grayscale counterpart of the symmetric right/bottom padding: builds a
+/// `new_w` x `new_h` image whose pixels are copied from `img` at the reflected
+/// coordinates. Returns a plain clone when the size is already as requested.
+fn symmetric_pad_gray(img: &GrayImage, new_w: u32, new_h: u32) -> GrayImage {
+    let (src_w, src_h) = img.dimensions();
+    if (new_w, new_h) == (src_w, src_h) {
+        return img.clone();
+    }
+    GrayImage::from_fn(new_w, new_h, |x, y| {
+        *img.get_pixel(reflect_index(x, src_w), reflect_index(y, src_h))
+    })
+}
+/// Map an output coordinate `i` to a source coordinate in `[0, len)` using
+/// numpy-style `mode="symmetric"` reflection (the edge sample is repeated):
+/// for `len = 5` the sequence for `i = 0, 1, 2, ...` is
+/// `0 1 2 3 4 4 3 2 1 0 0 1 2 3 4 4 ...`, i.e. it has period `2 * len`.
+///
+/// Indices inside `[0, len)` map to themselves. Our callers only pad the
+/// right/bottom edge by fewer than `len` pixels, but the mapping stays a true
+/// symmetric reflection for any `i`, however far past the boundary.
+///
+/// Returns `0` when `len == 0` so callers never divide by zero.
+fn reflect_index(i: u32, len: u32) -> u32 {
+    if len == 0 {
+        return 0;
+    }
+    // Widen to u64 so that `2 * len` cannot overflow for large `len`.
+    let len = u64::from(len);
+    let period = 2 * len;
+    let phase = u64::from(i) % period;
+    // First half of the period runs forward, second half runs mirrored.
+    let mirrored = if phase < len {
+        phase
+    } else {
+        period - 1 - phase
+    };
+    // `mirrored < len <= u32::MAX`, so the narrowing cast is lossless.
+    mirrored as u32
+}
+// Unit tests for the helpers in this module and for `run_inpaint` driven by a
+// no-op forward pass.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use image::{Luma, Rgb};
+    /// Build a `w` x `h` RGB image filled with a single colour.
+    fn solid_rgb(w: u32, h: u32, rgb: [u8; 3]) -> RgbImage {
+        RgbImage::from_pixel(w, h, Rgb(rgb))
+    }
+    /// Forward pass that returns its input image unchanged and ignores the
+    /// mask, so any pixel difference in a test output comes from the
+    /// surrounding strategy code rather than from a model.
+    struct IdentityForward;
+    impl InpaintForward for IdentityForward {
+        fn forward(&self, image: &RgbImage, _mask: &GrayImage) -> Result<RgbImage> {
+            Ok(image.clone())
+        }
+    }
+    #[test]
+    fn ceil_multiple_rounds_up() {
+        assert_eq!(ceil_multiple(8, 8), 8);
+        assert_eq!(ceil_multiple(9, 8), 16);
+        assert_eq!(ceil_multiple(0, 8), 0);
+    }
+    #[test]
+    fn reflect_index_mirrors_beyond_boundary() {
+        // len=5 → symmetric pads: [..., 1, 0, 0, 1, 2, 3, 4, 4, 3, 2, ...]
+        // (the edge sample is repeated), but our padding is right-side only so
+        // we only care about i >= len:
+        assert_eq!(reflect_index(0, 5), 0);
+        assert_eq!(reflect_index(4, 5), 4);
+        assert_eq!(reflect_index(5, 5), 4);
+        assert_eq!(reflect_index(6, 5), 3);
+        assert_eq!(reflect_index(9, 5), 0);
+    }
+    #[test]
+    fn scaled_dims_preserves_aspect() {
+        // Landscape shrink, already-fits no-op, and portrait shrink.
+        assert_eq!(scaled_dims(1600, 900, 1280), (1280, 720));
+        assert_eq!(scaled_dims(800, 600, 1280), (800, 600));
+        assert_eq!(scaled_dims(1000, 2000, 1280), (640, 1280));
+    }
+    #[test]
+    fn boxes_from_mask_finds_each_contour() {
+        // Two disjoint filled rectangles: x 10..25 / y 10..20 and
+        // x 70..80 / y 50..60.
+        let mut mask = GrayImage::new(100, 100);
+        for y in 10..20 {
+            for x in 10..25 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        for y in 50..60 {
+            for x in 70..80 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        let boxes = boxes_from_mask(&mask);
+        assert_eq!(boxes.len(), 2);
+        // Sort by left edge so the assertions do not depend on contour order.
+        let mut sorted = boxes;
+        sorted.sort_by_key(|b| b[0]);
+        // Boxes are [x1, y1, x2, y2] with exclusive right/bottom bounds.
+        assert_eq!(sorted[0], [10, 10, 25, 20]);
+        assert_eq!(sorted[1], [70, 50, 80, 60]);
+    }
+    #[test]
+    fn boxes_from_mask_ignores_holes() {
+        // Filled rectangle with a hole in the middle.
+        let mut mask = GrayImage::new(50, 50);
+        for y in 5..45 {
+            for x in 5..45 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        for y in 20..30 {
+            for x in 20..30 {
+                mask.put_pixel(x, y, Luma([0]));
+            }
+        }
+        let boxes = boxes_from_mask(&mask);
+        assert_eq!(boxes.len(), 1, "hole must not produce a second box");
+    }
+    #[test]
+    fn crop_box_expands_by_margin_additively() {
+        let img = solid_rgb(200, 200, [255, 255, 255]);
+        let mask = GrayImage::new(200, 200);
+        // A 40x40 box with margin 20 on every side → an 80x80 crop.
+        let (ci, _cm, [l, t, r, b]) = crop_box(&img, &mask, [80, 80, 120, 120], 20);
+        assert_eq!([l, t, r, b], [60, 60, 140, 140]);
+        assert_eq!(ci.dimensions(), (80, 80));
+    }
+    #[test]
+    fn crop_box_shifts_inward_at_edges() {
+        let img = solid_rgb(100, 100, [255, 255, 255]);
+        let mask = GrayImage::new(100, 100);
+        // Box hugging the left edge — desired crop starts at -10, so we shift
+        // the right edge outward to keep the full (box + margin*2) width.
+        let (_ci, _cm, [l, t, r, b]) = crop_box(&img, &mask, [0, 40, 20, 60], 10);
+        assert_eq!(l, 0);
+        assert_eq!(r, 40);
+        assert_eq!(t, 30);
+        assert_eq!(b, 70);
+    }
+    #[test]
+    fn crop_strategy_skips_when_mask_empty() {
+        // No pixel is ever set on the mask; the output is spot-checked against
+        // the input colour.
+        let img = solid_rgb(900, 900, [50, 60, 70]);
+        let mask = GrayImage::new(900, 900);
+        let cfg = HdStrategyConfig::lama_default();
+        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();
+        assert_eq!(out.get_pixel(0, 0).0, [50, 60, 70]);
+    }
+    #[test]
+    fn resize_strategy_restores_unmasked_pixels() {
+        // 1600x1200 image with `resize_limit: 640` (the longer side exceeds
+        // the limit) — even under Resize, unmasked pixels must be identical.
+        let mut img = solid_rgb(1600, 1200, [10, 20, 30]);
+        // One pixel in the masked area, different value.
+        img.put_pixel(500, 500, Rgb([200, 200, 200]));
+        let mut mask = GrayImage::new(1600, 1200);
+        mask.put_pixel(500, 500, Luma([255]));
+        let cfg = HdStrategyConfig {
+            strategy: HdStrategy::Resize,
+            resize_limit: 640,
+            ..HdStrategyConfig::lama_default()
+        };
+        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();
+        // Opposite corners, both far from the single masked pixel.
+        assert_eq!(out.get_pixel(0, 0).0, [10, 20, 30]);
+        assert_eq!(out.get_pixel(1599, 1199).0, [10, 20, 30]);
+    }
+    #[test]
+    fn crop_strategy_paste_bounds() {
+        // Two masked blobs → two crops → full image untouched outside crops.
+        let img = solid_rgb(1200, 1200, [100, 100, 100]);
+        let mut mask = GrayImage::new(1200, 1200);
+        for y in 100..120 {
+            for x in 100..120 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        for y in 900..920 {
+            for x in 900..920 {
+                mask.put_pixel(x, y, Luma([255]));
+            }
+        }
+        let cfg = HdStrategyConfig::lama_default();
+        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();
+        // IdentityForward is a no-op, so output == input everywhere.
+        assert_eq!(out.get_pixel(0, 0).0, [100, 100, 100]);
+        assert_eq!(out.get_pixel(500, 500).0, [100, 100, 100]);
+    }
+}

koharu-ml/src/lama/mod.rs CHANGED Viewed

@@ -1,23 +1,18 @@
 mod fft;
 mod model;
-use crate::types::TextRegion;
 use anyhow::{Result, bail};
 use candle_core::{DType, Device, Tensor};
-use image::{
-    DynamicImage, GenericImageView, GrayImage, Luma, Rgb, RgbImage,
-    imageops::{crop_imm, replace},
-};
-use imageproc::{
-    contours::find_contours, distance_transform::Norm, drawing::draw_polygon_mut, edges::canny,
-    filter::gaussian_blur_f32, morphology::dilate, point::Point,
-};
 use koharu_runtime::RuntimeManager;
 use tracing::instrument;
 use crate::{
     device,
-    inpainting::{binarize_mask, extract_alpha, restore_alpha_channel},
     loading,
 };
@@ -31,20 +26,6 @@ koharu_runtime::declare_hf_model_package!(
     order: 130,
 );
-const BALLOON_CANNY_LOW: f32 = 70.0;
-const BALLOON_CANNY_HIGH: f32 = 140.0;
-const BALLOON_WINDOW_RATIO: f64 = 1.7;
-const BALLOON_WINDOW_ASPECT_RATIO: f64 = 1.0;
-const SIMPLE_BG_THRESHOLD_LOW_VARIANCE: f64 = 10.0;
-const SIMPLE_BG_THRESHOLD_HIGH_VARIANCE: f64 = 7.0;
-const SIMPLE_BG_CHANNEL_STD_SWITCH: f64 = 1.0;
-type Xyxy = [u32; 4];
-struct BalloonMasks {
-    balloon_mask: GrayImage,
-    non_text_mask: GrayImage,
-}
 pub struct Lama {
     model: model::Lama,
     device: Device,
@@ -64,33 +45,21 @@ impl Lama {
         Ok(Self { model, device })
     }
-    #[instrument(level = "debug", skip_all)]
-    fn forward(&self, image: &Tensor, mask: &Tensor) -> Result<Tensor> {
-        self.model.forward(image, mask)
-    }
-    #[instrument(level = "debug", skip_all)]
-    pub fn inference_model(
-        &self,
-        image: &DynamicImage,
-        mask: &DynamicImage,
-    ) -> Result<DynamicImage> {
-        let (image_tensor, mask_tensor) = self.preprocess(image, mask)?;
-        let output = self.forward(&image_tensor, &mask_tensor)?;
-        self.postprocess(&output)
-    }
     #[instrument(level = "debug", skip_all)]
     pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
-        self.inference_with_blocks(image, mask, None)
     }
     #[instrument(level = "debug", skip_all)]
-    pub fn inference_with_blocks(
         &self,
         image: &DynamicImage,
         mask: &DynamicImage,
-        text_blocks: Option<&[TextRegion]>,
     ) -> Result<DynamicImage> {
         if image.dimensions() != mask.dimensions() {
             bail!(
@@ -101,12 +70,9 @@ impl Lama {
         }
         let binary_mask = binarize_mask(mask);
-        let output_rgb = if let Some(blocks) = text_blocks.filter(|blocks| !blocks.is_empty()) {
-            let image_rgb = image.to_rgb8();
-            self.inference_blockwise(&image_rgb, &binary_mask, blocks)?
-        } else {
-            self.inference_crop(&image.to_rgb8(), &binary_mask)?
-        };
         if image.color().has_alpha() {
             let original_alpha = image.to_rgba8();
@@ -119,90 +85,22 @@ impl Lama {
     }
     #[instrument(level = "debug", skip_all)]
-    fn inference_crop(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
-        if let Some(filled) = try_fill_balloon(image, mask) {
-            return Ok(filled);
-        }
-        self.inference_model_rgb(image, mask)
-    }
-    #[instrument(level = "debug", skip_all)]
-    fn inference_blockwise(
-        &self,
-        image: &RgbImage,
-        mask: &GrayImage,
-        text_blocks: &[TextRegion],
-    ) -> Result<RgbImage> {
-        let (im_w, im_h) = image.dimensions();
-        let mut inpainted = image.clone();
-        let mut working_mask = mask.clone();
-        for block in text_blocks {
-            let Some(xyxy) = block_xyxy(block, im_w, im_h) else {
-                continue;
-            };
-            let xyxy_e = enlarge_window(
-                xyxy,
-                im_w,
-                im_h,
-                BALLOON_WINDOW_RATIO,
-                BALLOON_WINDOW_ASPECT_RATIO,
-            );
-            let crop_width = xyxy_e[2].saturating_sub(xyxy_e[0]);
-            let crop_height = xyxy_e[3].saturating_sub(xyxy_e[1]);
-            if crop_width == 0 || crop_height == 0 {
-                continue;
-            }
-            let crop_image =
-                crop_imm(&inpainted, xyxy_e[0], xyxy_e[1], crop_width, crop_height).to_image();
-            let crop_mask =
-                crop_imm(&working_mask, xyxy_e[0], xyxy_e[1], crop_width, crop_height).to_image();
-            let output = if count_nonzero(&crop_mask) == 0 {
-                crop_image
-            } else if let Some(filled) = try_fill_balloon(&crop_image, &crop_mask) {
-                filled
-            } else {
-                self.inference_model_rgb(&crop_image, &crop_mask)?
-            };
-            replace(
-                &mut inpainted,
-                &output,
-                i64::from(xyxy_e[0]),
-                i64::from(xyxy_e[1]),
-            );
-            clear_mask_bbox(&mut working_mask, xyxy);
-        }
-        Ok(inpainted)
     }
     #[instrument(level = "debug", skip_all)]
-    fn inference_model_rgb(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
-        Ok(self
-            .inference_model(
-                &DynamicImage::ImageRgb8(image.clone()),
-                &DynamicImage::ImageLuma8(mask.clone()),
-            )?
-            .to_rgb8())
     }
     #[instrument(level = "debug", skip_all)]
-    fn preprocess(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<(Tensor, Tensor)> {
-        if image.dimensions() != mask.dimensions() {
-            bail!(
-                "image and mask dimensions dismatch: image is {:?}, mask is {:?}",
-                image.dimensions(),
-                mask.dimensions()
-            );
-        }
         let (w, h) = (image.width() as usize, image.height() as usize);
-        let rgb = image.to_rgb8().into_raw();
-        let luma = mask.to_luma8().into_raw();
         let image_tensor = (Tensor::from_vec(rgb, (1, h, w, 3), &self.device)?
             .permute((0, 3, 1, 2))?
@@ -218,7 +116,7 @@ impl Lama {
     }
     #[instrument(level = "debug", skip_all)]
-    fn postprocess(&self, output: &Tensor) -> Result<DynamicImage> {
         let output = output.squeeze(0)?;
         let (channels, height, width) = output.dims3()?;
         if channels != 3 {
@@ -229,439 +127,37 @@ impl Lama {
             .permute((1, 2, 0))?
             .to_dtype(DType::U8)?;
         let raw: Vec<u8> = output.flatten_all()?.to_vec1()?;
-        let image = RgbImage::from_raw(width as u32, height as u32, raw)
-            .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))?;
-        Ok(DynamicImage::ImageRgb8(image))
-    }
-}
-fn block_xyxy(block: &TextRegion, width: u32, height: u32) -> Option<Xyxy> {
-    let x1 = block.x.floor().max(0.0) as u32;
-    let y1 = block.y.floor().max(0.0) as u32;
-    let x2 = (block.x + block.width).ceil().max(block.x.floor()) as u32;
-    let y2 = (block.y + block.height).ceil().max(block.y.floor()) as u32;
-    let x1 = x1.min(width);
-    let y1 = y1.min(height);
-    let x2 = x2.min(width);
-    let y2 = y2.min(height);
-    if x2 <= x1 || y2 <= y1 {
-        return None;
-    }
-    Some([x1, y1, x2, y2])
-}
-fn enlarge_window(rect: Xyxy, im_w: u32, im_h: u32, ratio: f64, aspect_ratio: f64) -> Xyxy {
-    debug_assert!(ratio > 1.0);
-    let [x1, y1, x2, y2] = rect;
-    let w = f64::from(x2.saturating_sub(x1));
-    let h = f64::from(y2.saturating_sub(y1));
-    if w <= 0.0 || h <= 0.0 || aspect_ratio <= 0.0 {
-        return [0, 0, 0, 0];
-    }
-    let a = aspect_ratio;
-    let b = w + h * aspect_ratio;
-    let c = (1.0 - ratio) * w * h;
-    let discriminant = (b * b - 4.0 * a * c).max(0.0);
-    let delta = ((-b + discriminant.sqrt()) / (2.0 * a) / 2.0).round();
-    let mut delta_h = delta.max(0.0) as u32;
-    let mut delta_w = (delta * aspect_ratio).round().max(0.0) as u32;
-    delta_w = delta_w.min(x1).min(im_w.saturating_sub(x2));
-    delta_h = delta_h.min(y1).min(im_h.saturating_sub(y2));
-    [
-        x1.saturating_sub(delta_w),
-        y1.saturating_sub(delta_h),
-        (x2 + delta_w).min(im_w),
-        (y2 + delta_h).min(im_h),
-    ]
-}
-fn try_fill_balloon(image: &RgbImage, mask: &GrayImage) -> Option<RgbImage> {
-    let masks = extract_balloon_mask(image, mask)?;
-    let average_bg_color = median_rgb(image, &masks.non_text_mask)?;
-    let std_rgb = color_stddev(image, &masks.non_text_mask, average_bg_color);
-    let inpaint_thresh = if stddev3(std_rgb) > SIMPLE_BG_CHANNEL_STD_SWITCH {
-        SIMPLE_BG_THRESHOLD_HIGH_VARIANCE
-    } else {
-        SIMPLE_BG_THRESHOLD_LOW_VARIANCE
-    };
-    let std_max = std_rgb.into_iter().fold(0.0, f64::max);
-    if std_max >= inpaint_thresh {
-        return None;
-    }
-    let mut result = image.clone();
-    let fill = [
-        average_bg_color[0] as u8,
-        average_bg_color[1] as u8,
-        average_bg_color[2] as u8,
-    ];
-    for (x, y, pixel) in masks.balloon_mask.enumerate_pixels() {
-        if pixel.0[0] > 0 {
-            result.put_pixel(x, y, Rgb(fill));
-        }
-    }
-    Some(result)
-}
-fn extract_balloon_mask(image: &RgbImage, mask: &GrayImage) -> Option<BalloonMasks> {
-    if image.dimensions() != mask.dimensions() {
-        return None;
-    }
-    let text_bbox = non_zero_bbox(mask)?;
-    let text_sum = count_nonzero(mask);
-    if text_sum == 0 {
-        return None;
-    }
-    let gray = DynamicImage::ImageRgb8(image.clone()).to_luma8();
-    let blurred = gaussian_blur_f32(&gray, 1.0);
-    let mut cannyed = canny(&blurred, BALLOON_CANNY_LOW, BALLOON_CANNY_HIGH);
-    cannyed = dilate(&cannyed, Norm::LInf, 1);
-    draw_binary_border(&mut cannyed);
-    subtract_binary_mask(&mut cannyed, mask);
-    let contours = find_contours::<i32>(&cannyed);
-    let (width, height) = cannyed.dimensions();
-    let mut best_mask = None;
-    let mut best_area = f64::INFINITY;
-    for contour in contours {
-        let Some(polygon) = contour_polygon(&contour.points) else {
-            continue;
-        };
-        let bbox = polygon_bbox(&polygon)?;
-        if bbox[0] > text_bbox[0]
-            || bbox[1] > text_bbox[1]
-            || bbox[2] < text_bbox[2]
-            || bbox[3] < text_bbox[3]
-        {
-            continue;
-        }
-        let mut candidate = GrayImage::new(width, height);
-        draw_polygon_mut(&mut candidate, &polygon, Luma([255u8]));
-        if count_overlap(&candidate, mask) < text_sum {
-            continue;
-        }
-        let area = polygon_area(&polygon);
-        if area < best_area {
-            best_area = area;
-            best_mask = Some(candidate);
-        }
-    }
-    let balloon_mask = best_mask?;
-    let mut non_text_mask = balloon_mask.clone();
-    for (x, y, pixel) in mask.enumerate_pixels() {
-        if pixel.0[0] > 0 {
-            non_text_mask.put_pixel(x, y, Luma([0]));
-        }
     }
-    Some(BalloonMasks {
-        balloon_mask,
-        non_text_mask,
-    })
-}
-fn contour_polygon(points: &[Point<i32>]) -> Option<Vec<Point<i32>>> {
-    let mut polygon = points.to_vec();
-    if polygon.len() < 3 {
-        return None;
-    }
-    if polygon.first() == polygon.last() {
-        polygon.pop();
-    }
-    if polygon.len() < 3 {
-        return None;
-    }
-    Some(polygon)
 }
-fn polygon_bbox(points: &[Point<i32>]) -> Option<Xyxy> {
-    let first = points.first()?;
-    let mut min_x = first.x;
-    let mut min_y = first.y;
-    let mut max_x = first.x;
-    let mut max_y = first.y;
-    for point in points.iter().skip(1) {
-        min_x = min_x.min(point.x);
-        min_y = min_y.min(point.y);
-        max_x = max_x.max(point.x);
-        max_y = max_y.max(point.y);
-    }
-    Some([
-        min_x.max(0) as u32,
-        min_y.max(0) as u32,
-        max_x.max(min_x).saturating_add(1) as u32,
-        max_y.max(min_y).saturating_add(1) as u32,
-    ])
-}
-fn polygon_area(points: &[Point<i32>]) -> f64 {
-    let mut area = 0.0;
-    for index in 0..points.len() {
-        let current = points[index];
-        let next = points[(index + 1) % points.len()];
-        area += f64::from(current.x) * f64::from(next.y) - f64::from(next.x) * f64::from(current.y);
-    }
-    area.abs() * 0.5
 }
-fn draw_binary_border(image: &mut GrayImage) {
-    let width = image.width();
-    let height = image.height();
-    if width == 0 || height == 0 {
-        return;
-    }
-    for x in 0..width {
-        image.put_pixel(x, 0, Luma([255]));
-        image.put_pixel(x, height - 1, Luma([255]));
-    }
-    for y in 0..height {
-        image.put_pixel(0, y, Luma([255]));
-        image.put_pixel(width - 1, y, Luma([255]));
-    }
-}
-fn subtract_binary_mask(image: &mut GrayImage, mask: &GrayImage) {
-    for (x, y, pixel) in image.enumerate_pixels_mut() {
-        if mask.get_pixel(x, y).0[0] > 0 {
-            pixel.0[0] = 0;
         }
-    }
-}
-fn non_zero_bbox(mask: &GrayImage) -> Option<Xyxy> {
-    let (width, height) = mask.dimensions();
-    let mut min_x = width;
-    let mut min_y = height;
-    let mut max_x = 0;
-    let mut max_y = 0;
-    let mut found = false;
-    for (x, y, pixel) in mask.enumerate_pixels() {
-        if pixel.0[0] == 0 {
-            continue;
-        }
-        found = true;
-        min_x = min_x.min(x);
-        min_y = min_y.min(y);
-        max_x = max_x.max(x);
-        max_y = max_y.max(y);
-    }
-    found.then_some([
-        min_x,
-        min_y,
-        max_x.saturating_add(1),
-        max_y.saturating_add(1),
-    ])
-}
-fn clear_mask_bbox(mask: &mut GrayImage, bbox: Xyxy) {
-    for y in bbox[1]..bbox[3] {
-        for x in bbox[0]..bbox[2] {
-            mask.put_pixel(x, y, Luma([0]));
-        }
-    }
-}
-fn count_nonzero(mask: &GrayImage) -> u32 {
-    mask.pixels().filter(|pixel| pixel.0[0] > 0).count() as u32
-}
-fn count_overlap(left: &GrayImage, right: &GrayImage) -> u32 {
-    left.pixels()
-        .zip(right.pixels())
-        .filter(|(l, r)| l.0[0] > 0 && r.0[0] > 0)
-        .count() as u32
-}
-fn median_rgb(image: &RgbImage, mask: &GrayImage) -> Option<[f64; 3]> {
-    let mut channels = [Vec::new(), Vec::new(), Vec::new()];
-    for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
-        if mask_pixel.0[0] == 0 {
-            continue;
-        }
-        channels[0].push(pixel.0[0]);
-        channels[1].push(pixel.0[1]);
-        channels[2].push(pixel.0[2]);
-    }
-    Some([
-        median_channel(&channels[0])?,
-        median_channel(&channels[1])?,
-        median_channel(&channels[2])?,
-    ])
-}
-fn median_channel(values: &[u8]) -> Option<f64> {
-    if values.is_empty() {
-        return None;
-    }
-    let mut values = values.to_vec();
-    values.sort_unstable();
-    let mid = values.len() / 2;
-    if values.len().is_multiple_of(2) {
-        Some((f64::from(values[mid - 1]) + f64::from(values[mid])) / 2.0)
-    } else {
-        Some(f64::from(values[mid]))
-    }
-}
-fn color_stddev(image: &RgbImage, mask: &GrayImage, median: [f64; 3]) -> [f64; 3] {
-    let mut sum_sq = [0.0; 3];
-    let mut count = 0.0;
-    for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
-        if mask_pixel.0[0] == 0 {
-            continue;
-        }
-        count += 1.0;
-        for channel in 0..3 {
-            let diff = f64::from(pixel.0[channel]) - median[channel];
-            sum_sq[channel] += diff * diff;
         }
     }
-    if count == 0.0 {
-        return [f64::INFINITY; 3];
-    }
-    [
-        (sum_sq[0] / count).sqrt(),
-        (sum_sq[1] / count).sqrt(),
-        (sum_sq[2] / count).sqrt(),
-    ]
-}
-fn stddev3(values: [f64; 3]) -> f64 {
-    let mean = values.iter().sum::<f64>() / 3.0;
-    let variance = values
-        .iter()
-        .map(|value| {
-            let diff = value - mean;
-            diff * diff
-        })
-        .sum::<f64>()
-        / 3.0;
-    variance.sqrt()
 }
 #[cfg(test)]
 mod tests {
-    use super::{
-        BALLOON_WINDOW_ASPECT_RATIO, BALLOON_WINDOW_RATIO, clear_mask_bbox, count_nonzero,
-        enlarge_window, extract_balloon_mask, try_fill_balloon,
-    };
     use crate::inpainting::restore_alpha_channel;
-    use crate::types::TextRegion;
     use image::{GrayImage, Luma, Rgb, RgbImage};
-    use imageproc::drawing::draw_hollow_rect_mut;
-    use imageproc::rect::Rect;
     const ALPHA_RING_RADIUS: u8 = 7;
-    #[test]
-    fn enlarge_window_matches_ratio_1_7_reference() {
-        let enlarged = enlarge_window(
-            [10, 20, 50, 60],
-            200,
-            150,
-            BALLOON_WINDOW_RATIO,
-            BALLOON_WINDOW_ASPECT_RATIO,
-        );
-        assert_eq!(enlarged, [4, 14, 56, 66]);
-    }
-    #[test]
-    fn extract_balloon_mask_prefers_smallest_covering_contour() {
-        let mut image = RgbImage::from_pixel(80, 80, Rgb([255, 255, 255]));
-        draw_hollow_rect_mut(&mut image, Rect::at(4, 4).of_size(72, 72), Rgb([0, 0, 0]));
-        draw_hollow_rect_mut(&mut image, Rect::at(20, 20).of_size(28, 20), Rgb([0, 0, 0]));
-        let mut mask = GrayImage::new(80, 80);
-        for y in 24..36 {
-            for x in 24..44 {
-                mask.put_pixel(x, y, Luma([255]));
-            }
-        }
-        let masks = extract_balloon_mask(&image, &mask).expect("balloon should be detected");
-        let balloon_pixels = count_nonzero(&masks.balloon_mask);
-        assert!(
-            balloon_pixels < 900,
-            "expected inner contour fill, got {balloon_pixels}"
-        );
-        assert!(
-            balloon_pixels > 250,
-            "expected meaningful bubble area, got {balloon_pixels}"
-        );
-    }
-    #[test]
-    fn simple_balloon_chooses_fill_but_textured_balloon_does_not() {
-        let mut flat = RgbImage::from_pixel(64, 64, Rgb([240, 240, 240]));
-        draw_hollow_rect_mut(&mut flat, Rect::at(8, 8).of_size(48, 32), Rgb([0, 0, 0]));
-        let mut mask = GrayImage::new(64, 64);
-        for y in 18..30 {
-            for x in 18..46 {
-                mask.put_pixel(x, y, Luma([255]));
-            }
-        }
-        assert!(try_fill_balloon(&flat, &mask).is_some());
-        let mut textured = flat.clone();
-        for y in 9..39 {
-            for x in 9..55 {
-                let noise = ((x + y) % 23) as u8;
-                textured.put_pixel(
-                    x,
-                    y,
-                    Rgb([200 + noise, 210 + (noise / 2), 220 - (noise / 3)]),
-                );
-            }
-        }
-        assert!(try_fill_balloon(&textured, &mask).is_none());
-    }
-    #[test]
-    fn clearing_mask_consumes_only_original_bbox() {
-        let mut mask = GrayImage::from_pixel(32, 32, Luma([255]));
-        clear_mask_bbox(&mut mask, [8, 10, 16, 18]);
-        for y in 10..18 {
-            for x in 8..16 {
-                assert_eq!(mask.get_pixel(x, y).0[0], 0);
-            }
-        }
-        assert_eq!(mask.get_pixel(7, 10).0[0], 255);
-        assert_eq!(mask.get_pixel(16, 17).0[0], 255);
-        assert_eq!(mask.get_pixel(8, 9).0[0], 255);
-        assert_eq!(mask.get_pixel(15, 18).0[0], 255);
-    }
     #[test]
     fn rgba_alpha_restore_uses_surrounding_ring() {
         let image = RgbImage::from_pixel(32, 32, Rgb([20, 30, 40]));
@@ -685,18 +181,4 @@ mod tests {
         assert_eq!(restored.get_pixel(15, 15).0[3], 64);
         assert_eq!(restored.get_pixel(2, 2).0[3], 255);
     }
-    #[test]
-    fn block_xyxy_rounds_and_clamps_document_coords() {
-        let block = TextRegion {
-            x: 10.2,
-            y: 20.7,
-            width: 15.1,
-            height: 9.4,
-            ..Default::default()
-        };
-        let bbox = super::block_xyxy(&block, 100, 100).expect("bbox");
-        assert_eq!(bbox, [10, 20, 26, 31]);
-    }
 }

 mod fft;
 mod model;
 use anyhow::{Result, bail};
 use candle_core::{DType, Device, Tensor};
+use image::{DynamicImage, GenericImageView, GrayImage, RgbImage};
 use koharu_runtime::RuntimeManager;
 use tracing::instrument;
 use crate::{
     device,
+    inpainting::{
+        HdStrategyConfig, InpaintForward, binarize_mask, extract_alpha, restore_alpha_channel,
+        run_inpaint, try_fill_balloon,
+    },
     loading,
 };
     order: 130,
 );
 pub struct Lama {
     model: model::Lama,
     device: Device,
         Ok(Self { model, device })
     }
+    /// Run inpainting with the manga-tuned default strategy (Crop, 800/128/1280).
     #[instrument(level = "debug", skip_all)]
     pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
+        self.inference_with_config(image, mask, &HdStrategyConfig::lama_default())
     }
+    /// Run inpainting with a caller-supplied [`HdStrategyConfig`]. Use this to
+    /// pick a different strategy (Original / Resize) or tune the trigger /
+    /// margin / resize-limit for GPUs with less VRAM.
     #[instrument(level = "debug", skip_all)]
+    pub fn inference_with_config(
         &self,
         image: &DynamicImage,
         mask: &DynamicImage,
+        cfg: &HdStrategyConfig,
     ) -> Result<DynamicImage> {
         if image.dimensions() != mask.dimensions() {
             bail!(
         }
         let binary_mask = binarize_mask(mask);
+        let image_rgb = image.to_rgb8();
+        let forward = LamaForward { lama: self };
+        let output_rgb = run_inpaint(&forward, &image_rgb, &binary_mask, cfg)?;
         if image.color().has_alpha() {
             let original_alpha = image.to_rgba8();
     }
     #[instrument(level = "debug", skip_all)]
+    fn forward(&self, image: &Tensor, mask: &Tensor) -> Result<Tensor> {
+        self.model.forward(image, mask)
     }
     #[instrument(level = "debug", skip_all)]
+    fn inference_model(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
+        let (image_tensor, mask_tensor) = self.preprocess(image, mask)?;
+        let output = self.forward(&image_tensor, &mask_tensor)?;
+        self.postprocess(&output)
     }
     #[instrument(level = "debug", skip_all)]
+    fn preprocess(&self, image: &RgbImage, mask: &GrayImage) -> Result<(Tensor, Tensor)> {
         let (w, h) = (image.width() as usize, image.height() as usize);
+        let rgb = image.clone().into_raw();
+        let luma = mask.clone().into_raw();
         let image_tensor = (Tensor::from_vec(rgb, (1, h, w, 3), &self.device)?
             .permute((0, 3, 1, 2))?
     }
     #[instrument(level = "debug", skip_all)]
+    fn postprocess(&self, output: &Tensor) -> Result<RgbImage> {
         let output = output.squeeze(0)?;
         let (channels, height, width) = output.dims3()?;
         if channels != 3 {
             .permute((1, 2, 0))?
             .to_dtype(DType::U8)?;
         let raw: Vec<u8> = output.flatten_all()?.to_vec1()?;
+        RgbImage::from_raw(width as u32, height as u32, raw)
+            .ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
     }
 }
+/// [`InpaintForward`] impl used by the HD-strategy dispatcher. Applies the
+/// balloon-fill fast path on a per-crop basis before falling back to the
+/// model forward — flat-background speech bubbles skip the model entirely.
+struct LamaForward<'a> {
+    lama: &'a Lama,
 }
+impl InpaintForward for LamaForward<'_> {
+    fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
+        if mask.pixels().all(|p| p.0[0] == 0) {
+            return Ok(image.clone());
         }
+        if let Some(filled) = try_fill_balloon(image, mask) {
+            return Ok(filled);
         }
+        self.lama.inference_model(image, mask)
     }
 }
 #[cfg(test)]
 mod tests {
     use crate::inpainting::restore_alpha_channel;
     use image::{GrayImage, Luma, Rgb, RgbImage};
     const ALPHA_RING_RADIUS: u8 = 7;
     #[test]
     fn rgba_alpha_restore_uses_surrounding_ring() {
         let image = RgbImage::from_pixel(32, 32, Rgb([20, 30, 40]));
         assert_eq!(restored.get_pixel(15, 15).0[3], 64);
         assert_eq!(restored.get_pixel(2, 2).0[3], 255);
     }
 }

koharu-ml/src/lib.rs CHANGED Viewed

@@ -1,10 +1,10 @@
 mod hf_hub;
-mod inpainting;
 pub mod aot_inpainting;
 pub mod comic_text_bubble_detector;
 pub mod comic_text_detector;
 pub mod font_detector;
 pub mod lama;
 pub mod loading;
 pub mod manga_ocr;

 mod hf_hub;
 pub mod aot_inpainting;
 pub mod comic_text_bubble_detector;
 pub mod comic_text_detector;
 pub mod font_detector;
+pub mod inpainting;
 pub mod lama;
 pub mod loading;
 pub mod manga_ocr;

koharu-ml/tests/inpaint.rs CHANGED Viewed

@@ -1,7 +1,6 @@
 use std::path::Path;
 use image::GenericImageView;
-use koharu_ml::TextRegion;
 use koharu_ml::aot_inpainting::AotInpainting;
 use koharu_ml::lama::Lama;
@@ -40,48 +39,6 @@ async fn lama_inpainting_updates_masked_region() -> anyhow::Result<()> {
     Ok(())
 }
-#[tokio::test]
-#[ignore]
-async fn lama_block_aware_inpainting_returns_same_size() -> anyhow::Result<()> {
-    let fixtures = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures");
-    let runtime = support::cpu_runtime();
-    let lama = Lama::load(&runtime, false).await?;
-    let base = image::open(fixtures.join("image.jpg"))?;
-    let mask = image::open(fixtures.join("mask.png"))?;
-    let mask_luma = mask.to_luma8();
-    let mut min_x = mask_luma.width();
-    let mut min_y = mask_luma.height();
-    let mut max_x = 0;
-    let mut max_y = 0;
-    let mut found = false;
-    for (x, y, pixel) in mask_luma.enumerate_pixels() {
-        if pixel.0[0] == 0 {
-            continue;
-        }
-        found = true;
-        min_x = min_x.min(x);
-        min_y = min_y.min(y);
-        max_x = max_x.max(x);
-        max_y = max_y.max(y);
-    }
-    assert!(found, "mask fixture should contain a non-empty region");
-    let block = TextRegion {
-        x: min_x as f32,
-        y: min_y as f32,
-        width: max_x.saturating_sub(min_x).saturating_add(1) as f32,
-        height: max_y.saturating_sub(min_y).saturating_add(1) as f32,
-        ..Default::default()
-    };
-    let output = lama.inference_with_blocks(&base, &mask, Some(&[block]))?;
-    assert_eq!(output.dimensions(), base.dimensions());
-    Ok(())
-}
 #[tokio::test]
 #[ignore]
 async fn aot_inpainting_updates_masked_region() -> anyhow::Result<()> {

 use std::path::Path;
 use image::GenericImageView;
 use koharu_ml::aot_inpainting::AotInpainting;
 use koharu_ml::lama::Lama;
     Ok(())
 }
 #[tokio::test]
 #[ignore]
 async fn aot_inpainting_updates_masked_region() -> anyhow::Result<()> {

koharu-rpc/src/mcp/mod.rs CHANGED Viewed

@@ -199,7 +199,7 @@ impl KoharuServer {
         let cpu = app.cpu_only();
         tokio::spawn(async move {
             let _ = koharu_app::pipeline::run(
-                session, registry, runtime, cpu, llm, renderer, spec, cancel, None,
             )
             .await;
         });

         let cpu = app.cpu_only();
         tokio::spawn(async move {
             let _ = koharu_app::pipeline::run(
+                session, registry, runtime, cpu, llm, renderer, spec, cancel, None, None,
             )
             .await;
         });

koharu-rpc/src/routes/pipelines.rs CHANGED Viewed

@@ -9,10 +9,12 @@ use std::sync::atomic::AtomicBool;
 use axum::Json;
 use axum::extract::State;
-use koharu_app::pipeline::{self, PipelineRunOptions, PipelineSpec, ProgressTick, Scope};
 use koharu_core::{
-    AppEvent, JobFinishedEvent, JobStatus, JobSummary, PageId, PipelineProgress, PipelineStatus,
-    Region,
 };
 use serde::{Deserialize, Serialize};
 use utoipa_axum::{router::OpenApiRouter, routes};
@@ -123,6 +125,17 @@ async fn start_pipeline(
             overall_percent: tick.overall_percent,
         }));
     });
     tokio::spawn(async move {
         let result = pipeline::run(
             session_c,
@@ -134,12 +147,23 @@ async fn start_pipeline(
             spec,
             cancel,
             Some(progress_sink),
         )
         .await;
         let (status, error) = match &result {
-            Ok(()) => (JobStatus::Completed, None),
             Err(e) if e.to_string().contains("cancelled") => (JobStatus::Cancelled, None),
-            Err(e) => (JobStatus::Failed, Some(format!("{e:#}"))),
         };
         app_c.jobs.insert(
             op_id_c.clone(),

 use axum::Json;
 use axum::extract::State;
+use koharu_app::pipeline::{
+    self, PipelineRunOptions, PipelineSpec, ProgressTick, Scope, WarningTick,
+};
 use koharu_core::{
+    AppEvent, JobFinishedEvent, JobStatus, JobSummary, JobWarningEvent, PageId, PipelineProgress,
+    PipelineStatus, Region,
 };
 use serde::{Deserialize, Serialize};
 use utoipa_axum::{router::OpenApiRouter, routes};
             overall_percent: tick.overall_percent,
         }));
     });
+    let warning_bus = app.bus.clone();
+    let warning_op_id = operation_id.clone();
+    let warning_sink: pipeline::WarningSink = Arc::new(move |tick: WarningTick| {
+        warning_bus.publish(AppEvent::JobWarning(JobWarningEvent {
+            job_id: warning_op_id.clone(),
+            page_index: tick.page_index,
+            total_pages: tick.total_pages,
+            step_id: tick.step_id,
+            message: tick.message,
+        }));
+    });
     tokio::spawn(async move {
         let result = pipeline::run(
             session_c,
             spec,
             cancel,
             Some(progress_sink),
+            Some(warning_sink),
         )
         .await;
         let (status, error) = match &result {
+            Ok(outcome) if outcome.warning_count == 0 => (JobStatus::Completed, None),
+            Ok(outcome) => (
+                JobStatus::CompletedWithErrors,
+                Some(format!(
+                    "{} step(s) failed; see warnings for details",
+                    outcome.warning_count
+                )),
+            ),
             Err(e) if e.to_string().contains("cancelled") => (JobStatus::Cancelled, None),
+            Err(e) => {
+                tracing::warn!(operation_id = %op_id_c, "pipeline run failed: {e:#}");
+                (JobStatus::Failed, Some(format!("{e:#}")))
+            }
         };
         app_c.jobs.insert(
             op_id_c.clone(),

ui/components/ActivityBubble.tsx CHANGED Viewed

@@ -1,12 +1,17 @@
 'use client'
-import { CircleXIcon } from 'lucide-react'
 import { type ReactNode } from 'react'
 import { useTranslation } from 'react-i18next'
 import { Button } from '@/components/ui/button'
 import { cancelOperation } from '@/lib/api/default/default'
-import type { DownloadProgress, JobSummary, PipelineProgress } from '@/lib/api/schemas'
 import { useDownloadsStore } from '@/lib/stores/downloadsStore'
 import { useEditorUiStore } from '@/lib/stores/editorUiStore'
 import { type JobEntry, useJobsStore } from '@/lib/stores/jobsStore'
@@ -112,6 +117,39 @@ function ErrorCard({
   )
 }
 function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t: TranslateFunc }) {
   const progress: PipelineProgress | undefined = job.progress
   const percent = clampProgress(progress?.overallPercent)
@@ -133,6 +171,7 @@ function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t:
       : undefined
   const subtitle =
     [pageText, stepLabel].filter(Boolean).join(' \u00b7 ') || t('operations.inProgress')
   return (
     <BubbleCard>
@@ -148,6 +187,7 @@ function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t:
             </div>
           </div>
           <ProgressBar percent={percent} />
           <div className='mt-3 flex justify-end'>
             <Button
               data-testid='operation-cancel'

 'use client'
+import { AlertTriangleIcon, CircleXIcon } from 'lucide-react'
 import { type ReactNode } from 'react'
 import { useTranslation } from 'react-i18next'
 import { Button } from '@/components/ui/button'
 import { cancelOperation } from '@/lib/api/default/default'
+import type {
+  DownloadProgress,
+  JobSummary,
+  JobWarningEvent,
+  PipelineProgress,
+} from '@/lib/api/schemas'
 import { useDownloadsStore } from '@/lib/stores/downloadsStore'
 import { useEditorUiStore } from '@/lib/stores/editorUiStore'
 import { type JobEntry, useJobsStore } from '@/lib/stores/jobsStore'
   )
 }
+/**
+ * Amber warning panel shown inside a job card while a pipeline job keeps
+ * running past non-fatal step failures.
+ *
+ * Displays a header with the number of collected warnings and the details
+ * of the most recent one: its step id, an "image N / M" label when the job
+ * spans more than one page, and the failure message.
+ *
+ * @param warnings - Warnings collected for the job, oldest first.
+ * @param t - Translation function used for the header and the page label.
+ * @returns The panel, or `null` when `warnings` is empty.
+ */
+function JobWarnings({ warnings, t }: { warnings: JobWarningEvent[]; t: TranslateFunc }) {
+  const latest = warnings[warnings.length - 1]
+  // An empty list has no "latest" entry; render nothing instead of
+  // dereferencing `undefined` below and crashing the whole card.
+  if (!latest) return null
+  const count = warnings.length
+  // Only show the page label for multi-page jobs; pageIndex is 0-based.
+  const pageLabel =
+    typeof latest.totalPages === 'number' && latest.totalPages > 1
+      ? t('operations.imageProgress', {
+          current: latest.pageIndex + 1,
+          total: latest.totalPages,
+        })
+      : undefined
+  const header =
+    count === 1 ? t('operations.warningsOne') : t('operations.warningsOther', { count })
+  return (
+    <div
+      data-testid='operation-warnings'
+      className='mt-3 rounded-lg border border-amber-200/70 bg-amber-50/80 p-2.5 dark:border-amber-900/70 dark:bg-amber-950/40'
+    >
+      <div className='flex items-start gap-2 text-amber-900 dark:text-amber-200'>
+        <AlertTriangleIcon className='mt-0.5 size-3.5 shrink-0' />
+        <div className='min-w-0 flex-1'>
+          <div className='text-[11px] font-semibold'>{header}</div>
+          <div className='mt-0.5 truncate text-[11px] text-amber-800/90 dark:text-amber-200/80'>
+            {[latest.stepId, pageLabel].filter(Boolean).join(' \u00b7 ')}
+          </div>
+          <div className='mt-1 line-clamp-2 text-[11px] break-words text-amber-900/80 dark:text-amber-100/80'>
+            {latest.message}
+          </div>
+        </div>
+      </div>
+    </div>
+  )
+}
 function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t: TranslateFunc }) {
   const progress: PipelineProgress | undefined = job.progress
   const percent = clampProgress(progress?.overallPercent)
       : undefined
   const subtitle =
     [pageText, stepLabel].filter(Boolean).join(' \u00b7 ') || t('operations.inProgress')
+  const warnings = job.warnings ?? []
   return (
     <BubbleCard>
             </div>
           </div>
           <ProgressBar percent={percent} />
+          {warnings.length > 0 && <JobWarnings warnings={warnings} t={t} />}
           <div className='mt-3 flex justify-end'>
             <Button
               data-testid='operation-cancel'

ui/lib/api/default/default.msw.ts CHANGED Viewed

@@ -323,6 +323,16 @@ export const getEventsResponseMock = (): AppEvent =>
       },
       ...{ event: faker.helpers.arrayElement(['jobProgress'] as const) },
     },
     {
       ...{
         error: faker.helpers.arrayElement([

       },
       ...{ event: faker.helpers.arrayElement(['jobProgress'] as const) },
     },
+    {
+      ...{
+        jobId: faker.string.alpha({ length: { min: 10, max: 20 } }),
+        message: faker.string.alpha({ length: { min: 10, max: 20 } }),
+        pageIndex: faker.number.int({ min: 0 }),
+        stepId: faker.string.alpha({ length: { min: 10, max: 20 } }),
+        totalPages: faker.number.int({ min: 0 }),
+      },
+      ...{ event: faker.helpers.arrayElement(['jobWarning'] as const) },
+    },
     {
       ...{
         error: faker.helpers.arrayElement([

ui/lib/api/schemas/appEvent.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  */
 import type { DownloadProgress } from './downloadProgress'
 import type { JobFinishedEvent } from './jobFinishedEvent'
 import type { LlmTarget } from './llmTarget'
 import type { PipelineProgress } from './pipelineProgress'
 import type { SnapshotEvent } from './snapshotEvent'
@@ -18,6 +19,9 @@ export type AppEvent =
   | (PipelineProgress & {
       event: 'jobProgress'
     })
   | (JobFinishedEvent & {
       event: 'jobFinished'
     })

  */
 import type { DownloadProgress } from './downloadProgress'
 import type { JobFinishedEvent } from './jobFinishedEvent'
+import type { JobWarningEvent } from './jobWarningEvent'
 import type { LlmTarget } from './llmTarget'
 import type { PipelineProgress } from './pipelineProgress'
 import type { SnapshotEvent } from './snapshotEvent'
   | (PipelineProgress & {
       event: 'jobProgress'
     })
+  | (JobWarningEvent & {
+      event: 'jobWarning'
+    })
   | (JobFinishedEvent & {
       event: 'jobFinished'
     })

ui/lib/api/schemas/index.ts CHANGED Viewed

@@ -35,6 +35,7 @@ export * from './imageRole'
 export * from './jobFinishedEvent'
 export * from './jobStatus'
 export * from './jobSummary'
 export * from './listDownloadsResponse'
 export * from './listOperationsResponse'
 export * from './listProjectsResponse'

 export * from './jobFinishedEvent'
 export * from './jobStatus'
 export * from './jobSummary'
+export * from './jobWarningEvent'
 export * from './listDownloadsResponse'
 export * from './listOperationsResponse'
 export * from './listProjectsResponse'

ui/lib/api/schemas/jobWarningEvent.ts ADDED Viewed

	@@ -0,0 +1,24 @@

+/**
+ * Generated by orval v8.8.0 🍺
+ * Do not edit manually.
+ * OpenAPI spec version: 0.0.1
+ */
+/**
+ * A non-fatal step failure during a pipeline run. The pipeline recovers by
+skipping the rest of the current page's steps and moving on to the next
+page; the UI accumulates these into a list during the job.
+ */
+export interface JobWarningEvent {
+  jobId: string
+  message: string
+  /**
+   * 0-based page index where the failure happened.
+   * @minimum 0
+   */
+  pageIndex: number
+  /** Engine id (e.g. `"lama-manga"`) of the step that failed. */
+  stepId: string
+  /** @minimum 0 */
+  totalPages: number
+}

ui/lib/events.ts CHANGED Viewed

@@ -151,6 +151,10 @@ function dispatch(event: AppEvent): void {
       }
       return
     case 'jobFinished':
       useJobsStore.getState().finished(event.id, event.status, event.error)
       lastPageByJob.delete(event.id)

       }
       return
+    case 'jobWarning':
+      useJobsStore.getState().warning(event)
+      return
     case 'jobFinished':
       useJobsStore.getState().finished(event.id, event.status, event.error)
       lastPageByJob.delete(event.id)

ui/lib/stores/jobsStore.ts CHANGED Viewed

@@ -3,14 +3,17 @@
 import { create } from 'zustand'
 import { immer } from 'zustand/middleware/immer'
-import type { JobSummary, PipelineProgress } from '@/lib/api/schemas'
 /**
  * Live job registry, fed by SSE. Keyed by id. `progress` is attached when
- * the backend streams `JobProgress` for a running pipeline job.
  */
 export type JobEntry = JobSummary & {
   progress?: PipelineProgress
 }
 type JobsState = {
@@ -18,6 +21,7 @@ type JobsState = {
   setSnapshot: (jobs: JobSummary[]) => void
   started: (id: string, kind: string) => void
   progress: (p: PipelineProgress) => void
   finished: (id: string, status: JobSummary['status'], error: string | null | undefined) => void
   clear: () => void
   byStatus: (status: JobSummary['status']) => JobEntry[]
@@ -44,6 +48,16 @@ export const useJobsStore = create<JobsState>()(
         }
         s.jobs[p.jobId] = { ...existing, progress: p }
       }),
     finished: (id, status, error) =>
       set((s) => {
         const existing = s.jobs[id] ?? { id, kind: 'pipeline', status }

 import { create } from 'zustand'
 import { immer } from 'zustand/middleware/immer'
+import type { JobSummary, JobWarningEvent, PipelineProgress } from '@/lib/api/schemas'
 /**
  * Live job registry, fed by SSE. Keyed by id. `progress` is attached when
+ * the backend streams `JobProgress` for a running pipeline job. `warnings`
+ * accumulates non-fatal step failures as they arrive; the pipeline keeps
+ * running past them.
  */
 export type JobEntry = JobSummary & {
   progress?: PipelineProgress
+  warnings?: JobWarningEvent[]
 }
 type JobsState = {
   setSnapshot: (jobs: JobSummary[]) => void
   started: (id: string, kind: string) => void
   progress: (p: PipelineProgress) => void
+  warning: (w: JobWarningEvent) => void
   finished: (id: string, status: JobSummary['status'], error: string | null | undefined) => void
   clear: () => void
   byStatus: (status: JobSummary['status']) => JobEntry[]
         }
         s.jobs[p.jobId] = { ...existing, progress: p }
       }),
+    warning: (w) => // store action: attach one non-fatal warning event to its job entry
+      set((s) => {
+        const existing = s.jobs[w.jobId] ?? { // no entry under this id yet: start from a running pipeline placeholder
+          id: w.jobId,
+          kind: 'pipeline',
+          status: 'running' as JobSummary['status'],
+        }
+        const warnings = existing.warnings ?? [] // first warning for this job starts from an empty list
+        s.jobs[w.jobId] = { ...existing, warnings: [...warnings, w] } // append so the newest warning is last
+      }),
     finished: (id, status, error) =>
       set((s) => {
         const existing = s.jobs[id] ?? { id, kind: 'pipeline', status }

ui/openapi.json CHANGED Viewed

@@ -894,6 +894,25 @@
               }
             ]
           },
           {
             "allOf": [
               {
@@ -1574,6 +1593,32 @@
           }
         }
       },
       "ListDownloadsResponse": {
         "type": "object",
         "required": ["downloads"],

               }
             ]
           },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/JobWarningEvent",
+                "description": "A single step on one page failed but the pipeline kept running.\nEmitted per failed step so clients can show a non-fatal warning while\nthe job continues with the next page."
+              },
+              {
+                "type": "object",
+                "required": ["event"],
+                "properties": {
+                  "event": {
+                    "type": "string",
+                    "enum": ["jobWarning"]
+                  }
+                }
+              }
+            ],
+            "description": "A single step on one page failed but the pipeline kept running.\nEmitted per failed step so clients can show a non-fatal warning while\nthe job continues with the next page."
+          },
           {
             "allOf": [
               {
           }
         }
       },
+      "JobWarningEvent": {
+        "type": "object",
+        "description": "A non-fatal step failure during a pipeline run. The pipeline recovers by\nskipping the rest of the current page's steps and moving on to the next\npage; the UI accumulates these into a list during the job.",
+        "required": ["jobId", "pageIndex", "totalPages", "stepId", "message"],
+        "properties": {
+          "jobId": {
+            "type": "string"
+          },
+          "message": {
+            "type": "string"
+          },
+          "pageIndex": {
+            "type": "integer",
+            "description": "0-based page index where the failure happened.",
+            "minimum": 0
+          },
+          "stepId": {
+            "type": "string",
+            "description": "Engine id (e.g. `\"lama-manga\"`) of the step that failed."
+          },
+          "totalPages": {
+            "type": "integer",
+            "minimum": 0
+          }
+        }
+      },
       "ListDownloadsResponse": {
         "type": "object",
         "required": ["downloads"],

ui/public/locales/en-US/translation.json CHANGED Viewed

@@ -75,6 +75,8 @@
     "processAll": "Processing all images",
     "imageProgress": "Image {{current}} / {{total}}",
     "stepProgress": "Step {{current}} / {{total}}: {{step}}",
     "cancel": "Stop",
     "cancelling": "Stopping..."
   },

     "processAll": "Processing all images",
     "imageProgress": "Image {{current}} / {{total}}",
     "stepProgress": "Step {{current}} / {{total}}: {{step}}",
+    "warningsOne": "1 step failed, continuing",
+    "warningsOther": "{{count}} steps failed, continuing",
     "cancel": "Stop",
     "cancelling": "Stopping..."
   },

ui/public/locales/es-ES/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "Procesando todas las imágenes",
     "imageProgress": "Imagen {{current}} / {{total}}",
     "stepProgress": "Paso {{current}} / {{total}}: {{step}}",
     "cancel": "Detener",
     "cancelling": "Deteniendo..."
   },

     "processAll": "Procesando todas las imágenes",
     "imageProgress": "Imagen {{current}} / {{total}}",
     "stepProgress": "Paso {{current}} / {{total}}: {{step}}",
+    "warningsOne": "1 paso falló, continuando",
+    "warningsOther": "{{count}} pasos fallaron, continuando",
     "cancel": "Detener",
     "cancelling": "Deteniendo..."
   },

ui/public/locales/ja-JP/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "すべての画像を一括処理中",
     "imageProgress": "画像 {{current}} / {{total}}",
     "stepProgress": "ステップ {{current}} / {{total}}：{{step}}",
     "cancel": "停止",
     "cancelling": "停止中..."
   },

     "processAll": "すべての画像を一括処理中",
     "imageProgress": "画像 {{current}} / {{total}}",
     "stepProgress": "ステップ {{current}} / {{total}}：{{step}}",
+    "warningsOne": "1 つのステップが失敗しましたが、続行します",
+    "warningsOther": "{{count}} 個のステップが失敗しましたが、続行します",
     "cancel": "停止",
     "cancelling": "停止中..."
   },

ui/public/locales/ko-KR/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "모든 이미지 처리 중",
     "imageProgress": "이미지 {{current}} / {{total}}",
     "stepProgress": "단계 {{current}} / {{total}}: {{step}}",
     "cancel": "취소",
     "cancelling": "취소 중..."
   },

     "processAll": "모든 이미지 처리 중",
     "imageProgress": "이미지 {{current}} / {{total}}",
     "stepProgress": "단계 {{current}} / {{total}}: {{step}}",
+    "warningsOne": "1개 단계 실패, 계속 진행",
+    "warningsOther": "{{count}}개 단계 실패, 계속 진행",
     "cancel": "취소",
     "cancelling": "취소 중..."
   },

ui/public/locales/pt-BR/translation.json CHANGED Viewed

@@ -62,6 +62,8 @@
     "processAll": "Processando todas as imagens",
     "imageProgress": "Imagem {{current}} / {{total}}",
     "stepProgress": "Etapa {{current}} / {{total}}: {{step}}",
     "cancel": "Parar",
     "cancelling": "Parando..."
   },

     "processAll": "Processando todas as imagens",
     "imageProgress": "Imagem {{current}} / {{total}}",
     "stepProgress": "Etapa {{current}} / {{total}}: {{step}}",
+    "warningsOne": "1 etapa falhou, continuando",
+    "warningsOther": "{{count}} etapas falharam, continuando",
     "cancel": "Parar",
     "cancelling": "Parando..."
   },

ui/public/locales/ru-RU/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "Обработка всех изображений",
     "imageProgress": "Изображение {{current}} / {{total}}",
     "stepProgress": "Шаг {{current}} / {{total}}: {{step}}",
     "cancel": "Остановить",
     "cancelling": "Остановка..."
   },

     "processAll": "Обработка всех изображений",
     "imageProgress": "Изображение {{current}} / {{total}}",
     "stepProgress": "Шаг {{current}} / {{total}}: {{step}}",
+    "warningsOne": "1 шаг не выполнен, продолжаем",
+    "warningsOther": "{{count}} шагов не выполнены, продолжаем",
     "cancel": "Остановить",
     "cancelling": "Остановка..."
   },

ui/public/locales/tr-TR/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "Tüm görseller işleniyor",
     "imageProgress": "Görsel {{current}} / {{total}}",
     "stepProgress": "Adım {{current}} / {{total}}: {{step}}",
     "cancel": "Durdur",
     "cancelling": "Durduruluyor..."
   },

     "processAll": "Tüm görseller işleniyor",
     "imageProgress": "Görsel {{current}} / {{total}}",
     "stepProgress": "Adım {{current}} / {{total}}: {{step}}",
+    "warningsOne": "1 adım başarısız oldu, devam ediliyor",
+    "warningsOther": "{{count}} adım başarısız oldu, devam ediliyor",
     "cancel": "Durdur",
     "cancelling": "Durduruluyor..."
   },

ui/public/locales/zh-CN/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "正在批量处理所有图片",
     "imageProgress": "图片 {{current}} / {{total}}",
     "stepProgress": "步骤 {{current}} / {{total}}：{{step}}",
     "cancel": "停止",
     "cancelling": "正在停止..."
   },

     "processAll": "正在批量处理所有图片",
     "imageProgress": "图片 {{current}} / {{total}}",
     "stepProgress": "步骤 {{current}} / {{total}}：{{step}}",
+    "warningsOne": "1 个步骤失败，继续处理",
+    "warningsOther": "{{count}} 个步骤失败，继续处理",
     "cancel": "停止",
     "cancelling": "正在停止..."
   },

ui/public/locales/zh-TW/translation.json CHANGED Viewed

@@ -61,6 +61,8 @@
     "processAll": "正在批次處理所有圖片",
     "imageProgress": "圖片 {{current}} / {{total}}",
     "stepProgress": "步驟 {{current}} / {{total}}：{{step}}",
     "cancel": "停止",
     "cancelling": "停止中..."
   },

     "processAll": "正在批次處理所有圖片",
     "imageProgress": "圖片 {{current}} / {{total}}",
     "stepProgress": "步驟 {{current}} / {{total}}：{{step}}",
+    "warningsOne": "1 個步驟失敗，繼續處理",
+    "warningsOther": "{{count}} 個步驟失敗，繼續處理",
     "cancel": "停止",
     "cancelling": "停止中..."
   },