Mayo committed on
fix: inpainting OOM & pipeline error handling
Browse files
- koharu-app/bin/pipeline.rs +17 -2
- koharu-app/src/pipeline/engines/aot.rs +33 -24
- koharu-app/src/pipeline/engines/lama.rs +37 -18
- koharu-app/src/pipeline/engines/support.rs +1 -18
- koharu-app/src/pipeline/mod.rs +119 -9
- koharu-core/src/events.rs +32 -4
- koharu-core/src/lib.rs +1 -1
- koharu-ml/bin/aot-inpainting.rs +5 -1
- koharu-ml/src/aot_inpainting/mod.rs +56 -161
- koharu-ml/src/inpainting/balloon.rs +369 -0
- koharu-ml/src/{inpainting.rs → inpainting/mod.rs} +13 -0
- koharu-ml/src/inpainting/strategy.rs +539 -0
- koharu-ml/src/lama/mod.rs +39 -557
- koharu-ml/src/lib.rs +1 -1
- koharu-ml/tests/inpaint.rs +0 -43
- koharu-rpc/src/mcp/mod.rs +1 -1
- koharu-rpc/src/routes/pipelines.rs +29 -5
- ui/components/ActivityBubble.tsx +42 -2
- ui/lib/api/default/default.msw.ts +10 -0
- ui/lib/api/schemas/appEvent.ts +4 -0
- ui/lib/api/schemas/index.ts +1 -0
- ui/lib/api/schemas/jobWarningEvent.ts +24 -0
- ui/lib/events.ts +4 -0
- ui/lib/stores/jobsStore.ts +16 -2
- ui/openapi.json +45 -0
- ui/public/locales/en-US/translation.json +2 -0
- ui/public/locales/es-ES/translation.json +2 -0
- ui/public/locales/ja-JP/translation.json +2 -0
- ui/public/locales/ko-KR/translation.json +2 -0
- ui/public/locales/pt-BR/translation.json +2 -0
- ui/public/locales/ru-RU/translation.json +2 -0
- ui/public/locales/tr-TR/translation.json +2 -0
- ui/public/locales/zh-CN/translation.json +2 -0
- ui/public/locales/zh-TW/translation.json +2 -0
koharu-app/bin/pipeline.rs
CHANGED
|
@@ -235,6 +235,16 @@ async fn run() -> Result<()> {
|
|
| 235 |
let ensure_translation_fallback = !cli.with_translate;
|
| 236 |
|
| 237 |
let cancel = Arc::new(AtomicBool::new(false));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
let result = koharu_app::pipeline::run(
|
| 239 |
session.clone(),
|
| 240 |
app.registry.clone(),
|
|
@@ -245,11 +255,16 @@ async fn run() -> Result<()> {
|
|
| 245 |
spec,
|
| 246 |
cancel,
|
| 247 |
Some(progress_sink),
|
|
|
|
| 248 |
)
|
| 249 |
.await;
|
| 250 |
|
| 251 |
match &result {
|
| 252 |
-
Ok(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
Err(e) => eprintln!("=> pipeline failed: {e:#}"),
|
| 254 |
}
|
| 255 |
|
|
@@ -261,7 +276,7 @@ async fn run() -> Result<()> {
|
|
| 261 |
.with_context(|| format!("dump artifacts to {}", cli.output_dir.display()))?;
|
| 262 |
|
| 263 |
app.close_project().await.ok();
|
| 264 |
-
result
|
| 265 |
}
|
| 266 |
|
| 267 |
/// Load `AppConfig` from TOML at `path` or default.
|
|
|
|
| 235 |
let ensure_translation_fallback = !cli.with_translate;
|
| 236 |
|
| 237 |
let cancel = Arc::new(AtomicBool::new(false));
|
| 238 |
+
let warning_sink: koharu_app::pipeline::WarningSink =
|
| 239 |
+
Arc::new(|tick: koharu_app::pipeline::WarningTick| {
|
| 240 |
+
eprintln!(
|
| 241 |
+
"warn: step '{}' failed on page {}/{}: {}",
|
| 242 |
+
tick.step_id,
|
| 243 |
+
tick.page_index + 1,
|
| 244 |
+
tick.total_pages,
|
| 245 |
+
tick.message
|
| 246 |
+
);
|
| 247 |
+
});
|
| 248 |
let result = koharu_app::pipeline::run(
|
| 249 |
session.clone(),
|
| 250 |
app.registry.clone(),
|
|
|
|
| 255 |
spec,
|
| 256 |
cancel,
|
| 257 |
Some(progress_sink),
|
| 258 |
+
Some(warning_sink),
|
| 259 |
)
|
| 260 |
.await;
|
| 261 |
|
| 262 |
match &result {
|
| 263 |
+
Ok(outcome) if outcome.warning_count == 0 => eprintln!("=> pipeline succeeded"),
|
| 264 |
+
Ok(outcome) => eprintln!(
|
| 265 |
+
"=> pipeline finished with {} failed step(s)",
|
| 266 |
+
outcome.warning_count
|
| 267 |
+
),
|
| 268 |
Err(e) => eprintln!("=> pipeline failed: {e:#}"),
|
| 269 |
}
|
| 270 |
|
|
|
|
| 276 |
.with_context(|| format!("dump artifacts to {}", cli.output_dir.display()))?;
|
| 277 |
|
| 278 |
app.close_project().await.ok();
|
| 279 |
+
result.map(|_| ())
|
| 280 |
}
|
| 281 |
|
| 282 |
/// Load `AppConfig` from TOML at `path` or default.
|
koharu-app/src/pipeline/engines/aot.rs
CHANGED
|
@@ -1,14 +1,15 @@
|
|
| 1 |
-
//! AOT inpainting.
|
|
|
|
|
|
|
| 2 |
//!
|
| 3 |
-
//!
|
| 4 |
-
//!
|
| 5 |
-
//!
|
| 6 |
-
//! and mask to the region, inpaint the crop, and paste back.
|
| 7 |
|
| 8 |
use anyhow::{Result, anyhow};
|
| 9 |
use async_trait::async_trait;
|
| 10 |
-
use image::{DynamicImage,
|
| 11 |
-
use koharu_core::{ImageRole, MaskRole, Op};
|
| 12 |
use koharu_ml::aot_inpainting::AotInpainting;
|
| 13 |
|
| 14 |
use crate::pipeline::artifacts::Artifact;
|
|
@@ -26,31 +27,22 @@ impl Engine for Model {
|
|
| 26 |
.ok_or_else(|| anyhow!("no Segment mask on page"))?;
|
| 27 |
let mask = ctx.blobs.load_image(&mask_ref)?;
|
| 28 |
|
| 29 |
-
let
|
| 30 |
-
None => {
|
| 31 |
-
let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
|
| 32 |
-
self.0.inference(&image, &mask)?
|
| 33 |
-
}
|
| 34 |
Some(r) => {
|
| 35 |
let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
|
| 36 |
Some((_, blob)) => ctx.blobs.load_image(&blob)?,
|
| 37 |
None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
|
| 38 |
};
|
| 39 |
-
let
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
let
|
| 44 |
-
|
| 45 |
-
let mask_crop =
|
| 46 |
-
DynamicImage::ImageLuma8(mask.to_luma8().view(x0, y0, rw, rh).to_image());
|
| 47 |
-
let patched = self.0.inference(&image_crop, &mask_crop)?;
|
| 48 |
-
let mut out = base;
|
| 49 |
-
out.copy_from(&patched, x0, y0)?;
|
| 50 |
-
out
|
| 51 |
}
|
| 52 |
};
|
| 53 |
|
|
|
|
| 54 |
let (w, h) = image_dimensions(&result);
|
| 55 |
let blob = ctx.blobs.put_webp(&result)?;
|
| 56 |
Ok(vec![upsert_image_blob(
|
|
@@ -64,6 +56,23 @@ impl Engine for Model {
|
|
| 64 |
}
|
| 65 |
}
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
inventory::submit! {
|
| 68 |
EngineInfo {
|
| 69 |
id: "aot-inpainting",
|
|
|
|
| 1 |
+
//! AOT inpainting. Direct source + segment → result. Subdivision is handled
|
| 2 |
+
//! by [`koharu_ml::inpainting::run_inpaint`] (shared with Lama) — this engine
|
| 3 |
+
//! only wires up the scene I/O.
|
| 4 |
//!
|
| 5 |
+
//! For repair-brush (`ctx.options.region`), composite onto the existing
|
| 6 |
+
//! `Image { Inpainted }` if present (fallback Source) and zero out mask
|
| 7 |
+
//! pixels outside the region so only that area is reprocessed.
|
|
|
|
| 8 |
|
| 9 |
use anyhow::{Result, anyhow};
|
| 10 |
use async_trait::async_trait;
|
| 11 |
+
use image::{DynamicImage, GrayImage, Luma};
|
| 12 |
+
use koharu_core::{ImageRole, MaskRole, Op, Region};
|
| 13 |
use koharu_ml::aot_inpainting::AotInpainting;
|
| 14 |
|
| 15 |
use crate::pipeline::artifacts::Artifact;
|
|
|
|
| 27 |
.ok_or_else(|| anyhow!("no Segment mask on page"))?;
|
| 28 |
let mask = ctx.blobs.load_image(&mask_ref)?;
|
| 29 |
|
| 30 |
+
let (image, mask) = match ctx.options.region {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
Some(r) => {
|
| 32 |
let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
|
| 33 |
Some((_, blob)) => ctx.blobs.load_image(&blob)?,
|
| 34 |
None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
|
| 35 |
};
|
| 36 |
+
let clipped = clip_mask_to_region(&mask, &r);
|
| 37 |
+
(base, clipped)
|
| 38 |
+
}
|
| 39 |
+
None => {
|
| 40 |
+
let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
|
| 41 |
+
(image, mask)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
| 43 |
};
|
| 44 |
|
| 45 |
+
let result = self.0.inference(&image, &mask)?;
|
| 46 |
let (w, h) = image_dimensions(&result);
|
| 47 |
let blob = ctx.blobs.put_webp(&result)?;
|
| 48 |
Ok(vec![upsert_image_blob(
|
|
|
|
| 56 |
}
|
| 57 |
}
|
| 58 |
|
| 59 |
+
/// Restrict `mask` to `region` for the repair-brush path.
///
/// The mask is converted to 8-bit luma and copied pixel-by-pixel into a fresh
/// `GrayImage` of the same dimensions, but only for coordinates inside
/// `region`. Pixels outside the region are never written, so they keep the
/// fresh buffer's initial value (the module docs describe this as "zeroing
/// out" the mask outside the region).
///
/// The region rectangle is clamped to the mask bounds (`saturating_add` guards
/// against `x + width` / `y + height` overflow), so a region that lies partly
/// or fully outside the mask yields a smaller or empty copy range instead of
/// an out-of-bounds access.
fn clip_mask_to_region(mask: &DynamicImage, region: &Region) -> DynamicImage {
|
| 60 |
+
let src = mask.to_luma8();
|
| 61 |
+
let (w, h) = src.dimensions();
|
| 62 |
+
// Clamp the region's top-left and bottom-right corners to the mask bounds.
let x0 = region.x.min(w);
|
| 63 |
+
let y0 = region.y.min(h);
|
| 64 |
+
let x1 = region.x.saturating_add(region.width).min(w);
|
| 65 |
+
let y1 = region.y.saturating_add(region.height).min(h);
|
| 66 |
+
|
| 67 |
+
let mut clipped = GrayImage::new(w, h);
|
| 68 |
+
for y in y0..y1 {
|
| 69 |
+
for x in x0..x1 {
|
| 70 |
+
clipped.put_pixel(x, y, Luma([src.get_pixel(x, y).0[0]]));
|
| 71 |
+
}
|
| 72 |
+
}
|
| 73 |
+
DynamicImage::ImageLuma8(clipped)
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
inventory::submit! {
|
| 77 |
EngineInfo {
|
| 78 |
id: "aot-inpainting",
|
koharu-app/src/pipeline/engines/lama.rs
CHANGED
|
@@ -1,23 +1,26 @@
|
|
| 1 |
//! Lama Manga inpainter. Reads source + segmentation mask from the page,
|
| 2 |
//! runs the model, writes the output as `Image { role: Inpainted }`.
|
| 3 |
//!
|
| 4 |
-
//!
|
| 5 |
-
//!
|
| 6 |
-
//!
|
| 7 |
-
//!
|
| 8 |
-
//!
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
use anyhow::{Result, anyhow};
|
| 11 |
use async_trait::async_trait;
|
| 12 |
-
use
|
|
|
|
| 13 |
use koharu_ml::lama::Lama;
|
| 14 |
-
use koharu_ml::types::TextRegion;
|
| 15 |
|
| 16 |
use crate::pipeline::artifacts::Artifact;
|
| 17 |
use crate::pipeline::engine::{Engine, EngineCtx, EngineInfo};
|
| 18 |
use crate::pipeline::engines::support::{
|
| 19 |
-
find_image_node, find_mask_node, image_dimensions, load_source_image,
|
| 20 |
-
text_node_to_region, text_nodes, upsert_image_blob,
|
| 21 |
};
|
| 22 |
|
| 23 |
pub struct Model(Lama);
|
|
@@ -29,26 +32,22 @@ impl Engine for Model {
|
|
| 29 |
.ok_or_else(|| anyhow!("no Segment mask on page"))?;
|
| 30 |
let mask = ctx.blobs.load_image(&mask_ref)?;
|
| 31 |
|
| 32 |
-
let (image,
|
| 33 |
Some(r) => {
|
| 34 |
let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
|
| 35 |
Some((_, blob)) => ctx.blobs.load_image(&blob)?,
|
| 36 |
None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
|
| 37 |
};
|
| 38 |
-
(
|
|
|
|
| 39 |
}
|
| 40 |
None => {
|
| 41 |
let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
|
| 42 |
-
|
| 43 |
-
.iter()
|
| 44 |
-
.map(|(_, transform, t)| text_node_to_region(transform, t))
|
| 45 |
-
.collect();
|
| 46 |
-
(image, regions)
|
| 47 |
}
|
| 48 |
};
|
| 49 |
|
| 50 |
-
let
|
| 51 |
-
let result = self.0.inference_with_blocks(&image, &mask, regions_ref)?;
|
| 52 |
let (w, h) = image_dimensions(&result);
|
| 53 |
let blob = ctx.blobs.put_webp(&result)?;
|
| 54 |
Ok(vec![upsert_image_blob(
|
|
@@ -62,6 +61,26 @@ impl Engine for Model {
|
|
| 62 |
}
|
| 63 |
}
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
inventory::submit! {
|
| 66 |
EngineInfo {
|
| 67 |
id: "lama-manga",
|
|
|
|
| 1 |
//! Lama Manga inpainter. Reads source + segmentation mask from the page,
|
| 2 |
//! runs the model, writes the output as `Image { role: Inpainted }`.
|
| 3 |
//!
|
| 4 |
+
//! Box subdivision (the "which regions to run the model on" question) is
|
| 5 |
+
//! driven by the **mask itself** via `boxes_from_mask` — mirrors IOPaint's
|
| 6 |
+
//! `InpaintModel.__call__`. Text detections are no longer consulted; the
|
| 7 |
+
//! segmentation mask already encodes which pixels to remove.
|
| 8 |
+
//!
|
| 9 |
+
//! When `ctx.options.region` is set (repair-brush re-inpaint), we composite
|
| 10 |
+
//! onto the existing `Image { Inpainted }` if present (falling back to
|
| 11 |
+
//! `Source`) and zero out mask pixels outside the region before dispatch —
|
| 12 |
+
//! so only that region is reprocessed.
|
| 13 |
|
| 14 |
use anyhow::{Result, anyhow};
|
| 15 |
use async_trait::async_trait;
|
| 16 |
+
use image::{DynamicImage, GrayImage, Luma};
|
| 17 |
+
use koharu_core::{ImageRole, MaskRole, Op, Region};
|
| 18 |
use koharu_ml::lama::Lama;
|
|
|
|
| 19 |
|
| 20 |
use crate::pipeline::artifacts::Artifact;
|
| 21 |
use crate::pipeline::engine::{Engine, EngineCtx, EngineInfo};
|
| 22 |
use crate::pipeline::engines::support::{
|
| 23 |
+
find_image_node, find_mask_node, image_dimensions, load_source_image, upsert_image_blob,
|
|
|
|
| 24 |
};
|
| 25 |
|
| 26 |
pub struct Model(Lama);
|
|
|
|
| 32 |
.ok_or_else(|| anyhow!("no Segment mask on page"))?;
|
| 33 |
let mask = ctx.blobs.load_image(&mask_ref)?;
|
| 34 |
|
| 35 |
+
let (image, mask) = match ctx.options.region {
|
| 36 |
Some(r) => {
|
| 37 |
let base = match find_image_node(ctx.scene, ctx.page, ImageRole::Inpainted) {
|
| 38 |
Some((_, blob)) => ctx.blobs.load_image(&blob)?,
|
| 39 |
None => load_source_image(ctx.scene, ctx.page, ctx.blobs)?,
|
| 40 |
};
|
| 41 |
+
let clipped = clip_mask_to_region(&mask, &r);
|
| 42 |
+
(base, clipped)
|
| 43 |
}
|
| 44 |
None => {
|
| 45 |
let image = load_source_image(ctx.scene, ctx.page, ctx.blobs)?;
|
| 46 |
+
(image, mask)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
}
|
| 48 |
};
|
| 49 |
|
| 50 |
+
let result = self.0.inference(&image, &mask)?;
|
|
|
|
| 51 |
let (w, h) = image_dimensions(&result);
|
| 52 |
let blob = ctx.blobs.put_webp(&result)?;
|
| 53 |
Ok(vec![upsert_image_blob(
|
|
|
|
| 61 |
}
|
| 62 |
}
|
| 63 |
|
| 64 |
+
/// Zero out every pixel of `mask` that falls outside `region`. The Crop
|
| 65 |
+
/// strategy's `boxes_from_mask` then only finds contours inside the region,
|
| 66 |
+
/// so the inpainter only touches that area.
///
/// The returned image is always 8-bit luma with the same dimensions as
/// `mask`. The region rectangle is clamped to the mask bounds (with
/// `saturating_add` guarding `x + width` / `y + height`), so a region that
/// lies partly or fully outside the mask narrows or empties the copied area
/// rather than indexing out of bounds.
|
| 67 |
+
fn clip_mask_to_region(mask: &DynamicImage, region: &Region) -> DynamicImage {
|
| 68 |
+
let src = mask.to_luma8();
|
| 69 |
+
let (w, h) = src.dimensions();
|
| 70 |
+
// Clamp the region's top-left and bottom-right corners to the mask bounds.
let x0 = region.x.min(w);
|
| 71 |
+
let y0 = region.y.min(h);
|
| 72 |
+
let x1 = region.x.saturating_add(region.width).min(w);
|
| 73 |
+
let y1 = region.y.saturating_add(region.height).min(h);
|
| 74 |
+
|
| 75 |
+
let mut clipped = GrayImage::new(w, h);
|
| 76 |
+
for y in y0..y1 {
|
| 77 |
+
for x in x0..x1 {
|
| 78 |
+
clipped.put_pixel(x, y, Luma([src.get_pixel(x, y).0[0]]));
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
DynamicImage::ImageLuma8(clipped)
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
inventory::submit! {
|
| 85 |
EngineInfo {
|
| 86 |
id: "lama-manga",
|
koharu-app/src/pipeline/engines/support.rs
CHANGED
|
@@ -7,7 +7,7 @@ use anyhow::{Context, Result};
|
|
| 7 |
use image::{DynamicImage, GenericImageView};
|
| 8 |
use koharu_core::{
|
| 9 |
BlobRef, ImageData, ImageRole, MaskData, MaskRole, Node, NodeDataPatch, NodeId, NodeKind, Op,
|
| 10 |
-
PageId,
|
| 11 |
};
|
| 12 |
|
| 13 |
use crate::blobs::BlobStore;
|
|
@@ -89,23 +89,6 @@ pub fn text_node_to_region(transform: &Transform, text: &TextData) -> koharu_ml:
|
|
| 89 |
}
|
| 90 |
}
|
| 91 |
|
| 92 |
-
/// Wrap a raw pixel `Region` as a `TextRegion` with no text hints. Used when
|
| 93 |
-
/// an inpainter engine receives a region override (repair-brush path).
|
| 94 |
-
pub fn region_to_text_region(r: &Region) -> koharu_ml::types::TextRegion {
|
| 95 |
-
koharu_ml::types::TextRegion {
|
| 96 |
-
x: r.x as f32,
|
| 97 |
-
y: r.y as f32,
|
| 98 |
-
width: r.width as f32,
|
| 99 |
-
height: r.height as f32,
|
| 100 |
-
confidence: 1.0,
|
| 101 |
-
line_polygons: None,
|
| 102 |
-
source_direction: None,
|
| 103 |
-
rotation_deg: None,
|
| 104 |
-
detected_font_size_px: None,
|
| 105 |
-
detector: None,
|
| 106 |
-
}
|
| 107 |
-
}
|
| 108 |
-
|
| 109 |
/// Inverse of `ml_text_direction_to_core`.
|
| 110 |
pub fn core_text_direction_to_ml(d: koharu_core::TextDirection) -> koharu_ml::types::TextDirection {
|
| 111 |
match d {
|
|
|
|
| 7 |
use image::{DynamicImage, GenericImageView};
|
| 8 |
use koharu_core::{
|
| 9 |
BlobRef, ImageData, ImageRole, MaskData, MaskRole, Node, NodeDataPatch, NodeId, NodeKind, Op,
|
| 10 |
+
PageId, Scene, TextData, Transform,
|
| 11 |
};
|
| 12 |
|
| 13 |
use crate::blobs::BlobStore;
|
|
|
|
| 89 |
}
|
| 90 |
}
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
/// Inverse of `ml_text_direction_to_core`.
|
| 93 |
pub fn core_text_direction_to_ml(d: koharu_core::TextDirection) -> koharu_ml::types::TextDirection {
|
| 94 |
match d {
|
koharu-app/src/pipeline/mod.rs
CHANGED
|
@@ -27,6 +27,11 @@ use tracing::Instrument;
|
|
| 27 |
/// about to run (or just finished); step_index / page_index are 0-based.
|
| 28 |
pub type ProgressSink = Arc<dyn Fn(ProgressTick) + Send + Sync>;
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
#[derive(Debug, Clone)]
|
| 31 |
pub struct ProgressTick {
|
| 32 |
/// Coarse UI-facing step tag derived from the engine's primary
|
|
@@ -42,6 +47,20 @@ pub struct ProgressTick {
|
|
| 42 |
pub overall_percent: u8,
|
| 43 |
}
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
/// Map an engine's produced artifact to its UI step category. Stays
|
| 46 |
/// co-located with the engine metadata so adding a new engine can't
|
| 47 |
/// silently bypass the toolbar spinner — only the registered artifact
|
|
@@ -89,6 +108,12 @@ pub enum Scope {
|
|
| 89 |
|
| 90 |
/// Execute `spec` against `session`. Each engine step becomes one `Op::Batch`
|
| 91 |
/// applied via the session's history (one undo step per step per page).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
#[allow(clippy::too_many_arguments)]
|
| 93 |
#[tracing::instrument(level = "info", skip_all)]
|
| 94 |
pub async fn run(
|
|
@@ -101,7 +126,8 @@ pub async fn run(
|
|
| 101 |
spec: PipelineSpec,
|
| 102 |
cancel: Arc<AtomicBool>,
|
| 103 |
progress: Option<ProgressSink>,
|
| 104 |
-
|
|
|
|
| 105 |
let infos: Vec<&EngineInfo> = spec
|
| 106 |
.steps
|
| 107 |
.iter()
|
|
@@ -124,8 +150,9 @@ pub async fn run(
|
|
| 124 |
let total_steps = order.len().max(1);
|
| 125 |
let total_units = (total_pages * total_steps) as u64;
|
| 126 |
let mut completed: u64 = 0;
|
|
|
|
| 127 |
|
| 128 |
-
for (page_index, page_id) in pages.iter().enumerate() {
|
| 129 |
for (seq, &i) in order.iter().enumerate() {
|
| 130 |
if cancel.load(Ordering::Relaxed) {
|
| 131 |
bail!("cancelled");
|
|
@@ -147,11 +174,31 @@ pub async fn run(
|
|
| 147 |
|
| 148 |
// The page must still exist (user may have deleted it mid-run).
|
| 149 |
if !session.scene.read().pages.contains_key(page_id) {
|
| 150 |
-
|
| 151 |
-
|
|
|
|
|
|
|
| 152 |
}
|
| 153 |
|
| 154 |
-
let engine = registry.get(info.id, &runtime, cpu).await
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
let scene_snap = session.scene_snapshot();
|
| 156 |
let ctx = EngineCtx {
|
| 157 |
scene: &scene_snap,
|
|
@@ -163,9 +210,29 @@ pub async fn run(
|
|
| 163 |
llm: &llm,
|
| 164 |
renderer: &renderer,
|
| 165 |
};
|
| 166 |
-
let
|
| 167 |
.instrument(tracing::info_span!("step", engine = info.id, page = %page_id))
|
| 168 |
-
.await
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
completed += 1;
|
| 170 |
if ops.is_empty() {
|
| 171 |
continue;
|
|
@@ -174,7 +241,20 @@ pub async fn run(
|
|
| 174 |
ops,
|
| 175 |
label: format!("{}: page {}", info.id, page_id),
|
| 176 |
};
|
| 177 |
-
session.apply(batch)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
}
|
| 180 |
|
|
@@ -189,7 +269,37 @@ pub async fn run(
|
|
| 189 |
overall_percent: 100,
|
| 190 |
});
|
| 191 |
}
|
| 192 |
-
Ok(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
}
|
| 194 |
|
| 195 |
// ---------------------------------------------------------------------------
|
|
|
|
| 27 |
/// about to run (or just finished); step_index / page_index are 0-based.
|
| 28 |
pub type ProgressSink = Arc<dyn Fn(ProgressTick) + Send + Sync>;
|
| 29 |
|
| 30 |
+
/// Observer for non-fatal step failures. Called once per failed step; the
|
| 31 |
+
/// pipeline skips the rest of that page's steps and moves on to the next
|
| 32 |
+
/// page.
|
| 33 |
+
pub type WarningSink = Arc<dyn Fn(WarningTick) + Send + Sync>;
|
| 34 |
+
|
| 35 |
#[derive(Debug, Clone)]
|
| 36 |
pub struct ProgressTick {
|
| 37 |
/// Coarse UI-facing step tag derived from the engine's primary
|
|
|
|
| 47 |
pub overall_percent: u8,
|
| 48 |
}
|
| 49 |
|
| 50 |
+
/// Payload handed to a `WarningSink` for one non-fatal step failure.
#[derive(Debug, Clone)]
|
| 51 |
+
pub struct WarningTick {
|
| 52 |
+
/// Engine id of the step that failed (e.g. `"lama-manga"`).
pub step_id: String,
|
| 53 |
+
/// 0-based index of the page the failure happened on.
pub page_index: usize,
|
| 54 |
+
/// Total number of pages in the run, for "page N/M" style messages.
pub total_pages: usize,
|
| 55 |
+
/// The failure rendered with anyhow's alternate (`{:#}`) formatting.
pub message: String,
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
/// Returned by [`run`]. `warning_count == 0` means the run finished cleanly.
|
| 59 |
+
#[derive(Debug, Clone, Default)]
|
| 60 |
+
pub struct RunOutcome {
|
| 61 |
+
pub warning_count: usize,
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
/// Map an engine's produced artifact to its UI step category. Stays
|
| 65 |
/// co-located with the engine metadata so adding a new engine can't
|
| 66 |
/// silently bypass the toolbar spinner — only the registered artifact
|
|
|
|
| 108 |
|
| 109 |
/// Execute `spec` against `session`. Each engine step becomes one `Op::Batch`
|
| 110 |
/// applied via the session's history (one undo step per step per page).
|
| 111 |
+
///
|
| 112 |
+
/// A failed step on a given page is non-fatal: the rest of that page's steps
|
| 113 |
+
/// are skipped (they typically depend on the failed step's output), one
|
| 114 |
+
/// [`WarningTick`] is emitted via `warnings`, and the driver moves on to the
|
| 115 |
+
/// next page. The returned [`RunOutcome`] carries the total number of per-step warnings
|
| 116 |
+
/// that fired, letting callers flag the run as `CompletedWithErrors`.
|
| 117 |
#[allow(clippy::too_many_arguments)]
|
| 118 |
#[tracing::instrument(level = "info", skip_all)]
|
| 119 |
pub async fn run(
|
|
|
|
| 126 |
spec: PipelineSpec,
|
| 127 |
cancel: Arc<AtomicBool>,
|
| 128 |
progress: Option<ProgressSink>,
|
| 129 |
+
warnings: Option<WarningSink>,
|
| 130 |
+
) -> Result<RunOutcome> {
|
| 131 |
let infos: Vec<&EngineInfo> = spec
|
| 132 |
.steps
|
| 133 |
.iter()
|
|
|
|
| 150 |
let total_steps = order.len().max(1);
|
| 151 |
let total_units = (total_pages * total_steps) as u64;
|
| 152 |
let mut completed: u64 = 0;
|
| 153 |
+
let mut warning_count: usize = 0;
|
| 154 |
|
| 155 |
+
'pages: for (page_index, page_id) in pages.iter().enumerate() {
|
| 156 |
for (seq, &i) in order.iter().enumerate() {
|
| 157 |
if cancel.load(Ordering::Relaxed) {
|
| 158 |
bail!("cancelled");
|
|
|
|
| 174 |
|
| 175 |
// The page must still exist (user may have deleted it mid-run).
|
| 176 |
if !session.scene.read().pages.contains_key(page_id) {
|
| 177 |
+
// Skip the remaining steps for a deleted page and credit all
|
| 178 |
+
// of them against total_units so progress still reaches 100%.
|
| 179 |
+
completed += (total_steps - seq) as u64;
|
| 180 |
+
continue 'pages;
|
| 181 |
}
|
| 182 |
|
| 183 |
+
let engine = match registry.get(info.id, &runtime, cpu).await {
|
| 184 |
+
Ok(e) => e,
|
| 185 |
+
Err(err) => {
|
| 186 |
+
// Engine *load* failure: same recovery as a run failure.
|
| 187 |
+
report_step_failure(
|
| 188 |
+
info.id,
|
| 189 |
+
page_id,
|
| 190 |
+
seq,
|
| 191 |
+
page_index,
|
| 192 |
+
total_pages,
|
| 193 |
+
total_steps,
|
| 194 |
+
&err,
|
| 195 |
+
&mut warning_count,
|
| 196 |
+
warnings.as_ref(),
|
| 197 |
+
);
|
| 198 |
+
completed += (total_steps - seq) as u64;
|
| 199 |
+
continue 'pages;
|
| 200 |
+
}
|
| 201 |
+
};
|
| 202 |
let scene_snap = session.scene_snapshot();
|
| 203 |
let ctx = EngineCtx {
|
| 204 |
scene: &scene_snap,
|
|
|
|
| 210 |
llm: &llm,
|
| 211 |
renderer: &renderer,
|
| 212 |
};
|
| 213 |
+
let step_result = async { engine.run(ctx).await }
|
| 214 |
.instrument(tracing::info_span!("step", engine = info.id, page = %page_id))
|
| 215 |
+
.await;
|
| 216 |
+
let ops = match step_result {
|
| 217 |
+
Ok(ops) => ops,
|
| 218 |
+
Err(err) => {
|
| 219 |
+
report_step_failure(
|
| 220 |
+
info.id,
|
| 221 |
+
page_id,
|
| 222 |
+
seq,
|
| 223 |
+
page_index,
|
| 224 |
+
total_pages,
|
| 225 |
+
total_steps,
|
| 226 |
+
&err,
|
| 227 |
+
&mut warning_count,
|
| 228 |
+
warnings.as_ref(),
|
| 229 |
+
);
|
| 230 |
+
// Subsequent steps on this page almost always consume the
|
| 231 |
+
// failed step's artifact; skip the rest and move on.
|
| 232 |
+
completed += (total_steps - seq) as u64;
|
| 233 |
+
continue 'pages;
|
| 234 |
+
}
|
| 235 |
+
};
|
| 236 |
completed += 1;
|
| 237 |
if ops.is_empty() {
|
| 238 |
continue;
|
|
|
|
| 241 |
ops,
|
| 242 |
label: format!("{}: page {}", info.id, page_id),
|
| 243 |
};
|
| 244 |
+
if let Err(err) = session.apply(batch) {
|
| 245 |
+
report_step_failure(
|
| 246 |
+
info.id,
|
| 247 |
+
page_id,
|
| 248 |
+
seq,
|
| 249 |
+
page_index,
|
| 250 |
+
total_pages,
|
| 251 |
+
total_steps,
|
| 252 |
+
&err,
|
| 253 |
+
&mut warning_count,
|
| 254 |
+
warnings.as_ref(),
|
| 255 |
+
);
|
| 256 |
+
continue 'pages;
|
| 257 |
+
}
|
| 258 |
}
|
| 259 |
}
|
| 260 |
|
|
|
|
| 269 |
overall_percent: 100,
|
| 270 |
});
|
| 271 |
}
|
| 272 |
+
Ok(RunOutcome { warning_count })
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
/// Record one non-fatal step failure.
///
/// Logs the error through `tracing::warn!` (tagged with the engine id, page
/// id and step index), increments `*warning_count`, and, when a sink is
/// supplied, forwards a [`WarningTick`] to it. It does not touch progress
/// accounting or control flow; the caller decides how to continue.
///
/// `total_steps` is accepted but currently unused (see the explicit discard
/// at the top of the body).
#[allow(clippy::too_many_arguments)]
|
| 276 |
+
fn report_step_failure(
|
| 277 |
+
engine_id: &str,
|
| 278 |
+
page_id: &PageId,
|
| 279 |
+
step_index: usize,
|
| 280 |
+
page_index: usize,
|
| 281 |
+
total_pages: usize,
|
| 282 |
+
total_steps: usize,
|
| 283 |
+
err: &anyhow::Error,
|
| 284 |
+
warning_count: &mut usize,
|
| 285 |
+
sink: Option<&WarningSink>,
|
| 286 |
+
) {
|
| 287 |
+
// Deliberately discarded: silences the unused-parameter warning.
let _ = total_steps;
|
| 288 |
+
tracing::warn!(
|
| 289 |
+
engine = engine_id,
|
| 290 |
+
page = %page_id,
|
| 291 |
+
step_index,
|
| 292 |
+
"pipeline step failed: {err:#}"
|
| 293 |
+
);
|
| 294 |
+
*warning_count += 1;
|
| 295 |
+
if let Some(sink) = sink {
|
| 296 |
+
sink(WarningTick {
|
| 297 |
+
step_id: engine_id.to_string(),
|
| 298 |
+
page_index,
|
| 299 |
+
total_pages,
|
| 300 |
+
message: format!("{err:#}"),
|
| 301 |
+
});
|
| 302 |
+
}
|
| 303 |
}
|
| 304 |
|
| 305 |
// ---------------------------------------------------------------------------
|
koharu-core/src/events.rs
CHANGED
|
@@ -21,8 +21,15 @@ use crate::protocol::LlmTarget;
|
|
| 21 |
#[serde(tag = "event", rename_all = "camelCase")]
|
| 22 |
pub enum AppEvent {
|
| 23 |
// Pipeline jobs.
|
| 24 |
-
JobStarted {
|
|
|
|
|
|
|
|
|
|
| 25 |
JobProgress(PipelineProgress),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
JobFinished(JobFinishedEvent),
|
| 27 |
|
| 28 |
// Runtime library / model downloads.
|
|
@@ -36,9 +43,15 @@ pub enum AppEvent {
|
|
| 36 |
// - `LlmLoaded` — model is on the GPU and ready for inference.
|
| 37 |
// - `LlmFailed` — load failed; see `GET /llm/current` for the reason.
|
| 38 |
// - `LlmUnloaded` — model released.
|
| 39 |
-
LlmLoading {
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
LlmUnloaded,
|
| 43 |
|
| 44 |
// (Re)connect replay so the client can seed in-flight state.
|
|
@@ -122,6 +135,21 @@ pub struct JobFinishedEvent {
|
|
| 122 |
pub error: Option<String>,
|
| 123 |
}
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
// ---------------------------------------------------------------------------
|
| 126 |
// Downloads
|
| 127 |
// ---------------------------------------------------------------------------
|
|
|
|
| 21 |
#[serde(tag = "event", rename_all = "camelCase")]
|
| 22 |
pub enum AppEvent {
|
| 23 |
// Pipeline jobs.
|
| 24 |
+
JobStarted {
|
| 25 |
+
id: String,
|
| 26 |
+
kind: String,
|
| 27 |
+
},
|
| 28 |
JobProgress(PipelineProgress),
|
| 29 |
+
/// A single step on one page failed but the pipeline kept running.
|
| 30 |
+
/// Emitted per failed step so clients can show a non-fatal warning while
|
| 31 |
+
/// the job continues with the next page.
|
| 32 |
+
JobWarning(JobWarningEvent),
|
| 33 |
JobFinished(JobFinishedEvent),
|
| 34 |
|
| 35 |
// Runtime library / model downloads.
|
|
|
|
| 43 |
// - `LlmLoaded` — model is on the GPU and ready for inference.
|
| 44 |
// - `LlmFailed` — load failed; see `GET /llm/current` for the reason.
|
| 45 |
// - `LlmUnloaded` — model released.
|
| 46 |
+
LlmLoading {
|
| 47 |
+
target: LlmTarget,
|
| 48 |
+
},
|
| 49 |
+
LlmLoaded {
|
| 50 |
+
target: LlmTarget,
|
| 51 |
+
},
|
| 52 |
+
LlmFailed {
|
| 53 |
+
target: Option<LlmTarget>,
|
| 54 |
+
},
|
| 55 |
LlmUnloaded,
|
| 56 |
|
| 57 |
// (Re)connect replay so the client can seed in-flight state.
|
|
|
|
| 135 |
pub error: Option<String>,
|
| 136 |
}
|
| 137 |
|
| 138 |
+
/// A non-fatal step failure during a pipeline run. The pipeline recovers by
|
| 139 |
+
/// skipping the rest of the current page's steps and moving on to the next
|
| 140 |
+
/// page; the UI accumulates these into a list during the job.
|
| 141 |
+
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
|
| 142 |
+
#[serde(rename_all = "camelCase")]
|
| 143 |
+
pub struct JobWarningEvent {
|
| 144 |
+
pub job_id: String,
|
| 145 |
+
/// 0-based page index where the failure happened.
|
| 146 |
+
pub page_index: usize,
|
| 147 |
+
pub total_pages: usize,
|
| 148 |
+
/// Engine id (e.g. `"lama-manga"`) of the step that failed.
|
| 149 |
+
pub step_id: String,
|
| 150 |
+
pub message: String,
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
// ---------------------------------------------------------------------------
|
| 154 |
// Downloads
|
| 155 |
// ---------------------------------------------------------------------------
|
koharu-core/src/lib.rs
CHANGED
|
@@ -19,7 +19,7 @@ pub mod style;
|
|
| 19 |
pub use blob::BlobRef;
|
| 20 |
pub use events::{
|
| 21 |
AppEvent, DownloadProgress, DownloadStatus, JobFinishedEvent, JobStatus, JobSummary,
|
| 22 |
-
PipelineProgress, PipelineStatus, PipelineStep, ProjectSummary, SnapshotEvent,
|
| 23 |
};
|
| 24 |
pub use font::{FontPrediction, NamedFontPrediction, TextDirection, TopFont};
|
| 25 |
pub use google_fonts::{FontSource, GoogleFontCatalog, GoogleFontEntry, GoogleFontVariant};
|
|
|
|
| 19 |
pub use blob::BlobRef;
|
| 20 |
pub use events::{
|
| 21 |
AppEvent, DownloadProgress, DownloadStatus, JobFinishedEvent, JobStatus, JobSummary,
|
| 22 |
+
JobWarningEvent, PipelineProgress, PipelineStatus, PipelineStep, ProjectSummary, SnapshotEvent,
|
| 23 |
};
|
| 24 |
pub use font::{FontPrediction, NamedFontPrediction, TextDirection, TopFont};
|
| 25 |
pub use google_fonts::{FontSource, GoogleFontCatalog, GoogleFontEntry, GoogleFontVariant};
|
koharu-ml/bin/aot-inpainting.rs
CHANGED
|
@@ -57,7 +57,11 @@ async fn main() -> Result<()> {
|
|
| 57 |
let mask = image::open(&cli.mask)?;
|
| 58 |
let started = std::time::Instant::now();
|
| 59 |
let output = if let Some(max_side) = cli.max_side {
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
} else {
|
| 62 |
model.inference(&image, &mask)?
|
| 63 |
};
|
|
|
|
| 57 |
let mask = image::open(&cli.mask)?;
|
| 58 |
let started = std::time::Instant::now();
|
| 59 |
let output = if let Some(max_side) = cli.max_side {
|
| 60 |
+
let cfg = koharu_ml::inpainting::HdStrategyConfig {
|
| 61 |
+
resize_limit: max_side,
|
| 62 |
+
..model.default_config()
|
| 63 |
+
};
|
| 64 |
+
model.inference_with_config(&image, &mask, &cfg)?
|
| 65 |
} else {
|
| 66 |
model.inference(&image, &mask)?
|
| 67 |
};
|
koharu-ml/src/aot_inpainting/mod.rs
CHANGED
|
@@ -7,17 +7,17 @@ use std::{
|
|
| 7 |
|
| 8 |
use anyhow::{Context, Result, bail};
|
| 9 |
use candle_core::{DType, Device, Tensor};
|
| 10 |
-
use image::{
|
| 11 |
-
DynamicImage, GenericImageView, GrayImage, RgbImage,
|
| 12 |
-
imageops::{FilterType, resize},
|
| 13 |
-
};
|
| 14 |
use koharu_runtime::RuntimeManager;
|
| 15 |
use serde::Deserialize;
|
| 16 |
use tracing::instrument;
|
| 17 |
|
| 18 |
use crate::{
|
| 19 |
device,
|
| 20 |
-
inpainting::{
|
|
|
|
|
|
|
|
|
|
| 21 |
loading,
|
| 22 |
};
|
| 23 |
|
|
@@ -49,16 +49,6 @@ pub struct AotInpainting {
|
|
| 49 |
device: Device,
|
| 50 |
}
|
| 51 |
|
| 52 |
-
#[derive(Debug, Clone)]
|
| 53 |
-
struct PreparedInput {
|
| 54 |
-
pixel_values: Tensor,
|
| 55 |
-
mask_values: Tensor,
|
| 56 |
-
original_rgb: RgbImage,
|
| 57 |
-
original_mask: GrayImage,
|
| 58 |
-
model_width: u32,
|
| 59 |
-
model_height: u32,
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
#[derive(Debug, Clone, Deserialize)]
|
| 63 |
struct AotInpaintingConfig {
|
| 64 |
model_type: String,
|
|
@@ -137,21 +127,27 @@ impl AotInpainting {
|
|
| 137 |
})
|
| 138 |
}
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
#[instrument(level = "debug", skip_all)]
|
| 141 |
pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
|
| 142 |
-
self.
|
| 143 |
}
|
| 144 |
|
| 145 |
#[instrument(level = "debug", skip_all)]
|
| 146 |
-
pub fn
|
| 147 |
&self,
|
| 148 |
image: &DynamicImage,
|
| 149 |
mask: &DynamicImage,
|
| 150 |
-
|
| 151 |
) -> Result<DynamicImage> {
|
| 152 |
-
if max_side == 0 {
|
| 153 |
-
bail!("max_side must be positive");
|
| 154 |
-
}
|
| 155 |
if image.dimensions() != mask.dimensions() {
|
| 156 |
bail!(
|
| 157 |
"image and mask dimensions dismatch: image is {:?}, mask is {:?}",
|
|
@@ -161,84 +157,36 @@ impl AotInpainting {
|
|
| 161 |
}
|
| 162 |
|
| 163 |
let started = Instant::now();
|
| 164 |
-
let
|
| 165 |
-
let
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
let composited = self.postprocess(&output, &prepared)?;
|
| 169 |
|
| 170 |
tracing::info!(
|
| 171 |
width = image.width(),
|
| 172 |
height = image.height(),
|
| 173 |
-
|
| 174 |
-
model_height = prepared.model_height,
|
| 175 |
-
max_side,
|
| 176 |
total_ms = started.elapsed().as_millis(),
|
| 177 |
"aot inpainting timings"
|
| 178 |
);
|
| 179 |
|
| 180 |
if image.color().has_alpha() {
|
| 181 |
let alpha = extract_alpha(&image.to_rgba8());
|
| 182 |
-
let rgba = restore_alpha_channel(&
|
| 183 |
Ok(DynamicImage::ImageRgba8(rgba))
|
| 184 |
} else {
|
| 185 |
-
Ok(DynamicImage::ImageRgb8(
|
| 186 |
}
|
| 187 |
}
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
) -> Result<PreparedInput> {
|
| 195 |
-
let original_rgb = image.to_rgb8();
|
| 196 |
-
let original_mask = binarize_mask(mask);
|
| 197 |
-
let mut working_rgb = original_rgb.clone();
|
| 198 |
-
let mut working_mask = original_mask.clone();
|
| 199 |
-
|
| 200 |
-
if working_rgb.width().max(working_rgb.height()) > max_side {
|
| 201 |
-
let (resized_width, resized_height) =
|
| 202 |
-
resize_keep_aspect_dims(working_rgb.width(), working_rgb.height(), max_side);
|
| 203 |
-
working_rgb = resize(
|
| 204 |
-
&working_rgb,
|
| 205 |
-
resized_width,
|
| 206 |
-
resized_height,
|
| 207 |
-
FilterType::Triangle,
|
| 208 |
-
);
|
| 209 |
-
working_mask = resize(
|
| 210 |
-
&working_mask,
|
| 211 |
-
resized_width,
|
| 212 |
-
resized_height,
|
| 213 |
-
FilterType::Triangle,
|
| 214 |
-
);
|
| 215 |
-
}
|
| 216 |
-
|
| 217 |
-
let model_width = round_up_multiple(working_rgb.width(), self.config.pad_multiple as u32);
|
| 218 |
-
let model_height = round_up_multiple(working_rgb.height(), self.config.pad_multiple as u32);
|
| 219 |
-
if model_width != working_rgb.width() || model_height != working_rgb.height() {
|
| 220 |
-
working_rgb = resize(
|
| 221 |
-
&working_rgb,
|
| 222 |
-
model_width,
|
| 223 |
-
model_height,
|
| 224 |
-
FilterType::Triangle,
|
| 225 |
-
);
|
| 226 |
-
working_mask = resize(
|
| 227 |
-
&working_mask,
|
| 228 |
-
model_width,
|
| 229 |
-
model_height,
|
| 230 |
-
FilterType::Triangle,
|
| 231 |
-
);
|
| 232 |
-
}
|
| 233 |
-
|
| 234 |
-
let mut binary_model_mask = working_mask;
|
| 235 |
-
for pixel in binary_model_mask.pixels_mut() {
|
| 236 |
-
pixel.0[0] = if pixel.0[0] >= 127 { 255 } else { 0 };
|
| 237 |
-
}
|
| 238 |
-
|
| 239 |
let image_tensor = (Tensor::from_vec(
|
| 240 |
-
|
| 241 |
-
(1,
|
| 242 |
&self.device,
|
| 243 |
)?
|
| 244 |
.permute((0, 3, 1, 2))?
|
|
@@ -247,29 +195,22 @@ impl AotInpainting {
|
|
| 247 |
let image_tensor = (image_tensor - 1.0)?;
|
| 248 |
|
| 249 |
let mask_tensor = Tensor::from_vec(
|
| 250 |
-
|
| 251 |
-
(1,
|
| 252 |
&self.device,
|
| 253 |
)?
|
| 254 |
.permute((0, 3, 1, 2))?
|
| 255 |
.to_dtype(DType::F32)?;
|
| 256 |
let mask_tensor = (mask_tensor / 255.0)?;
|
| 257 |
let mask_inv = (Tensor::ones_like(&mask_tensor)? - &mask_tensor)?;
|
| 258 |
-
let mask_inv_rgb =
|
| 259 |
-
mask_inv.broadcast_as((1, 3, model_height as usize, model_width as usize))?;
|
| 260 |
let masked_image = (&image_tensor * &mask_inv_rgb)?;
|
| 261 |
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
mask_values: mask_tensor,
|
| 265 |
-
original_rgb,
|
| 266 |
-
original_mask,
|
| 267 |
-
model_width,
|
| 268 |
-
model_height,
|
| 269 |
-
})
|
| 270 |
}
|
| 271 |
|
| 272 |
-
fn postprocess(&self, output: &Tensor
|
| 273 |
let output = output.to_device(&Device::Cpu)?.squeeze(0)?;
|
| 274 |
let (channels, height, width) = output.dims3()?;
|
| 275 |
if channels != 3 {
|
|
@@ -282,27 +223,27 @@ impl AotInpainting {
|
|
| 282 |
.to_dtype(DType::U8)?
|
| 283 |
.flatten_all()?
|
| 284 |
.to_vec1::<u8>()?;
|
| 285 |
-
|
| 286 |
-
.ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
|
|
|
|
|
|
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
resize(
|
| 292 |
-
&predicted,
|
| 293 |
-
prepared.original_rgb.width(),
|
| 294 |
-
prepared.original_rgb.height(),
|
| 295 |
-
FilterType::Triangle,
|
| 296 |
-
)
|
| 297 |
-
} else {
|
| 298 |
-
predicted
|
| 299 |
-
};
|
| 300 |
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
}
|
| 307 |
}
|
| 308 |
|
|
@@ -323,49 +264,3 @@ async fn resolve_model_paths(runtime: &RuntimeManager) -> Result<(PathBuf, PathB
|
|
| 323 |
.with_context(|| format!("failed to download {SAFETENSORS_FILENAME} from {HF_REPO}"))?;
|
| 324 |
Ok((config, weights))
|
| 325 |
}
|
| 326 |
-
|
| 327 |
-
/// Scales `width` x `height` by the factor that maps the longer side onto
/// `max_side`, keeping the aspect ratio.
///
/// Each side is rounded to the nearest integer and never drops below 1.
fn resize_keep_aspect_dims(width: u32, height: u32, max_side: u32) -> (u32, u32) {
    let longest = width.max(height);
    let scale = max_side as f32 / longest as f32;
    // Same f32 arithmetic for both sides so they stay proportional.
    let fit = |side: u32| -> u32 { ((side as f32 * scale).round() as u32).max(1) };
    (fit(width), fit(height))
}
|
| 334 |
-
|
| 335 |
-
/// Returns `value` unchanged when it is already a multiple of `multiple`,
/// otherwise the next larger multiple.
fn round_up_multiple(value: u32, multiple: u32) -> u32 {
    if value.is_multiple_of(multiple) {
        return value;
    }
    // Distance from `value` up to the next multiple.
    let shortfall = multiple - value % multiple;
    value + shortfall
}
|
| 342 |
-
|
| 343 |
-
/// Returns a copy of `original` in which every pixel whose `mask` value is
/// non-zero has been replaced by the pixel at the same position in
/// `predicted`.
fn composite_rgb(original: &RgbImage, predicted: &RgbImage, mask: &GrayImage) -> RgbImage {
    let mut blended = original.clone();
    for (x, y, dst) in blended.enumerate_pixels_mut() {
        let inside_mask = mask.get_pixel(x, y).0[0] > 0;
        if inside_mask {
            *dst = *predicted.get_pixel(x, y);
        }
    }
    blended
}
|
| 354 |
-
|
| 355 |
-
#[cfg(test)]
mod tests {
    use super::{resize_keep_aspect_dims, round_up_multiple};

    #[test]
    fn resize_keep_aspect_matches_upstream_rounding() {
        // Landscape and portrait orientation of the same frame.
        let cases = [((1600, 900), (1024, 576)), ((900, 1600), (576, 1024))];
        for ((width, height), expected) in cases {
            assert_eq!(resize_keep_aspect_dims(width, height, 1024), expected);
        }
    }

    #[test]
    fn round_up_multiple_expands_to_next_valid_shape() {
        // (input, expected) pairs for a multiple of 8.
        for (value, expected) in [(1024, 1024), (1025, 1032), (7, 8)] {
            assert_eq!(round_up_multiple(value, 8), expected);
        }
    }
}
|
|
|
|
| 7 |
|
| 8 |
use anyhow::{Context, Result, bail};
|
| 9 |
use candle_core::{DType, Device, Tensor};
|
| 10 |
+
use image::{DynamicImage, GenericImageView, GrayImage, RgbImage};
|
|
|
|
|
|
|
|
|
|
| 11 |
use koharu_runtime::RuntimeManager;
|
| 12 |
use serde::Deserialize;
|
| 13 |
use tracing::instrument;
|
| 14 |
|
| 15 |
use crate::{
|
| 16 |
device,
|
| 17 |
+
inpainting::{
|
| 18 |
+
HdStrategyConfig, InpaintForward, binarize_mask, extract_alpha, restore_alpha_channel,
|
| 19 |
+
run_inpaint, try_fill_balloon,
|
| 20 |
+
},
|
| 21 |
loading,
|
| 22 |
};
|
| 23 |
|
|
|
|
| 49 |
device: Device,
|
| 50 |
}
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
#[derive(Debug, Clone, Deserialize)]
|
| 53 |
struct AotInpaintingConfig {
|
| 54 |
model_type: String,
|
|
|
|
| 127 |
})
|
| 128 |
}
|
| 129 |
|
| 130 |
+
/// Default strategy: Resize, using the model's shipped `default_max_side`
|
| 131 |
+
/// as the resize limit. Matches pre-refactor behaviour.
|
| 132 |
+
pub fn default_config(&self) -> HdStrategyConfig {
|
| 133 |
+
HdStrategyConfig::aot_default(
|
| 134 |
+
self.config.default_max_side,
|
| 135 |
+
self.config.pad_multiple as u32,
|
| 136 |
+
)
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
#[instrument(level = "debug", skip_all)]
|
| 140 |
pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
|
| 141 |
+
self.inference_with_config(image, mask, &self.default_config())
|
| 142 |
}
|
| 143 |
|
| 144 |
#[instrument(level = "debug", skip_all)]
|
| 145 |
+
pub fn inference_with_config(
|
| 146 |
&self,
|
| 147 |
image: &DynamicImage,
|
| 148 |
mask: &DynamicImage,
|
| 149 |
+
cfg: &HdStrategyConfig,
|
| 150 |
) -> Result<DynamicImage> {
|
|
|
|
|
|
|
|
|
|
| 151 |
if image.dimensions() != mask.dimensions() {
|
| 152 |
bail!(
|
| 153 |
"image and mask dimensions dismatch: image is {:?}, mask is {:?}",
|
|
|
|
| 157 |
}
|
| 158 |
|
| 159 |
let started = Instant::now();
|
| 160 |
+
let binary_mask = binarize_mask(mask);
|
| 161 |
+
let image_rgb = image.to_rgb8();
|
| 162 |
+
let forward = AotForward { aot: self };
|
| 163 |
+
let output_rgb = run_inpaint(&forward, &image_rgb, &binary_mask, cfg)?;
|
|
|
|
| 164 |
|
| 165 |
tracing::info!(
|
| 166 |
width = image.width(),
|
| 167 |
height = image.height(),
|
| 168 |
+
resize_limit = cfg.resize_limit,
|
|
|
|
|
|
|
| 169 |
total_ms = started.elapsed().as_millis(),
|
| 170 |
"aot inpainting timings"
|
| 171 |
);
|
| 172 |
|
| 173 |
if image.color().has_alpha() {
|
| 174 |
let alpha = extract_alpha(&image.to_rgba8());
|
| 175 |
+
let rgba = restore_alpha_channel(&output_rgb, &alpha, &binary_mask);
|
| 176 |
Ok(DynamicImage::ImageRgba8(rgba))
|
| 177 |
} else {
|
| 178 |
+
Ok(DynamicImage::ImageRgb8(output_rgb))
|
| 179 |
}
|
| 180 |
}
|
| 181 |
|
| 182 |
+
/// Raw model forward on a pre-padded RGB image + mask. Input spatial dims
|
| 183 |
+
/// must already be multiples of `pad_multiple` — the HD-strategy dispatcher
|
| 184 |
+
/// handles this.
|
| 185 |
+
fn forward_rgb(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
|
| 186 |
+
let (w, h) = image.dimensions();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
let image_tensor = (Tensor::from_vec(
|
| 188 |
+
image.clone().into_raw(),
|
| 189 |
+
(1, h as usize, w as usize, 3),
|
| 190 |
&self.device,
|
| 191 |
)?
|
| 192 |
.permute((0, 3, 1, 2))?
|
|
|
|
| 195 |
let image_tensor = (image_tensor - 1.0)?;
|
| 196 |
|
| 197 |
let mask_tensor = Tensor::from_vec(
|
| 198 |
+
mask.clone().into_raw(),
|
| 199 |
+
(1, h as usize, w as usize, 1),
|
| 200 |
&self.device,
|
| 201 |
)?
|
| 202 |
.permute((0, 3, 1, 2))?
|
| 203 |
.to_dtype(DType::F32)?;
|
| 204 |
let mask_tensor = (mask_tensor / 255.0)?;
|
| 205 |
let mask_inv = (Tensor::ones_like(&mask_tensor)? - &mask_tensor)?;
|
| 206 |
+
let mask_inv_rgb = mask_inv.broadcast_as((1, 3, h as usize, w as usize))?;
|
|
|
|
| 207 |
let masked_image = (&image_tensor * &mask_inv_rgb)?;
|
| 208 |
|
| 209 |
+
let output = self.model.forward(&masked_image, &mask_tensor)?;
|
| 210 |
+
self.postprocess(&output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
}
|
| 212 |
|
| 213 |
+
fn postprocess(&self, output: &Tensor) -> Result<RgbImage> {
|
| 214 |
let output = output.to_device(&Device::Cpu)?.squeeze(0)?;
|
| 215 |
let (channels, height, width) = output.dims3()?;
|
| 216 |
if channels != 3 {
|
|
|
|
| 223 |
.to_dtype(DType::U8)?
|
| 224 |
.flatten_all()?
|
| 225 |
.to_vec1::<u8>()?;
|
| 226 |
+
RgbImage::from_raw(width as u32, height as u32, raw)
|
| 227 |
+
.ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
|
| 231 |
+
struct AotForward<'a> {
|
| 232 |
+
aot: &'a AotInpainting,
|
| 233 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
impl InpaintForward for AotForward<'_> {
|
| 236 |
+
fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
|
| 237 |
+
if mask.pixels().all(|p| p.0[0] == 0) {
|
| 238 |
+
return Ok(image.clone());
|
| 239 |
+
}
|
| 240 |
+
// Same flat-balloon fast path as Lama: skip the model when the mask
|
| 241 |
+
// sits in a uniform-background bubble. Fires per-crop under the Crop
|
| 242 |
+
// strategy; generally no-ops on whole-image forwards under Resize.
|
| 243 |
+
if let Some(filled) = try_fill_balloon(image, mask) {
|
| 244 |
+
return Ok(filled);
|
| 245 |
+
}
|
| 246 |
+
self.aot.forward_rgb(image, mask)
|
| 247 |
}
|
| 248 |
}
|
| 249 |
|
|
|
|
| 264 |
.with_context(|| format!("failed to download {SAFETENSORS_FILENAME} from {HF_REPO}"))?;
|
| 265 |
Ok((config, weights))
|
| 266 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
koharu-ml/src/inpainting/balloon.rs
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Balloon-fill fast path for inpainting.
|
| 2 |
+
//!
|
| 3 |
+
//! When a mask sits inside a speech bubble with a near-uniform background,
|
| 4 |
+
//! the model can be skipped entirely: fill the masked pixels with the median
|
| 5 |
+
//! background colour of the balloon. This is purely image processing, so
|
| 6 |
+
//! every erase model (Lama, AoT) can use it as a pre-model pass.
|
| 7 |
+
//!
|
| 8 |
+
//! Effectiveness depends on the caller handing us one bubble at a time —
|
| 9 |
+
//! which is exactly what the Crop strategy does, since each crop corresponds
|
| 10 |
+
//! to a connected mask contour. On a whole-image forward (Resize strategy),
|
| 11 |
+
//! `extract_balloon_mask` usually fails to find a single containing contour
|
| 12 |
+
//! and we fall through to the model.
|
| 13 |
+
|
| 14 |
+
use image::{DynamicImage, GrayImage, Luma, Rgb, RgbImage};
|
| 15 |
+
use imageproc::{
|
| 16 |
+
contours::find_contours, distance_transform::Norm, drawing::draw_polygon_mut, edges::canny,
|
| 17 |
+
filter::gaussian_blur_f32, morphology::dilate, point::Point,
|
| 18 |
+
};
|
| 19 |
+
|
| 20 |
+
/// Low threshold argument passed to `canny` when `extract_balloon_mask`
/// builds its edge map.
const BALLOON_CANNY_LOW: f32 = 70.0;
|
| 21 |
+
/// High threshold argument passed to `canny` when `extract_balloon_mask`
/// builds its edge map.
const BALLOON_CANNY_HIGH: f32 = 140.0;
|
| 22 |
+
/// Rejection limit used by `try_fill_balloon` when the spread between the
/// three per-channel deviations is at most `SIMPLE_BG_CHANNEL_STD_SWITCH`:
/// the fast path is refused once the largest channel deviation reaches it.
const SIMPLE_BG_THRESHOLD_LOW_VARIANCE: f64 = 10.0;
|
| 23 |
+
/// Stricter rejection limit used by `try_fill_balloon` when the spread
/// between the three per-channel deviations exceeds
/// `SIMPLE_BG_CHANNEL_STD_SWITCH`.
const SIMPLE_BG_THRESHOLD_HIGH_VARIANCE: f64 = 7.0;
|
| 24 |
+
/// Cut-over value compared against `stddev3` of the per-channel deviations
/// to pick between the two `SIMPLE_BG_THRESHOLD_*` limits.
const SIMPLE_BG_CHANNEL_STD_SWITCH: f64 = 1.0;
|
| 25 |
+
|
| 26 |
+
/// Bounding box as `[min_x, min_y, max_x, max_y]`; the helpers in this file
/// store the max side as "largest coordinate + 1".
type Xyxy = [u32; 4];
|
| 27 |
+
|
| 28 |
+
/// Result of `extract_balloon_mask`.
pub(crate) struct BalloonMasks {
|
| 29 |
+
/// 255 inside the filled polygon of the selected contour, 0 elsewhere.
pub balloon_mask: GrayImage,
|
| 30 |
+
/// Copy of `balloon_mask` with every pixel that is non-zero in the input
/// mask cleared to 0.
pub non_text_mask: GrayImage,
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
/// Return an image with the masked pixels painted the balloon's median
|
| 34 |
+
/// background colour, iff a containing bubble with low background variance
|
| 35 |
+
/// can be identified. `None` means "no confident fast path; call the model".
|
| 36 |
+
pub fn try_fill_balloon(image: &RgbImage, mask: &GrayImage) -> Option<RgbImage> {
|
| 37 |
+
let masks = extract_balloon_mask(image, mask)?;
|
| 38 |
+
let average_bg_color = median_rgb(image, &masks.non_text_mask)?;
|
| 39 |
+
let std_rgb = color_stddev(image, &masks.non_text_mask, average_bg_color);
|
| 40 |
+
let inpaint_thresh = if stddev3(std_rgb) > SIMPLE_BG_CHANNEL_STD_SWITCH {
|
| 41 |
+
SIMPLE_BG_THRESHOLD_HIGH_VARIANCE
|
| 42 |
+
} else {
|
| 43 |
+
SIMPLE_BG_THRESHOLD_LOW_VARIANCE
|
| 44 |
+
};
|
| 45 |
+
let std_max = std_rgb.into_iter().fold(0.0, f64::max);
|
| 46 |
+
|
| 47 |
+
if std_max >= inpaint_thresh {
|
| 48 |
+
return None;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
let mut result = image.clone();
|
| 52 |
+
let fill = [
|
| 53 |
+
average_bg_color[0] as u8,
|
| 54 |
+
average_bg_color[1] as u8,
|
| 55 |
+
average_bg_color[2] as u8,
|
| 56 |
+
];
|
| 57 |
+
for (x, y, pixel) in masks.balloon_mask.enumerate_pixels() {
|
| 58 |
+
if pixel.0[0] > 0 {
|
| 59 |
+
result.put_pixel(x, y, Rgb(fill));
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
Some(result)
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
pub(crate) fn extract_balloon_mask(image: &RgbImage, mask: &GrayImage) -> Option<BalloonMasks> {
|
| 67 |
+
if image.dimensions() != mask.dimensions() {
|
| 68 |
+
return None;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
let text_bbox = non_zero_bbox(mask)?;
|
| 72 |
+
let text_sum = count_nonzero(mask);
|
| 73 |
+
if text_sum == 0 {
|
| 74 |
+
return None;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
let gray = DynamicImage::ImageRgb8(image.clone()).to_luma8();
|
| 78 |
+
let blurred = gaussian_blur_f32(&gray, 1.0);
|
| 79 |
+
let mut cannyed = canny(&blurred, BALLOON_CANNY_LOW, BALLOON_CANNY_HIGH);
|
| 80 |
+
cannyed = dilate(&cannyed, Norm::LInf, 1);
|
| 81 |
+
draw_binary_border(&mut cannyed);
|
| 82 |
+
subtract_binary_mask(&mut cannyed, mask);
|
| 83 |
+
|
| 84 |
+
let contours = find_contours::<i32>(&cannyed);
|
| 85 |
+
let (width, height) = cannyed.dimensions();
|
| 86 |
+
let mut best_mask = None;
|
| 87 |
+
let mut best_area = f64::INFINITY;
|
| 88 |
+
|
| 89 |
+
for contour in contours {
|
| 90 |
+
let Some(polygon) = contour_polygon(&contour.points) else {
|
| 91 |
+
continue;
|
| 92 |
+
};
|
| 93 |
+
let bbox = polygon_bbox(&polygon)?;
|
| 94 |
+
if bbox[0] > text_bbox[0]
|
| 95 |
+
|| bbox[1] > text_bbox[1]
|
| 96 |
+
|| bbox[2] < text_bbox[2]
|
| 97 |
+
|| bbox[3] < text_bbox[3]
|
| 98 |
+
{
|
| 99 |
+
continue;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
let mut candidate = GrayImage::new(width, height);
|
| 103 |
+
draw_polygon_mut(&mut candidate, &polygon, Luma([255u8]));
|
| 104 |
+
if count_overlap(&candidate, mask) < text_sum {
|
| 105 |
+
continue;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
let area = polygon_area(&polygon);
|
| 109 |
+
if area < best_area {
|
| 110 |
+
best_area = area;
|
| 111 |
+
best_mask = Some(candidate);
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
let balloon_mask = best_mask?;
|
| 116 |
+
let mut non_text_mask = balloon_mask.clone();
|
| 117 |
+
for (x, y, pixel) in mask.enumerate_pixels() {
|
| 118 |
+
if pixel.0[0] > 0 {
|
| 119 |
+
non_text_mask.put_pixel(x, y, Luma([0]));
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
Some(BalloonMasks {
|
| 124 |
+
balloon_mask,
|
| 125 |
+
non_text_mask,
|
| 126 |
+
})
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
/// Converts contour points into an open polygon.
///
/// A trailing point equal to the first one is dropped. Returns `None` when
/// fewer than three points are supplied or fewer than three remain.
fn contour_polygon(points: &[Point<i32>]) -> Option<Vec<Point<i32>>> {
    if points.len() < 3 {
        return None;
    }
    let open = match points.split_last() {
        Some((last, rest)) if points.first() == Some(last) => rest,
        _ => points,
    };
    (open.len() >= 3).then(|| open.to_vec())
}
|
| 142 |
+
|
| 143 |
+
/// Axis-aligned bounding box of `points` as `[min_x, min_y, max_x, max_y]`,
/// with the max side stored as "largest coordinate + 1".
///
/// Negative coordinates are clamped to 0 on every side before the conversion
/// to `u32`, so a negative maximum can no longer wrap around to a huge value.
/// Returns `None` for an empty slice.
fn polygon_bbox(points: &[Point<i32>]) -> Option<Xyxy> {
    let first = points.first()?;
    let (mut min_x, mut min_y) = (first.x, first.y);
    let (mut max_x, mut max_y) = (first.x, first.y);
    for point in &points[1..] {
        min_x = min_x.min(point.x);
        min_y = min_y.min(point.y);
        max_x = max_x.max(point.x);
        max_y = max_y.max(point.y);
    }

    // After clamping, the value is in 0..=i32::MAX, so the cast is lossless
    // and the `+ 1` below cannot overflow a u32.
    let clamp = |coord: i32| coord.max(0) as u32;
    Some([
        clamp(min_x),
        clamp(min_y),
        clamp(max_x) + 1,
        clamp(max_y) + 1,
    ])
}
|
| 163 |
+
|
| 164 |
+
fn polygon_area(points: &[Point<i32>]) -> f64 {
|
| 165 |
+
let mut area = 0.0;
|
| 166 |
+
for index in 0..points.len() {
|
| 167 |
+
let current = points[index];
|
| 168 |
+
let next = points[(index + 1) % points.len()];
|
| 169 |
+
area += f64::from(current.x) * f64::from(next.y) - f64::from(next.x) * f64::from(current.y);
|
| 170 |
+
}
|
| 171 |
+
area.abs() * 0.5
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
fn draw_binary_border(image: &mut GrayImage) {
|
| 175 |
+
let width = image.width();
|
| 176 |
+
let height = image.height();
|
| 177 |
+
if width == 0 || height == 0 {
|
| 178 |
+
return;
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
for x in 0..width {
|
| 182 |
+
image.put_pixel(x, 0, Luma([255]));
|
| 183 |
+
image.put_pixel(x, height - 1, Luma([255]));
|
| 184 |
+
}
|
| 185 |
+
for y in 0..height {
|
| 186 |
+
image.put_pixel(0, y, Luma([255]));
|
| 187 |
+
image.put_pixel(width - 1, y, Luma([255]));
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
/// Clears (sets to 0) every pixel of `image` whose counterpart in `mask` is
/// non-zero. `mask` is read at each coordinate of `image`.
fn subtract_binary_mask(image: &mut GrayImage, mask: &GrayImage) {
    for (x, y, target) in image.enumerate_pixels_mut() {
        let masked = mask.get_pixel(x, y).0[0] != 0;
        if masked {
            target.0[0] = 0;
        }
    }
}
|
| 198 |
+
|
| 199 |
+
/// Bounding box of the non-zero pixels of `mask` as
/// `[min_x, min_y, max_x + 1, max_y + 1]`, or `None` when every pixel is 0.
fn non_zero_bbox(mask: &GrayImage) -> Option<Xyxy> {
    mask.enumerate_pixels()
        .filter(|(_, _, px)| px.0[0] != 0)
        // Accumulate inclusive extents; the first hit seeds all four sides.
        .fold(None::<Xyxy>, |extent, (x, y, _)| {
            Some(match extent {
                None => [x, y, x, y],
                Some([x0, y0, x1, y1]) => [x0.min(x), y0.min(y), x1.max(x), y1.max(y)],
            })
        })
        .map(|[x0, y0, x1, y1]| [x0, y0, x1.saturating_add(1), y1.saturating_add(1)])
}
|
| 225 |
+
|
| 226 |
+
fn count_nonzero(mask: &GrayImage) -> u32 {
|
| 227 |
+
mask.pixels().filter(|pixel| pixel.0[0] > 0).count() as u32
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
/// Number of pixel positions at which both `left` and `right` are non-zero.
/// The two pixel sequences are walked in lockstep and the walk stops at the
/// end of the shorter one.
fn count_overlap(left: &GrayImage, right: &GrayImage) -> u32 {
    let mut shared = 0usize;
    for (a, b) in left.pixels().zip(right.pixels()) {
        if a.0[0] != 0 && b.0[0] != 0 {
            shared += 1;
        }
    }
    shared as u32
}
|
| 236 |
+
|
| 237 |
+
fn median_rgb(image: &RgbImage, mask: &GrayImage) -> Option<[f64; 3]> {
|
| 238 |
+
let mut channels = [Vec::new(), Vec::new(), Vec::new()];
|
| 239 |
+
for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
|
| 240 |
+
if mask_pixel.0[0] == 0 {
|
| 241 |
+
continue;
|
| 242 |
+
}
|
| 243 |
+
channels[0].push(pixel.0[0]);
|
| 244 |
+
channels[1].push(pixel.0[1]);
|
| 245 |
+
channels[2].push(pixel.0[2]);
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
Some([
|
| 249 |
+
median_channel(&channels[0])?,
|
| 250 |
+
median_channel(&channels[1])?,
|
| 251 |
+
median_channel(&channels[2])?,
|
| 252 |
+
])
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
/// Median of `values`: the middle element for an odd count, the mean of the
/// two middle elements for an even count, `None` for an empty slice.
fn median_channel(values: &[u8]) -> Option<f64> {
    let mut sorted = values.to_vec();
    sorted.sort_unstable();

    let mid = sorted.len() / 2;
    // `get` yields `None` exactly when the input was empty.
    let upper = f64::from(*sorted.get(mid)?);
    if sorted.len() % 2 == 1 {
        return Some(upper);
    }
    let lower = f64::from(sorted[mid - 1]);
    Some((lower + upper) / 2.0)
}
|
| 269 |
+
|
| 270 |
+
/// Per-channel root-mean-square deviation from `median`, taken over the
/// `image` pixels selected by non-zero `mask` pixels.
///
/// Returns `[f64::INFINITY; 3]` when the mask selects no pixel.
fn color_stddev(image: &RgbImage, mask: &GrayImage, median: [f64; 3]) -> [f64; 3] {
    let mut squared = [0.0f64; 3];
    let mut samples = 0usize;

    for (rgb, flag) in image.pixels().zip(mask.pixels()) {
        if flag.0[0] == 0 {
            continue;
        }
        samples += 1;
        for (total, (&value, &center)) in squared.iter_mut().zip(rgb.0.iter().zip(median.iter())) {
            let delta = f64::from(value) - center;
            *total += delta * delta;
        }
    }

    if samples == 0 {
        return [f64::INFINITY; 3];
    }

    let count = samples as f64;
    squared.map(|total| (total / count).sqrt())
}
|
| 295 |
+
|
| 296 |
+
/// Population standard deviation (divisor 3) of the three `values`.
fn stddev3(values: [f64; 3]) -> f64 {
    let mean = values.iter().sum::<f64>() / 3.0;
    let mut squared = 0.0;
    for value in values {
        let delta = value - mean;
        squared += delta * delta;
    }
    (squared / 3.0).sqrt()
}
|
| 308 |
+
|
| 309 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use imageproc::drawing::draw_hollow_rect_mut;
    use imageproc::rect::Rect;

    /// Builds a `width` x `height` mask that is 255 inside the half-open
    /// rectangle `xs` x `ys` and 0 everywhere else.
    fn rect_mask(
        width: u32,
        height: u32,
        xs: std::ops::Range<u32>,
        ys: std::ops::Range<u32>,
    ) -> GrayImage {
        let mut mask = GrayImage::new(width, height);
        for y in ys {
            for x in xs.clone() {
                mask.put_pixel(x, y, Luma([255]));
            }
        }
        mask
    }

    #[test]
    fn extract_balloon_mask_prefers_smallest_covering_contour() {
        // Two nested black frames on white; the text sits inside the inner one.
        let black = Rgb([0, 0, 0]);
        let mut image = RgbImage::from_pixel(80, 80, Rgb([255, 255, 255]));
        let frames = [
            Rect::at(4, 4).of_size(72, 72),
            Rect::at(20, 20).of_size(28, 20),
        ];
        for frame in frames {
            draw_hollow_rect_mut(&mut image, frame, black);
        }

        let mask = rect_mask(80, 80, 24..44, 24..36);

        let masks = extract_balloon_mask(&image, &mask).expect("balloon should be detected");
        let balloon_pixels = count_nonzero(&masks.balloon_mask);

        assert!(
            balloon_pixels < 900,
            "expected inner contour fill, got {balloon_pixels}"
        );
        assert!(
            balloon_pixels > 250,
            "expected meaningful bubble area, got {balloon_pixels}"
        );
    }

    #[test]
    fn simple_balloon_chooses_fill_but_textured_balloon_does_not() {
        let mut flat = RgbImage::from_pixel(64, 64, Rgb([240, 240, 240]));
        draw_hollow_rect_mut(&mut flat, Rect::at(8, 8).of_size(48, 32), Rgb([0, 0, 0]));

        let mask = rect_mask(64, 64, 18..46, 18..30);

        assert!(try_fill_balloon(&flat, &mask).is_some());

        // Overwrite the balloon interior with a deterministic colour ramp.
        let mut textured = flat.clone();
        for y in 9..39 {
            for x in 9..55 {
                let noise = ((x + y) % 23) as u8;
                let shade = Rgb([200 + noise, 210 + (noise / 2), 220 - (noise / 3)]);
                textured.put_pixel(x, y, shade);
            }
        }

        assert!(try_fill_balloon(&textured, &mask).is_none());
    }
}
|
koharu-ml/src/{inpainting.rs → inpainting/mod.rs}
RENAMED
|
@@ -1,3 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
use image::{DynamicImage, GrayImage, Luma, RgbImage, Rgba, RgbaImage};
|
| 2 |
use imageproc::{distance_transform::Norm, morphology::dilate};
|
| 3 |
|
|
|
|
| 1 |
+
//! Shared inpainting infrastructure: alpha handling, mask prep, and the
|
| 2 |
+
//! HD-strategy dispatcher used by every erase model (Lama, AoT).
|
| 3 |
+
//!
|
| 4 |
+
//! The strategy dispatcher mirrors IOPaint's `InpaintModel.__call__`: one place
|
| 5 |
+
//! decides between Original / Resize / Crop based on image size and a
|
| 6 |
+
//! per-model config. Concrete models only implement the raw forward pass.
|
| 7 |
+
|
| 8 |
+
pub mod balloon;
|
| 9 |
+
pub mod strategy;
|
| 10 |
+
|
| 11 |
+
pub use balloon::try_fill_balloon;
|
| 12 |
+
pub use strategy::{HdStrategy, HdStrategyConfig, InpaintForward, run_inpaint};
|
| 13 |
+
|
| 14 |
use image::{DynamicImage, GrayImage, Luma, RgbImage, Rgba, RgbaImage};
|
| 15 |
use imageproc::{distance_transform::Norm, morphology::dilate};
|
| 16 |
|
koharu-ml/src/inpainting/strategy.rs
ADDED
|
@@ -0,0 +1,539 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! HD-strategy dispatcher for erase models.
|
| 2 |
+
//!
|
| 3 |
+
//! Mirrors IOPaint's `InpaintModel.__call__` (`iopaint/model/base.py`): one
|
| 4 |
+
//! entry point chooses between Original / Resize / Crop based on image size,
|
| 5 |
+
//! then delegates the raw forward to a model-specific [`InpaintForward`].
|
| 6 |
+
//!
|
| 7 |
+
//! ## Strategies
|
| 8 |
+
//!
|
| 9 |
+
//! - **Original** — pad to `pad_mod`, forward, unpad. Highest VRAM.
|
| 10 |
+
//! - **Resize** — downscale so `max(h,w) <= resize_limit`, pad, forward, unpad,
|
| 11 |
+
//! upscale, then restore pixels outside the mask from the original. Medium
|
| 12 |
+
//! VRAM, preserves quality outside the mask.
|
| 13 |
+
//! - **Crop** — extract one bounding box per connected mask contour, expand by
|
| 14 |
+
//! `crop_margin` on each side, forward each crop independently, paste back.
|
| 15 |
+
//! Lowest VRAM. Default for manga (many small speech bubbles).
|
| 16 |
+
//!
|
| 17 |
+
//! The Crop path uses [`pad_forward_bounded`] per crop, so an oversized crop
|
| 18 |
+
//! (e.g. a brush stroke covering most of a page) falls back to the Resize path
|
| 19 |
+
//! inside that single crop. Neither Crop nor Resize should OOM on a reasonable GPU
|
| 20 |
+
//! provided `resize_limit` is within VRAM budget.
|
| 21 |
+
//!
|
| 22 |
+
//! Mask boxes come from `imageproc::contours::find_contours` on the binarized
|
| 23 |
+
//! mask — equivalent to OpenCV's `cv2.findContours(RETR_EXTERNAL)` that IOPaint
|
| 24 |
+
//! uses. Only `BorderType::Outer` contours become boxes (holes are ignored).
|
| 25 |
+
|
| 26 |
+
use anyhow::Result;
|
| 27 |
+
use image::{
|
| 28 |
+
GrayImage, RgbImage,
|
| 29 |
+
imageops::{FilterType, crop_imm, replace, resize},
|
| 30 |
+
};
|
| 31 |
+
use imageproc::contours::{BorderType, find_contours};
|
| 32 |
+
|
| 33 |
+
/// Preprocessing applied around the raw model forward by [`run_inpaint`].
///
/// The module-level docs describe the trade-offs of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HdStrategy {
    /// Pad to `pad_mod` and forward the whole image at its native size.
    Original,
    /// Downscale to `resize_limit` before the forward, then upscale the result.
    Resize,
    /// Forward one margin-expanded crop per mask contour and paste each back.
    Crop,
}
|
| 41 |
+
|
| 42 |
+
/// Tunable knobs for [`run_inpaint`]. Defaults match IOPaint
|
| 43 |
+
/// (`iopaint/schema.py` — trigger 800, margin 128, resize limit 1280).
|
| 44 |
+
#[derive(Debug, Clone, Copy)]
|
| 45 |
+
pub struct HdStrategyConfig {
|
| 46 |
+
pub strategy: HdStrategy,
|
| 47 |
+
/// Crop strategy only activates when `max(image.w, image.h) >
|
| 48 |
+
/// crop_trigger_size`. Smaller images fall through to Original.
|
| 49 |
+
pub crop_trigger_size: u32,
|
| 50 |
+
/// Additive margin (pixels) added to each side of a mask bounding box when
|
| 51 |
+
/// cropping. Controls how much context the model sees around the mask.
|
| 52 |
+
pub crop_margin: u32,
|
| 53 |
+
/// Hard ceiling on the forward's longer side. Applied by Resize strategy at
|
| 54 |
+
/// the top level, and as a nested fallback inside oversized crops.
|
| 55 |
+
pub resize_limit: u32,
|
| 56 |
+
/// Model-required spatial divisor. LaMa / AoT both need 8; larger for
|
| 57 |
+
/// models with deeper downsampling.
|
| 58 |
+
pub pad_mod: u32,
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
impl HdStrategyConfig {
|
| 62 |
+
/// Manga-tuned default for Lama: Crop strategy with IOPaint's defaults.
|
| 63 |
+
/// Many small speech bubbles → many small per-bubble crops → trivial VRAM.
|
| 64 |
+
pub const fn lama_default() -> Self {
|
| 65 |
+
Self {
|
| 66 |
+
strategy: HdStrategy::Crop,
|
| 67 |
+
crop_trigger_size: 800,
|
| 68 |
+
crop_margin: 128,
|
| 69 |
+
resize_limit: 1280,
|
| 70 |
+
pad_mod: 8,
|
| 71 |
+
}
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/// Default for AoT: whole-image Resize with a fixed upper bound (AoT's
|
| 75 |
+
/// upstream config calls this `default_max_side`).
|
| 76 |
+
pub const fn aot_default(resize_limit: u32, pad_mod: u32) -> Self {
|
| 77 |
+
Self {
|
| 78 |
+
strategy: HdStrategy::Resize,
|
| 79 |
+
crop_trigger_size: 800,
|
| 80 |
+
crop_margin: 128,
|
| 81 |
+
resize_limit,
|
| 82 |
+
pad_mod,
|
| 83 |
+
}
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/// `[x1, y1, x2, y2]` half-open rectangle: `x1,y1` inclusive, `x2,y2` exclusive.
|
| 88 |
+
pub type Xyxy = [u32; 4];
|
| 89 |
+
|
| 90 |
+
/// A raw forward pass on a (padded) image + mask, returning an image of the
|
| 91 |
+
/// same spatial size. Implementors are free to apply fast paths (e.g. Lama's
|
| 92 |
+
/// balloon-fill shortcut) before the model forward.
|
| 93 |
+
pub trait InpaintForward {
|
| 94 |
+
fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage>;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
/// Entry point: dispatch on `cfg.strategy` and return an RGB image with the
|
| 98 |
+
/// masked region inpainted. `mask` must already be binarized (0 or 255).
|
| 99 |
+
pub fn run_inpaint<F: InpaintForward>(
|
| 100 |
+
model: &F,
|
| 101 |
+
image: &RgbImage,
|
| 102 |
+
mask: &GrayImage,
|
| 103 |
+
cfg: &HdStrategyConfig,
|
| 104 |
+
) -> Result<RgbImage> {
|
| 105 |
+
assert_eq!(image.dimensions(), mask.dimensions());
|
| 106 |
+
let max_side = image.width().max(image.height());
|
| 107 |
+
|
| 108 |
+
match cfg.strategy {
|
| 109 |
+
HdStrategy::Crop if max_side > cfg.crop_trigger_size => run_crop(model, image, mask, cfg),
|
| 110 |
+
HdStrategy::Resize if max_side > cfg.resize_limit => run_resize(model, image, mask, cfg),
|
| 111 |
+
_ => pad_forward(model, image, mask, cfg.pad_mod),
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
fn run_crop<F: InpaintForward>(
|
| 116 |
+
model: &F,
|
| 117 |
+
image: &RgbImage,
|
| 118 |
+
mask: &GrayImage,
|
| 119 |
+
cfg: &HdStrategyConfig,
|
| 120 |
+
) -> Result<RgbImage> {
|
| 121 |
+
let boxes = boxes_from_mask(mask);
|
| 122 |
+
if boxes.is_empty() {
|
| 123 |
+
return Ok(image.clone());
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
tracing::debug!(
|
| 127 |
+
count = boxes.len(),
|
| 128 |
+
"inpaint crop strategy: one forward per mask contour"
|
| 129 |
+
);
|
| 130 |
+
|
| 131 |
+
let mut out = image.clone();
|
| 132 |
+
for b in boxes {
|
| 133 |
+
let (crop_img, crop_mask, [l, t, _r, _bt]) = crop_box(image, mask, b, cfg.crop_margin);
|
| 134 |
+
let crop_result = pad_forward_bounded(model, &crop_img, &crop_mask, cfg)?;
|
| 135 |
+
replace(&mut out, &crop_result, i64::from(l), i64::from(t));
|
| 136 |
+
}
|
| 137 |
+
Ok(out)
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
fn run_resize<F: InpaintForward>(
|
| 141 |
+
model: &F,
|
| 142 |
+
image: &RgbImage,
|
| 143 |
+
mask: &GrayImage,
|
| 144 |
+
cfg: &HdStrategyConfig,
|
| 145 |
+
) -> Result<RgbImage> {
|
| 146 |
+
let (w, h) = image.dimensions();
|
| 147 |
+
let (nw, nh) = scaled_dims(w, h, cfg.resize_limit);
|
| 148 |
+
tracing::debug!(
|
| 149 |
+
from_w = w,
|
| 150 |
+
from_h = h,
|
| 151 |
+
to_w = nw,
|
| 152 |
+
to_h = nh,
|
| 153 |
+
"inpaint resize strategy"
|
| 154 |
+
);
|
| 155 |
+
|
| 156 |
+
let small_img = resize(image, nw, nh, FilterType::Triangle);
|
| 157 |
+
let small_mask = rebinarize(&resize(mask, nw, nh, FilterType::Triangle));
|
| 158 |
+
|
| 159 |
+
let small_out = pad_forward(model, &small_img, &small_mask, cfg.pad_mod)?;
|
| 160 |
+
let full_out = resize(&small_out, w, h, FilterType::CatmullRom);
|
| 161 |
+
|
| 162 |
+
// Restore untouched pixels from the original so Resize only loses quality
|
| 163 |
+
// where we actually inpainted. Matches IOPaint's
|
| 164 |
+
// `original_pixel_indices = mask < 127`.
|
| 165 |
+
let mut out = full_out;
|
| 166 |
+
for y in 0..h {
|
| 167 |
+
for x in 0..w {
|
| 168 |
+
if mask.get_pixel(x, y).0[0] < 127 {
|
| 169 |
+
out.put_pixel(x, y, *image.get_pixel(x, y));
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
Ok(out)
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
/// `pad_forward` with a nested Resize fallback when the input exceeds
|
| 177 |
+
/// `resize_limit`. Used inside the Crop loop so oversized crops don't OOM.
|
| 178 |
+
fn pad_forward_bounded<F: InpaintForward>(
|
| 179 |
+
model: &F,
|
| 180 |
+
image: &RgbImage,
|
| 181 |
+
mask: &GrayImage,
|
| 182 |
+
cfg: &HdStrategyConfig,
|
| 183 |
+
) -> Result<RgbImage> {
|
| 184 |
+
if image.width().max(image.height()) > cfg.resize_limit {
|
| 185 |
+
run_resize(model, image, mask, cfg)
|
| 186 |
+
} else {
|
| 187 |
+
pad_forward(model, image, mask, cfg.pad_mod)
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
/// Pad both tensors to `pad_mod` on right/bottom with symmetric reflection,
|
| 192 |
+
/// forward through the model, then crop the output back to the input size.
|
| 193 |
+
/// Matches IOPaint's `_pad_forward` / `pad_img_to_modulo`.
|
| 194 |
+
fn pad_forward<F: InpaintForward>(
|
| 195 |
+
model: &F,
|
| 196 |
+
image: &RgbImage,
|
| 197 |
+
mask: &GrayImage,
|
| 198 |
+
pad_mod: u32,
|
| 199 |
+
) -> Result<RgbImage> {
|
| 200 |
+
let (w, h) = image.dimensions();
|
| 201 |
+
let pad_w = ceil_multiple(w, pad_mod);
|
| 202 |
+
let pad_h = ceil_multiple(h, pad_mod);
|
| 203 |
+
|
| 204 |
+
let out = if pad_w == w && pad_h == h {
|
| 205 |
+
model.forward(image, mask)?
|
| 206 |
+
} else {
|
| 207 |
+
let pad_img = symmetric_pad_rgb(image, pad_w, pad_h);
|
| 208 |
+
let pad_msk = symmetric_pad_gray(mask, pad_w, pad_h);
|
| 209 |
+
let padded_out = model.forward(&pad_img, &pad_msk)?;
|
| 210 |
+
crop_imm(&padded_out, 0, 0, w, h).to_image()
|
| 211 |
+
};
|
| 212 |
+
Ok(out)
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
/// External-contour bounding boxes of a binarized mask. Equivalent to
|
| 216 |
+
/// IOPaint's `boxes_from_mask` (`cv2.findContours(RETR_EXTERNAL)` +
|
| 217 |
+
/// `cv2.boundingRect`). Hole borders are discarded.
|
| 218 |
+
pub fn boxes_from_mask(mask: &GrayImage) -> Vec<Xyxy> {
|
| 219 |
+
let contours = find_contours::<i32>(mask);
|
| 220 |
+
let (mw, mh) = mask.dimensions();
|
| 221 |
+
let mut boxes = Vec::new();
|
| 222 |
+
for contour in contours {
|
| 223 |
+
if contour.border_type != BorderType::Outer || contour.points.is_empty() {
|
| 224 |
+
continue;
|
| 225 |
+
}
|
| 226 |
+
let mut min_x = i32::MAX;
|
| 227 |
+
let mut min_y = i32::MAX;
|
| 228 |
+
let mut max_x = i32::MIN;
|
| 229 |
+
let mut max_y = i32::MIN;
|
| 230 |
+
for p in &contour.points {
|
| 231 |
+
min_x = min_x.min(p.x);
|
| 232 |
+
min_y = min_y.min(p.y);
|
| 233 |
+
max_x = max_x.max(p.x);
|
| 234 |
+
max_y = max_y.max(p.y);
|
| 235 |
+
}
|
| 236 |
+
let x1 = (min_x.max(0) as u32).min(mw);
|
| 237 |
+
let y1 = (min_y.max(0) as u32).min(mh);
|
| 238 |
+
let x2 = (max_x.saturating_add(1).max(0) as u32).min(mw);
|
| 239 |
+
let y2 = (max_y.saturating_add(1).max(0) as u32).min(mh);
|
| 240 |
+
if x2 > x1 && y2 > y1 {
|
| 241 |
+
boxes.push([x1, y1, x2, y2]);
|
| 242 |
+
}
|
| 243 |
+
}
|
| 244 |
+
boxes
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
/// Expand `box_xyxy` by `margin` pixels on each side, clamped to the image.
|
| 248 |
+
/// When the expanded rect would overflow one edge, shift inward so the full
|
| 249 |
+
/// `(box + margin*2)` footprint still fits when possible — matches IOPaint's
|
| 250 |
+
/// `_crop_box` (`iopaint/model/base.py`).
|
| 251 |
+
pub fn crop_box(
|
| 252 |
+
image: &RgbImage,
|
| 253 |
+
mask: &GrayImage,
|
| 254 |
+
box_xyxy: Xyxy,
|
| 255 |
+
margin: u32,
|
| 256 |
+
) -> (RgbImage, GrayImage, Xyxy) {
|
| 257 |
+
let [bx1, by1, bx2, by2] = box_xyxy;
|
| 258 |
+
let (img_w, img_h) = image.dimensions();
|
| 259 |
+
let cx = (bx1 + bx2) / 2;
|
| 260 |
+
let cy = (by1 + by2) / 2;
|
| 261 |
+
let want_w = (bx2 - bx1) + margin * 2;
|
| 262 |
+
let want_h = (by2 - by1) + margin * 2;
|
| 263 |
+
let half_w = want_w / 2;
|
| 264 |
+
let half_h = want_h / 2;
|
| 265 |
+
|
| 266 |
+
// Signed desired bounds before clamping (i64 to preserve negatives).
|
| 267 |
+
let desire_l = cx as i64 - half_w as i64;
|
| 268 |
+
let desire_r = cx as i64 + half_w as i64;
|
| 269 |
+
let desire_t = cy as i64 - half_h as i64;
|
| 270 |
+
let desire_b = cy as i64 + half_h as i64;
|
| 271 |
+
|
| 272 |
+
let img_w_i = img_w as i64;
|
| 273 |
+
let img_h_i = img_h as i64;
|
| 274 |
+
|
| 275 |
+
let mut l = desire_l.max(0);
|
| 276 |
+
let mut r = desire_r.min(img_w_i);
|
| 277 |
+
let mut t = desire_t.max(0);
|
| 278 |
+
let mut b = desire_b.min(img_h_i);
|
| 279 |
+
|
| 280 |
+
if desire_l < 0 {
|
| 281 |
+
r = (r - desire_l).min(img_w_i);
|
| 282 |
+
}
|
| 283 |
+
if desire_r > img_w_i {
|
| 284 |
+
l = (l - (desire_r - img_w_i)).max(0);
|
| 285 |
+
}
|
| 286 |
+
if desire_t < 0 {
|
| 287 |
+
b = (b - desire_t).min(img_h_i);
|
| 288 |
+
}
|
| 289 |
+
if desire_b > img_h_i {
|
| 290 |
+
t = (t - (desire_b - img_h_i)).max(0);
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
let l = l.clamp(0, img_w_i) as u32;
|
| 294 |
+
let r = r.clamp(0, img_w_i) as u32;
|
| 295 |
+
let t = t.clamp(0, img_h_i) as u32;
|
| 296 |
+
let b = b.clamp(0, img_h_i) as u32;
|
| 297 |
+
let r = r.max(l + 1).min(img_w);
|
| 298 |
+
let b = b.max(t + 1).min(img_h);
|
| 299 |
+
|
| 300 |
+
let cw = r - l;
|
| 301 |
+
let ch = b - t;
|
| 302 |
+
let crop_img = crop_imm(image, l, t, cw, ch).to_image();
|
| 303 |
+
let crop_mask = crop_imm(mask, l, t, cw, ch).to_image();
|
| 304 |
+
(crop_img, crop_mask, [l, t, r, b])
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
/// Dimensions of a `w` x `h` image shrunk so its longer side equals
/// `max_side`, keeping the aspect ratio. Images that already fit are returned
/// unchanged, and neither output side drops below 1. Mirrors IOPaint's
/// `resize_max_size`.
pub fn scaled_dims(w: u32, h: u32, max_side: u32) -> (u32, u32) {
    let longest = w.max(h);
    if longest <= max_side {
        return (w, h);
    }

    let factor = f64::from(max_side) / f64::from(longest);
    // Round to the nearest pixel, but never collapse a side to zero.
    let shrink = |side: u32| ((f64::from(side) * factor).round() as u32).max(1);
    (shrink(w), shrink(h))
}
|
| 319 |
+
|
| 320 |
+
/// Round `v` up to the nearest multiple of `m`. A modulus of 0 means "no
/// alignment" and returns `v` unchanged.
fn ceil_multiple(v: u32, m: u32) -> u32 {
    // `checked_rem` is `None` exactly when `m == 0`.
    match v.checked_rem(m) {
        None | Some(0) => v,
        Some(remainder) => v + (m - remainder),
    }
}
|
| 327 |
+
|
| 328 |
+
/// Return a copy of `mask` thresholded back to pure 0 / 255: values above
/// 127 become 255, everything else becomes 0.
fn rebinarize(mask: &GrayImage) -> GrayImage {
    let mut binary = mask.clone();
    binary.pixels_mut().for_each(|px| {
        let level = &mut px.0[0];
        *level = if *level > 127 { 255 } else { 0 };
    });
    binary
}
|
| 335 |
+
|
| 336 |
+
/// Numpy-style `mode="symmetric"` padding, but only on the right/bottom edges
|
| 337 |
+
/// (we only ever pad up to `pad_mod - 1` pixels to reach a modulo boundary).
|
| 338 |
+
fn symmetric_pad_rgb(img: &RgbImage, new_w: u32, new_h: u32) -> RgbImage {
|
| 339 |
+
let (w, h) = img.dimensions();
|
| 340 |
+
if new_w == w && new_h == h {
|
| 341 |
+
return img.clone();
|
| 342 |
+
}
|
| 343 |
+
let mut out = RgbImage::new(new_w, new_h);
|
| 344 |
+
for y in 0..new_h {
|
| 345 |
+
let sy = reflect_index(y, h);
|
| 346 |
+
for x in 0..new_w {
|
| 347 |
+
let sx = reflect_index(x, w);
|
| 348 |
+
out.put_pixel(x, y, *img.get_pixel(sx, sy));
|
| 349 |
+
}
|
| 350 |
+
}
|
| 351 |
+
out
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
fn symmetric_pad_gray(img: &GrayImage, new_w: u32, new_h: u32) -> GrayImage {
|
| 355 |
+
let (w, h) = img.dimensions();
|
| 356 |
+
if new_w == w && new_h == h {
|
| 357 |
+
return img.clone();
|
| 358 |
+
}
|
| 359 |
+
let mut out = GrayImage::new(new_w, new_h);
|
| 360 |
+
for y in 0..new_h {
|
| 361 |
+
let sy = reflect_index(y, h);
|
| 362 |
+
for x in 0..new_w {
|
| 363 |
+
let sx = reflect_index(x, w);
|
| 364 |
+
out.put_pixel(x, y, *img.get_pixel(sx, sy));
|
| 365 |
+
}
|
| 366 |
+
}
|
| 367 |
+
out
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
/// Source index for position `i` of an axis of length `len` extended to the
/// right with numpy-style `mode="symmetric"` padding.
///
/// Indices `0..len` map to themselves; beyond that the sequence mirrors with
/// the edge sample repeated (`0 1 2 | 2 1 0 | 0 1 2 | ...`), i.e. it is
/// periodic with period `2 * len`. This stays correct when the pad is longer
/// than the axis, e.g. a 2-pixel-wide crop padded up to a multiple of 8.
///
/// Returns 0 when `len == 0`.
fn reflect_index(i: u32, len: u32) -> u32 {
    if len == 0 {
        return 0;
    }
    // Widen to u64 so `2 * len` cannot overflow.
    let len = u64::from(len);
    let period = 2 * len;
    let phase = u64::from(i) % period;
    // First half of each period runs forward, second half runs backward.
    let index = if phase < len {
        phase
    } else {
        period - 1 - phase
    };
    // `index < len <= u32::MAX`, so the narrowing cast is lossless.
    index as u32
}
|
| 387 |
+
|
| 388 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Luma, Rgb};

    /// Forward that hands its input back untouched, so tests observe only
    /// what the strategy code itself does to the pixels.
    struct IdentityForward;

    impl InpaintForward for IdentityForward {
        fn forward(&self, image: &RgbImage, _mask: &GrayImage) -> Result<RgbImage> {
            Ok(image.clone())
        }
    }

    /// Uniformly coloured RGB image.
    fn solid_rgb(w: u32, h: u32, rgb: [u8; 3]) -> RgbImage {
        RgbImage::from_pixel(w, h, Rgb(rgb))
    }

    /// Set every mask pixel in `xs` x `ys` to `value`.
    fn fill_rect(
        mask: &mut GrayImage,
        xs: std::ops::Range<u32>,
        ys: std::ops::Range<u32>,
        value: u8,
    ) {
        for y in ys {
            for x in xs.clone() {
                mask.put_pixel(x, y, Luma([value]));
            }
        }
    }

    #[test]
    fn ceil_multiple_rounds_up() {
        for (value, expected) in [(8, 8), (9, 16), (0, 0)] {
            assert_eq!(ceil_multiple(value, 8), expected);
        }
    }

    #[test]
    fn reflect_index_mirrors_beyond_boundary() {
        // len=5 → symmetric pads: [..., 2, 1, 0, 1, 2, 3, 4, 4, 3, 2, ...]
        // Padding is right-side only, so the interesting cases are i >= len.
        for (index, expected) in [(0, 0), (4, 4), (5, 4), (6, 3), (9, 0)] {
            assert_eq!(reflect_index(index, 5), expected);
        }
    }

    #[test]
    fn scaled_dims_preserves_aspect() {
        assert_eq!(scaled_dims(1600, 900, 1280), (1280, 720));
        assert_eq!(scaled_dims(800, 600, 1280), (800, 600));
        assert_eq!(scaled_dims(1000, 2000, 1280), (640, 1280));
    }

    #[test]
    fn boxes_from_mask_finds_each_contour() {
        let mut mask = GrayImage::new(100, 100);
        fill_rect(&mut mask, 10..25, 10..20, 255);
        fill_rect(&mut mask, 70..80, 50..60, 255);

        let mut boxes = boxes_from_mask(&mask);
        assert_eq!(boxes.len(), 2);
        boxes.sort_by_key(|b| b[0]);
        assert_eq!(boxes[0], [10, 10, 25, 20]);
        assert_eq!(boxes[1], [70, 50, 80, 60]);
    }

    #[test]
    fn boxes_from_mask_ignores_holes() {
        // Filled rectangle with a hole punched in the middle.
        let mut mask = GrayImage::new(50, 50);
        fill_rect(&mut mask, 5..45, 5..45, 255);
        fill_rect(&mut mask, 20..30, 20..30, 0);

        let boxes = boxes_from_mask(&mask);
        assert_eq!(boxes.len(), 1, "hole must not produce a second box");
    }

    #[test]
    fn crop_box_expands_by_margin_additively() {
        let img = solid_rgb(200, 200, [255, 255, 255]);
        let mask = GrayImage::new(200, 200);
        let (crop, _crop_mask, window) = crop_box(&img, &mask, [80, 80, 120, 120], 20);
        assert_eq!(window, [60, 60, 140, 140]);
        assert_eq!(crop.dimensions(), (80, 80));
    }

    #[test]
    fn crop_box_shifts_inward_at_edges() {
        let img = solid_rgb(100, 100, [255, 255, 255]);
        let mask = GrayImage::new(100, 100);
        // Box hugging the left edge: the desired crop would start at -10, so
        // the right edge moves outward to keep the (box + margin*2) width.
        let (_crop, _crop_mask, [l, t, r, b]) = crop_box(&img, &mask, [0, 40, 20, 60], 10);
        assert_eq!(l, 0);
        assert_eq!(r, 40);
        assert_eq!(t, 30);
        assert_eq!(b, 70);
    }

    #[test]
    fn crop_strategy_skips_when_mask_empty() {
        let img = solid_rgb(900, 900, [50, 60, 70]);
        let mask = GrayImage::new(900, 900);
        let cfg = HdStrategyConfig::lama_default();
        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();
        assert_eq!(out.get_pixel(0, 0).0, [50, 60, 70]);
    }

    #[test]
    fn resize_strategy_restores_unmasked_pixels() {
        // Image larger than `resize_limit`, so the Resize path really runs;
        // unmasked pixels must still come back identical.
        let mut img = solid_rgb(1600, 1200, [10, 20, 30]);
        // A single masked pixel with a different value.
        img.put_pixel(500, 500, Rgb([200, 200, 200]));
        let mut mask = GrayImage::new(1600, 1200);
        mask.put_pixel(500, 500, Luma([255]));

        let cfg = HdStrategyConfig {
            strategy: HdStrategy::Resize,
            resize_limit: 640,
            ..HdStrategyConfig::lama_default()
        };
        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();
        assert_eq!(out.get_pixel(0, 0).0, [10, 20, 30]);
        assert_eq!(out.get_pixel(1599, 1199).0, [10, 20, 30]);
    }

    #[test]
    fn crop_strategy_paste_bounds() {
        // Two masked blobs → two crops → image untouched outside the crops.
        let img = solid_rgb(1200, 1200, [100, 100, 100]);
        let mut mask = GrayImage::new(1200, 1200);
        fill_rect(&mut mask, 100..120, 100..120, 255);
        fill_rect(&mut mask, 900..920, 900..920, 255);

        let cfg = HdStrategyConfig::lama_default();
        let out = run_inpaint(&IdentityForward, &img, &mask, &cfg).unwrap();
        // IdentityForward is a no-op, so output == input everywhere.
        assert_eq!(out.get_pixel(0, 0).0, [100, 100, 100]);
        assert_eq!(out.get_pixel(500, 500).0, [100, 100, 100]);
    }
}
|
koharu-ml/src/lama/mod.rs
CHANGED
|
@@ -1,23 +1,18 @@
|
|
| 1 |
mod fft;
|
| 2 |
mod model;
|
| 3 |
|
| 4 |
-
use crate::types::TextRegion;
|
| 5 |
use anyhow::{Result, bail};
|
| 6 |
use candle_core::{DType, Device, Tensor};
|
| 7 |
-
use image::{
|
| 8 |
-
DynamicImage, GenericImageView, GrayImage, Luma, Rgb, RgbImage,
|
| 9 |
-
imageops::{crop_imm, replace},
|
| 10 |
-
};
|
| 11 |
-
use imageproc::{
|
| 12 |
-
contours::find_contours, distance_transform::Norm, drawing::draw_polygon_mut, edges::canny,
|
| 13 |
-
filter::gaussian_blur_f32, morphology::dilate, point::Point,
|
| 14 |
-
};
|
| 15 |
use koharu_runtime::RuntimeManager;
|
| 16 |
use tracing::instrument;
|
| 17 |
|
| 18 |
use crate::{
|
| 19 |
device,
|
| 20 |
-
inpainting::{
|
|
|
|
|
|
|
|
|
|
| 21 |
loading,
|
| 22 |
};
|
| 23 |
|
|
@@ -31,20 +26,6 @@ koharu_runtime::declare_hf_model_package!(
|
|
| 31 |
order: 130,
|
| 32 |
);
|
| 33 |
|
| 34 |
-
const BALLOON_CANNY_LOW: f32 = 70.0;
|
| 35 |
-
const BALLOON_CANNY_HIGH: f32 = 140.0;
|
| 36 |
-
const BALLOON_WINDOW_RATIO: f64 = 1.7;
|
| 37 |
-
const BALLOON_WINDOW_ASPECT_RATIO: f64 = 1.0;
|
| 38 |
-
const SIMPLE_BG_THRESHOLD_LOW_VARIANCE: f64 = 10.0;
|
| 39 |
-
const SIMPLE_BG_THRESHOLD_HIGH_VARIANCE: f64 = 7.0;
|
| 40 |
-
const SIMPLE_BG_CHANNEL_STD_SWITCH: f64 = 1.0;
|
| 41 |
-
type Xyxy = [u32; 4];
|
| 42 |
-
|
| 43 |
-
struct BalloonMasks {
|
| 44 |
-
balloon_mask: GrayImage,
|
| 45 |
-
non_text_mask: GrayImage,
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
pub struct Lama {
|
| 49 |
model: model::Lama,
|
| 50 |
device: Device,
|
|
@@ -64,33 +45,21 @@ impl Lama {
|
|
| 64 |
Ok(Self { model, device })
|
| 65 |
}
|
| 66 |
|
| 67 |
-
|
| 68 |
-
fn forward(&self, image: &Tensor, mask: &Tensor) -> Result<Tensor> {
|
| 69 |
-
self.model.forward(image, mask)
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
#[instrument(level = "debug", skip_all)]
|
| 73 |
-
pub fn inference_model(
|
| 74 |
-
&self,
|
| 75 |
-
image: &DynamicImage,
|
| 76 |
-
mask: &DynamicImage,
|
| 77 |
-
) -> Result<DynamicImage> {
|
| 78 |
-
let (image_tensor, mask_tensor) = self.preprocess(image, mask)?;
|
| 79 |
-
let output = self.forward(&image_tensor, &mask_tensor)?;
|
| 80 |
-
self.postprocess(&output)
|
| 81 |
-
}
|
| 82 |
-
|
| 83 |
#[instrument(level = "debug", skip_all)]
|
| 84 |
pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
|
| 85 |
-
self.
|
| 86 |
}
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
#[instrument(level = "debug", skip_all)]
|
| 89 |
-
pub fn
|
| 90 |
&self,
|
| 91 |
image: &DynamicImage,
|
| 92 |
mask: &DynamicImage,
|
| 93 |
-
|
| 94 |
) -> Result<DynamicImage> {
|
| 95 |
if image.dimensions() != mask.dimensions() {
|
| 96 |
bail!(
|
|
@@ -101,12 +70,9 @@ impl Lama {
|
|
| 101 |
}
|
| 102 |
|
| 103 |
let binary_mask = binarize_mask(mask);
|
| 104 |
-
let
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
} else {
|
| 108 |
-
self.inference_crop(&image.to_rgb8(), &binary_mask)?
|
| 109 |
-
};
|
| 110 |
|
| 111 |
if image.color().has_alpha() {
|
| 112 |
let original_alpha = image.to_rgba8();
|
|
@@ -119,90 +85,22 @@ impl Lama {
|
|
| 119 |
}
|
| 120 |
|
| 121 |
#[instrument(level = "debug", skip_all)]
|
| 122 |
-
fn
|
| 123 |
-
|
| 124 |
-
return Ok(filled);
|
| 125 |
-
}
|
| 126 |
-
|
| 127 |
-
self.inference_model_rgb(image, mask)
|
| 128 |
-
}
|
| 129 |
-
|
| 130 |
-
#[instrument(level = "debug", skip_all)]
|
| 131 |
-
fn inference_blockwise(
|
| 132 |
-
&self,
|
| 133 |
-
image: &RgbImage,
|
| 134 |
-
mask: &GrayImage,
|
| 135 |
-
text_blocks: &[TextRegion],
|
| 136 |
-
) -> Result<RgbImage> {
|
| 137 |
-
let (im_w, im_h) = image.dimensions();
|
| 138 |
-
let mut inpainted = image.clone();
|
| 139 |
-
let mut working_mask = mask.clone();
|
| 140 |
-
|
| 141 |
-
for block in text_blocks {
|
| 142 |
-
let Some(xyxy) = block_xyxy(block, im_w, im_h) else {
|
| 143 |
-
continue;
|
| 144 |
-
};
|
| 145 |
-
let xyxy_e = enlarge_window(
|
| 146 |
-
xyxy,
|
| 147 |
-
im_w,
|
| 148 |
-
im_h,
|
| 149 |
-
BALLOON_WINDOW_RATIO,
|
| 150 |
-
BALLOON_WINDOW_ASPECT_RATIO,
|
| 151 |
-
);
|
| 152 |
-
let crop_width = xyxy_e[2].saturating_sub(xyxy_e[0]);
|
| 153 |
-
let crop_height = xyxy_e[3].saturating_sub(xyxy_e[1]);
|
| 154 |
-
if crop_width == 0 || crop_height == 0 {
|
| 155 |
-
continue;
|
| 156 |
-
}
|
| 157 |
-
|
| 158 |
-
let crop_image =
|
| 159 |
-
crop_imm(&inpainted, xyxy_e[0], xyxy_e[1], crop_width, crop_height).to_image();
|
| 160 |
-
let crop_mask =
|
| 161 |
-
crop_imm(&working_mask, xyxy_e[0], xyxy_e[1], crop_width, crop_height).to_image();
|
| 162 |
-
|
| 163 |
-
let output = if count_nonzero(&crop_mask) == 0 {
|
| 164 |
-
crop_image
|
| 165 |
-
} else if let Some(filled) = try_fill_balloon(&crop_image, &crop_mask) {
|
| 166 |
-
filled
|
| 167 |
-
} else {
|
| 168 |
-
self.inference_model_rgb(&crop_image, &crop_mask)?
|
| 169 |
-
};
|
| 170 |
-
|
| 171 |
-
replace(
|
| 172 |
-
&mut inpainted,
|
| 173 |
-
&output,
|
| 174 |
-
i64::from(xyxy_e[0]),
|
| 175 |
-
i64::from(xyxy_e[1]),
|
| 176 |
-
);
|
| 177 |
-
clear_mask_bbox(&mut working_mask, xyxy);
|
| 178 |
-
}
|
| 179 |
-
|
| 180 |
-
Ok(inpainted)
|
| 181 |
}
|
| 182 |
|
| 183 |
#[instrument(level = "debug", skip_all)]
|
| 184 |
-
fn
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
&DynamicImage::ImageLuma8(mask.clone()),
|
| 189 |
-
)?
|
| 190 |
-
.to_rgb8())
|
| 191 |
}
|
| 192 |
|
| 193 |
#[instrument(level = "debug", skip_all)]
|
| 194 |
-
fn preprocess(&self, image: &
|
| 195 |
-
if image.dimensions() != mask.dimensions() {
|
| 196 |
-
bail!(
|
| 197 |
-
"image and mask dimensions dismatch: image is {:?}, mask is {:?}",
|
| 198 |
-
image.dimensions(),
|
| 199 |
-
mask.dimensions()
|
| 200 |
-
);
|
| 201 |
-
}
|
| 202 |
let (w, h) = (image.width() as usize, image.height() as usize);
|
| 203 |
-
|
| 204 |
-
let
|
| 205 |
-
let luma = mask.to_luma8().into_raw();
|
| 206 |
|
| 207 |
let image_tensor = (Tensor::from_vec(rgb, (1, h, w, 3), &self.device)?
|
| 208 |
.permute((0, 3, 1, 2))?
|
|
@@ -218,7 +116,7 @@ impl Lama {
|
|
| 218 |
}
|
| 219 |
|
| 220 |
#[instrument(level = "debug", skip_all)]
|
| 221 |
-
fn postprocess(&self, output: &Tensor) -> Result<
|
| 222 |
let output = output.squeeze(0)?;
|
| 223 |
let (channels, height, width) = output.dims3()?;
|
| 224 |
if channels != 3 {
|
|
@@ -229,439 +127,37 @@ impl Lama {
|
|
| 229 |
.permute((1, 2, 0))?
|
| 230 |
.to_dtype(DType::U8)?;
|
| 231 |
let raw: Vec<u8> = output.flatten_all()?.to_vec1()?;
|
| 232 |
-
|
| 233 |
-
.ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
|
| 234 |
-
Ok(DynamicImage::ImageRgb8(image))
|
| 235 |
-
}
|
| 236 |
-
}
|
| 237 |
-
|
| 238 |
-
fn block_xyxy(block: &TextRegion, width: u32, height: u32) -> Option<Xyxy> {
|
| 239 |
-
let x1 = block.x.floor().max(0.0) as u32;
|
| 240 |
-
let y1 = block.y.floor().max(0.0) as u32;
|
| 241 |
-
let x2 = (block.x + block.width).ceil().max(block.x.floor()) as u32;
|
| 242 |
-
let y2 = (block.y + block.height).ceil().max(block.y.floor()) as u32;
|
| 243 |
-
|
| 244 |
-
let x1 = x1.min(width);
|
| 245 |
-
let y1 = y1.min(height);
|
| 246 |
-
let x2 = x2.min(width);
|
| 247 |
-
let y2 = y2.min(height);
|
| 248 |
-
|
| 249 |
-
if x2 <= x1 || y2 <= y1 {
|
| 250 |
-
return None;
|
| 251 |
-
}
|
| 252 |
-
|
| 253 |
-
Some([x1, y1, x2, y2])
|
| 254 |
-
}
|
| 255 |
-
|
| 256 |
-
fn enlarge_window(rect: Xyxy, im_w: u32, im_h: u32, ratio: f64, aspect_ratio: f64) -> Xyxy {
|
| 257 |
-
debug_assert!(ratio > 1.0);
|
| 258 |
-
|
| 259 |
-
let [x1, y1, x2, y2] = rect;
|
| 260 |
-
let w = f64::from(x2.saturating_sub(x1));
|
| 261 |
-
let h = f64::from(y2.saturating_sub(y1));
|
| 262 |
-
if w <= 0.0 || h <= 0.0 || aspect_ratio <= 0.0 {
|
| 263 |
-
return [0, 0, 0, 0];
|
| 264 |
-
}
|
| 265 |
-
|
| 266 |
-
let a = aspect_ratio;
|
| 267 |
-
let b = w + h * aspect_ratio;
|
| 268 |
-
let c = (1.0 - ratio) * w * h;
|
| 269 |
-
let discriminant = (b * b - 4.0 * a * c).max(0.0);
|
| 270 |
-
let delta = ((-b + discriminant.sqrt()) / (2.0 * a) / 2.0).round();
|
| 271 |
-
let mut delta_h = delta.max(0.0) as u32;
|
| 272 |
-
let mut delta_w = (delta * aspect_ratio).round().max(0.0) as u32;
|
| 273 |
-
|
| 274 |
-
delta_w = delta_w.min(x1).min(im_w.saturating_sub(x2));
|
| 275 |
-
delta_h = delta_h.min(y1).min(im_h.saturating_sub(y2));
|
| 276 |
-
|
| 277 |
-
[
|
| 278 |
-
x1.saturating_sub(delta_w),
|
| 279 |
-
y1.saturating_sub(delta_h),
|
| 280 |
-
(x2 + delta_w).min(im_w),
|
| 281 |
-
(y2 + delta_h).min(im_h),
|
| 282 |
-
]
|
| 283 |
-
}
|
| 284 |
-
|
| 285 |
-
fn try_fill_balloon(image: &RgbImage, mask: &GrayImage) -> Option<RgbImage> {
|
| 286 |
-
let masks = extract_balloon_mask(image, mask)?;
|
| 287 |
-
let average_bg_color = median_rgb(image, &masks.non_text_mask)?;
|
| 288 |
-
let std_rgb = color_stddev(image, &masks.non_text_mask, average_bg_color);
|
| 289 |
-
let inpaint_thresh = if stddev3(std_rgb) > SIMPLE_BG_CHANNEL_STD_SWITCH {
|
| 290 |
-
SIMPLE_BG_THRESHOLD_HIGH_VARIANCE
|
| 291 |
-
} else {
|
| 292 |
-
SIMPLE_BG_THRESHOLD_LOW_VARIANCE
|
| 293 |
-
};
|
| 294 |
-
let std_max = std_rgb.into_iter().fold(0.0, f64::max);
|
| 295 |
-
|
| 296 |
-
if std_max >= inpaint_thresh {
|
| 297 |
-
return None;
|
| 298 |
-
}
|
| 299 |
-
|
| 300 |
-
let mut result = image.clone();
|
| 301 |
-
let fill = [
|
| 302 |
-
average_bg_color[0] as u8,
|
| 303 |
-
average_bg_color[1] as u8,
|
| 304 |
-
average_bg_color[2] as u8,
|
| 305 |
-
];
|
| 306 |
-
for (x, y, pixel) in masks.balloon_mask.enumerate_pixels() {
|
| 307 |
-
if pixel.0[0] > 0 {
|
| 308 |
-
result.put_pixel(x, y, Rgb(fill));
|
| 309 |
-
}
|
| 310 |
-
}
|
| 311 |
-
|
| 312 |
-
Some(result)
|
| 313 |
-
}
|
| 314 |
-
|
| 315 |
-
fn extract_balloon_mask(image: &RgbImage, mask: &GrayImage) -> Option<BalloonMasks> {
|
| 316 |
-
if image.dimensions() != mask.dimensions() {
|
| 317 |
-
return None;
|
| 318 |
-
}
|
| 319 |
-
|
| 320 |
-
let text_bbox = non_zero_bbox(mask)?;
|
| 321 |
-
let text_sum = count_nonzero(mask);
|
| 322 |
-
if text_sum == 0 {
|
| 323 |
-
return None;
|
| 324 |
-
}
|
| 325 |
-
|
| 326 |
-
let gray = DynamicImage::ImageRgb8(image.clone()).to_luma8();
|
| 327 |
-
let blurred = gaussian_blur_f32(&gray, 1.0);
|
| 328 |
-
let mut cannyed = canny(&blurred, BALLOON_CANNY_LOW, BALLOON_CANNY_HIGH);
|
| 329 |
-
cannyed = dilate(&cannyed, Norm::LInf, 1);
|
| 330 |
-
draw_binary_border(&mut cannyed);
|
| 331 |
-
subtract_binary_mask(&mut cannyed, mask);
|
| 332 |
-
|
| 333 |
-
let contours = find_contours::<i32>(&cannyed);
|
| 334 |
-
let (width, height) = cannyed.dimensions();
|
| 335 |
-
let mut best_mask = None;
|
| 336 |
-
let mut best_area = f64::INFINITY;
|
| 337 |
-
|
| 338 |
-
for contour in contours {
|
| 339 |
-
let Some(polygon) = contour_polygon(&contour.points) else {
|
| 340 |
-
continue;
|
| 341 |
-
};
|
| 342 |
-
let bbox = polygon_bbox(&polygon)?;
|
| 343 |
-
if bbox[0] > text_bbox[0]
|
| 344 |
-
|| bbox[1] > text_bbox[1]
|
| 345 |
-
|| bbox[2] < text_bbox[2]
|
| 346 |
-
|| bbox[3] < text_bbox[3]
|
| 347 |
-
{
|
| 348 |
-
continue;
|
| 349 |
-
}
|
| 350 |
-
|
| 351 |
-
let mut candidate = GrayImage::new(width, height);
|
| 352 |
-
draw_polygon_mut(&mut candidate, &polygon, Luma([255u8]));
|
| 353 |
-
if count_overlap(&candidate, mask) < text_sum {
|
| 354 |
-
continue;
|
| 355 |
-
}
|
| 356 |
-
|
| 357 |
-
let area = polygon_area(&polygon);
|
| 358 |
-
if area < best_area {
|
| 359 |
-
best_area = area;
|
| 360 |
-
best_mask = Some(candidate);
|
| 361 |
-
}
|
| 362 |
-
}
|
| 363 |
-
|
| 364 |
-
let balloon_mask = best_mask?;
|
| 365 |
-
let mut non_text_mask = balloon_mask.clone();
|
| 366 |
-
for (x, y, pixel) in mask.enumerate_pixels() {
|
| 367 |
-
if pixel.0[0] > 0 {
|
| 368 |
-
non_text_mask.put_pixel(x, y, Luma([0]));
|
| 369 |
-
}
|
| 370 |
}
|
| 371 |
-
|
| 372 |
-
Some(BalloonMasks {
|
| 373 |
-
balloon_mask,
|
| 374 |
-
non_text_mask,
|
| 375 |
-
})
|
| 376 |
-
}
|
| 377 |
-
|
| 378 |
-
fn contour_polygon(points: &[Point<i32>]) -> Option<Vec<Point<i32>>> {
|
| 379 |
-
let mut polygon = points.to_vec();
|
| 380 |
-
if polygon.len() < 3 {
|
| 381 |
-
return None;
|
| 382 |
-
}
|
| 383 |
-
if polygon.first() == polygon.last() {
|
| 384 |
-
polygon.pop();
|
| 385 |
-
}
|
| 386 |
-
if polygon.len() < 3 {
|
| 387 |
-
return None;
|
| 388 |
-
}
|
| 389 |
-
Some(polygon)
|
| 390 |
}
|
| 391 |
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
let mut max_y = first.y;
|
| 398 |
-
for point in points.iter().skip(1) {
|
| 399 |
-
min_x = min_x.min(point.x);
|
| 400 |
-
min_y = min_y.min(point.y);
|
| 401 |
-
max_x = max_x.max(point.x);
|
| 402 |
-
max_y = max_y.max(point.y);
|
| 403 |
-
}
|
| 404 |
-
|
| 405 |
-
Some([
|
| 406 |
-
min_x.max(0) as u32,
|
| 407 |
-
min_y.max(0) as u32,
|
| 408 |
-
max_x.max(min_x).saturating_add(1) as u32,
|
| 409 |
-
max_y.max(min_y).saturating_add(1) as u32,
|
| 410 |
-
])
|
| 411 |
-
}
|
| 412 |
-
|
| 413 |
-
fn polygon_area(points: &[Point<i32>]) -> f64 {
|
| 414 |
-
let mut area = 0.0;
|
| 415 |
-
for index in 0..points.len() {
|
| 416 |
-
let current = points[index];
|
| 417 |
-
let next = points[(index + 1) % points.len()];
|
| 418 |
-
area += f64::from(current.x) * f64::from(next.y) - f64::from(next.x) * f64::from(current.y);
|
| 419 |
-
}
|
| 420 |
-
area.abs() * 0.5
|
| 421 |
}
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
return;
|
| 428 |
-
}
|
| 429 |
-
|
| 430 |
-
for x in 0..width {
|
| 431 |
-
image.put_pixel(x, 0, Luma([255]));
|
| 432 |
-
image.put_pixel(x, height - 1, Luma([255]));
|
| 433 |
-
}
|
| 434 |
-
for y in 0..height {
|
| 435 |
-
image.put_pixel(0, y, Luma([255]));
|
| 436 |
-
image.put_pixel(width - 1, y, Luma([255]));
|
| 437 |
-
}
|
| 438 |
-
}
|
| 439 |
-
|
| 440 |
-
fn subtract_binary_mask(image: &mut GrayImage, mask: &GrayImage) {
|
| 441 |
-
for (x, y, pixel) in image.enumerate_pixels_mut() {
|
| 442 |
-
if mask.get_pixel(x, y).0[0] > 0 {
|
| 443 |
-
pixel.0[0] = 0;
|
| 444 |
}
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
fn non_zero_bbox(mask: &GrayImage) -> Option<Xyxy> {
|
| 449 |
-
let (width, height) = mask.dimensions();
|
| 450 |
-
let mut min_x = width;
|
| 451 |
-
let mut min_y = height;
|
| 452 |
-
let mut max_x = 0;
|
| 453 |
-
let mut max_y = 0;
|
| 454 |
-
let mut found = false;
|
| 455 |
-
|
| 456 |
-
for (x, y, pixel) in mask.enumerate_pixels() {
|
| 457 |
-
if pixel.0[0] == 0 {
|
| 458 |
-
continue;
|
| 459 |
-
}
|
| 460 |
-
found = true;
|
| 461 |
-
min_x = min_x.min(x);
|
| 462 |
-
min_y = min_y.min(y);
|
| 463 |
-
max_x = max_x.max(x);
|
| 464 |
-
max_y = max_y.max(y);
|
| 465 |
-
}
|
| 466 |
-
|
| 467 |
-
found.then_some([
|
| 468 |
-
min_x,
|
| 469 |
-
min_y,
|
| 470 |
-
max_x.saturating_add(1),
|
| 471 |
-
max_y.saturating_add(1),
|
| 472 |
-
])
|
| 473 |
-
}
|
| 474 |
-
|
| 475 |
-
fn clear_mask_bbox(mask: &mut GrayImage, bbox: Xyxy) {
|
| 476 |
-
for y in bbox[1]..bbox[3] {
|
| 477 |
-
for x in bbox[0]..bbox[2] {
|
| 478 |
-
mask.put_pixel(x, y, Luma([0]));
|
| 479 |
-
}
|
| 480 |
-
}
|
| 481 |
-
}
|
| 482 |
-
|
| 483 |
-
fn count_nonzero(mask: &GrayImage) -> u32 {
|
| 484 |
-
mask.pixels().filter(|pixel| pixel.0[0] > 0).count() as u32
|
| 485 |
-
}
|
| 486 |
-
|
| 487 |
-
fn count_overlap(left: &GrayImage, right: &GrayImage) -> u32 {
|
| 488 |
-
left.pixels()
|
| 489 |
-
.zip(right.pixels())
|
| 490 |
-
.filter(|(l, r)| l.0[0] > 0 && r.0[0] > 0)
|
| 491 |
-
.count() as u32
|
| 492 |
-
}
|
| 493 |
-
|
| 494 |
-
fn median_rgb(image: &RgbImage, mask: &GrayImage) -> Option<[f64; 3]> {
|
| 495 |
-
let mut channels = [Vec::new(), Vec::new(), Vec::new()];
|
| 496 |
-
for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
|
| 497 |
-
if mask_pixel.0[0] == 0 {
|
| 498 |
-
continue;
|
| 499 |
-
}
|
| 500 |
-
channels[0].push(pixel.0[0]);
|
| 501 |
-
channels[1].push(pixel.0[1]);
|
| 502 |
-
channels[2].push(pixel.0[2]);
|
| 503 |
-
}
|
| 504 |
-
|
| 505 |
-
Some([
|
| 506 |
-
median_channel(&channels[0])?,
|
| 507 |
-
median_channel(&channels[1])?,
|
| 508 |
-
median_channel(&channels[2])?,
|
| 509 |
-
])
|
| 510 |
-
}
|
| 511 |
-
|
| 512 |
-
fn median_channel(values: &[u8]) -> Option<f64> {
|
| 513 |
-
if values.is_empty() {
|
| 514 |
-
return None;
|
| 515 |
-
}
|
| 516 |
-
|
| 517 |
-
let mut values = values.to_vec();
|
| 518 |
-
values.sort_unstable();
|
| 519 |
-
let mid = values.len() / 2;
|
| 520 |
-
if values.len().is_multiple_of(2) {
|
| 521 |
-
Some((f64::from(values[mid - 1]) + f64::from(values[mid])) / 2.0)
|
| 522 |
-
} else {
|
| 523 |
-
Some(f64::from(values[mid]))
|
| 524 |
-
}
|
| 525 |
-
}
|
| 526 |
-
|
| 527 |
-
fn color_stddev(image: &RgbImage, mask: &GrayImage, median: [f64; 3]) -> [f64; 3] {
|
| 528 |
-
let mut sum_sq = [0.0; 3];
|
| 529 |
-
let mut count = 0.0;
|
| 530 |
-
|
| 531 |
-
for (pixel, mask_pixel) in image.pixels().zip(mask.pixels()) {
|
| 532 |
-
if mask_pixel.0[0] == 0 {
|
| 533 |
-
continue;
|
| 534 |
-
}
|
| 535 |
-
count += 1.0;
|
| 536 |
-
for channel in 0..3 {
|
| 537 |
-
let diff = f64::from(pixel.0[channel]) - median[channel];
|
| 538 |
-
sum_sq[channel] += diff * diff;
|
| 539 |
}
|
|
|
|
| 540 |
}
|
| 541 |
-
|
| 542 |
-
if count == 0.0 {
|
| 543 |
-
return [f64::INFINITY; 3];
|
| 544 |
-
}
|
| 545 |
-
|
| 546 |
-
[
|
| 547 |
-
(sum_sq[0] / count).sqrt(),
|
| 548 |
-
(sum_sq[1] / count).sqrt(),
|
| 549 |
-
(sum_sq[2] / count).sqrt(),
|
| 550 |
-
]
|
| 551 |
-
}
|
| 552 |
-
|
| 553 |
-
fn stddev3(values: [f64; 3]) -> f64 {
|
| 554 |
-
let mean = values.iter().sum::<f64>() / 3.0;
|
| 555 |
-
let variance = values
|
| 556 |
-
.iter()
|
| 557 |
-
.map(|value| {
|
| 558 |
-
let diff = value - mean;
|
| 559 |
-
diff * diff
|
| 560 |
-
})
|
| 561 |
-
.sum::<f64>()
|
| 562 |
-
/ 3.0;
|
| 563 |
-
variance.sqrt()
|
| 564 |
}
|
| 565 |
|
| 566 |
#[cfg(test)]
|
| 567 |
mod tests {
|
| 568 |
-
use super::{
|
| 569 |
-
BALLOON_WINDOW_ASPECT_RATIO, BALLOON_WINDOW_RATIO, clear_mask_bbox, count_nonzero,
|
| 570 |
-
enlarge_window, extract_balloon_mask, try_fill_balloon,
|
| 571 |
-
};
|
| 572 |
use crate::inpainting::restore_alpha_channel;
|
| 573 |
-
use crate::types::TextRegion;
|
| 574 |
use image::{GrayImage, Luma, Rgb, RgbImage};
|
| 575 |
-
use imageproc::drawing::draw_hollow_rect_mut;
|
| 576 |
-
use imageproc::rect::Rect;
|
| 577 |
|
| 578 |
const ALPHA_RING_RADIUS: u8 = 7;
|
| 579 |
|
| 580 |
-
#[test]
|
| 581 |
-
fn enlarge_window_matches_ratio_1_7_reference() {
|
| 582 |
-
let enlarged = enlarge_window(
|
| 583 |
-
[10, 20, 50, 60],
|
| 584 |
-
200,
|
| 585 |
-
150,
|
| 586 |
-
BALLOON_WINDOW_RATIO,
|
| 587 |
-
BALLOON_WINDOW_ASPECT_RATIO,
|
| 588 |
-
);
|
| 589 |
-
|
| 590 |
-
assert_eq!(enlarged, [4, 14, 56, 66]);
|
| 591 |
-
}
|
| 592 |
-
|
| 593 |
-
#[test]
|
| 594 |
-
fn extract_balloon_mask_prefers_smallest_covering_contour() {
|
| 595 |
-
let mut image = RgbImage::from_pixel(80, 80, Rgb([255, 255, 255]));
|
| 596 |
-
draw_hollow_rect_mut(&mut image, Rect::at(4, 4).of_size(72, 72), Rgb([0, 0, 0]));
|
| 597 |
-
draw_hollow_rect_mut(&mut image, Rect::at(20, 20).of_size(28, 20), Rgb([0, 0, 0]));
|
| 598 |
-
|
| 599 |
-
let mut mask = GrayImage::new(80, 80);
|
| 600 |
-
for y in 24..36 {
|
| 601 |
-
for x in 24..44 {
|
| 602 |
-
mask.put_pixel(x, y, Luma([255]));
|
| 603 |
-
}
|
| 604 |
-
}
|
| 605 |
-
|
| 606 |
-
let masks = extract_balloon_mask(&image, &mask).expect("balloon should be detected");
|
| 607 |
-
let balloon_pixels = count_nonzero(&masks.balloon_mask);
|
| 608 |
-
|
| 609 |
-
assert!(
|
| 610 |
-
balloon_pixels < 900,
|
| 611 |
-
"expected inner contour fill, got {balloon_pixels}"
|
| 612 |
-
);
|
| 613 |
-
assert!(
|
| 614 |
-
balloon_pixels > 250,
|
| 615 |
-
"expected meaningful bubble area, got {balloon_pixels}"
|
| 616 |
-
);
|
| 617 |
-
}
|
| 618 |
-
|
| 619 |
-
#[test]
|
| 620 |
-
fn simple_balloon_chooses_fill_but_textured_balloon_does_not() {
|
| 621 |
-
let mut flat = RgbImage::from_pixel(64, 64, Rgb([240, 240, 240]));
|
| 622 |
-
draw_hollow_rect_mut(&mut flat, Rect::at(8, 8).of_size(48, 32), Rgb([0, 0, 0]));
|
| 623 |
-
|
| 624 |
-
let mut mask = GrayImage::new(64, 64);
|
| 625 |
-
for y in 18..30 {
|
| 626 |
-
for x in 18..46 {
|
| 627 |
-
mask.put_pixel(x, y, Luma([255]));
|
| 628 |
-
}
|
| 629 |
-
}
|
| 630 |
-
|
| 631 |
-
assert!(try_fill_balloon(&flat, &mask).is_some());
|
| 632 |
-
|
| 633 |
-
let mut textured = flat.clone();
|
| 634 |
-
for y in 9..39 {
|
| 635 |
-
for x in 9..55 {
|
| 636 |
-
let noise = ((x + y) % 23) as u8;
|
| 637 |
-
textured.put_pixel(
|
| 638 |
-
x,
|
| 639 |
-
y,
|
| 640 |
-
Rgb([200 + noise, 210 + (noise / 2), 220 - (noise / 3)]),
|
| 641 |
-
);
|
| 642 |
-
}
|
| 643 |
-
}
|
| 644 |
-
|
| 645 |
-
assert!(try_fill_balloon(&textured, &mask).is_none());
|
| 646 |
-
}
|
| 647 |
-
|
| 648 |
-
#[test]
|
| 649 |
-
fn clearing_mask_consumes_only_original_bbox() {
|
| 650 |
-
let mut mask = GrayImage::from_pixel(32, 32, Luma([255]));
|
| 651 |
-
clear_mask_bbox(&mut mask, [8, 10, 16, 18]);
|
| 652 |
-
|
| 653 |
-
for y in 10..18 {
|
| 654 |
-
for x in 8..16 {
|
| 655 |
-
assert_eq!(mask.get_pixel(x, y).0[0], 0);
|
| 656 |
-
}
|
| 657 |
-
}
|
| 658 |
-
|
| 659 |
-
assert_eq!(mask.get_pixel(7, 10).0[0], 255);
|
| 660 |
-
assert_eq!(mask.get_pixel(16, 17).0[0], 255);
|
| 661 |
-
assert_eq!(mask.get_pixel(8, 9).0[0], 255);
|
| 662 |
-
assert_eq!(mask.get_pixel(15, 18).0[0], 255);
|
| 663 |
-
}
|
| 664 |
-
|
| 665 |
#[test]
|
| 666 |
fn rgba_alpha_restore_uses_surrounding_ring() {
|
| 667 |
let image = RgbImage::from_pixel(32, 32, Rgb([20, 30, 40]));
|
|
@@ -685,18 +181,4 @@ mod tests {
|
|
| 685 |
assert_eq!(restored.get_pixel(15, 15).0[3], 64);
|
| 686 |
assert_eq!(restored.get_pixel(2, 2).0[3], 255);
|
| 687 |
}
|
| 688 |
-
|
| 689 |
-
#[test]
|
| 690 |
-
fn block_xyxy_rounds_and_clamps_document_coords() {
|
| 691 |
-
let block = TextRegion {
|
| 692 |
-
x: 10.2,
|
| 693 |
-
y: 20.7,
|
| 694 |
-
width: 15.1,
|
| 695 |
-
height: 9.4,
|
| 696 |
-
..Default::default()
|
| 697 |
-
};
|
| 698 |
-
|
| 699 |
-
let bbox = super::block_xyxy(&block, 100, 100).expect("bbox");
|
| 700 |
-
assert_eq!(bbox, [10, 20, 26, 31]);
|
| 701 |
-
}
|
| 702 |
}
|
|
|
|
| 1 |
mod fft;
|
| 2 |
mod model;
|
| 3 |
|
|
|
|
| 4 |
use anyhow::{Result, bail};
|
| 5 |
use candle_core::{DType, Device, Tensor};
|
| 6 |
+
use image::{DynamicImage, GenericImageView, GrayImage, RgbImage};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
use koharu_runtime::RuntimeManager;
|
| 8 |
use tracing::instrument;
|
| 9 |
|
| 10 |
use crate::{
|
| 11 |
device,
|
| 12 |
+
inpainting::{
|
| 13 |
+
HdStrategyConfig, InpaintForward, binarize_mask, extract_alpha, restore_alpha_channel,
|
| 14 |
+
run_inpaint, try_fill_balloon,
|
| 15 |
+
},
|
| 16 |
loading,
|
| 17 |
};
|
| 18 |
|
|
|
|
| 26 |
order: 130,
|
| 27 |
);
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
pub struct Lama {
|
| 30 |
model: model::Lama,
|
| 31 |
device: Device,
|
|
|
|
| 45 |
Ok(Self { model, device })
|
| 46 |
}
|
| 47 |
|
| 48 |
+
/// Run inpainting with the manga-tuned default strategy (Crop, 800/128/1280).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
#[instrument(level = "debug", skip_all)]
|
| 50 |
pub fn inference(&self, image: &DynamicImage, mask: &DynamicImage) -> Result<DynamicImage> {
|
| 51 |
+
self.inference_with_config(image, mask, &HdStrategyConfig::lama_default())
|
| 52 |
}
|
| 53 |
|
| 54 |
+
/// Run inpainting with a caller-supplied [`HdStrategyConfig`]. Use this to
|
| 55 |
+
/// pick a different strategy (Original / Resize) or tune the trigger /
|
| 56 |
+
/// margin / resize-limit for GPUs with less VRAM.
|
| 57 |
#[instrument(level = "debug", skip_all)]
|
| 58 |
+
pub fn inference_with_config(
|
| 59 |
&self,
|
| 60 |
image: &DynamicImage,
|
| 61 |
mask: &DynamicImage,
|
| 62 |
+
cfg: &HdStrategyConfig,
|
| 63 |
) -> Result<DynamicImage> {
|
| 64 |
if image.dimensions() != mask.dimensions() {
|
| 65 |
bail!(
|
|
|
|
| 70 |
}
|
| 71 |
|
| 72 |
let binary_mask = binarize_mask(mask);
|
| 73 |
+
let image_rgb = image.to_rgb8();
|
| 74 |
+
let forward = LamaForward { lama: self };
|
| 75 |
+
let output_rgb = run_inpaint(&forward, &image_rgb, &binary_mask, cfg)?;
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
if image.color().has_alpha() {
|
| 78 |
let original_alpha = image.to_rgba8();
|
|
|
|
| 85 |
}
|
| 86 |
|
| 87 |
#[instrument(level = "debug", skip_all)]
|
| 88 |
+
fn forward(&self, image: &Tensor, mask: &Tensor) -> Result<Tensor> {
|
| 89 |
+
self.model.forward(image, mask)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
}
|
| 91 |
|
| 92 |
#[instrument(level = "debug", skip_all)]
|
| 93 |
+
fn inference_model(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
|
| 94 |
+
let (image_tensor, mask_tensor) = self.preprocess(image, mask)?;
|
| 95 |
+
let output = self.forward(&image_tensor, &mask_tensor)?;
|
| 96 |
+
self.postprocess(&output)
|
|
|
|
|
|
|
|
|
|
| 97 |
}
|
| 98 |
|
| 99 |
#[instrument(level = "debug", skip_all)]
|
| 100 |
+
fn preprocess(&self, image: &RgbImage, mask: &GrayImage) -> Result<(Tensor, Tensor)> {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
let (w, h) = (image.width() as usize, image.height() as usize);
|
| 102 |
+
let rgb = image.clone().into_raw();
|
| 103 |
+
let luma = mask.clone().into_raw();
|
|
|
|
| 104 |
|
| 105 |
let image_tensor = (Tensor::from_vec(rgb, (1, h, w, 3), &self.device)?
|
| 106 |
.permute((0, 3, 1, 2))?
|
|
|
|
| 116 |
}
|
| 117 |
|
| 118 |
#[instrument(level = "debug", skip_all)]
|
| 119 |
+
fn postprocess(&self, output: &Tensor) -> Result<RgbImage> {
|
| 120 |
let output = output.squeeze(0)?;
|
| 121 |
let (channels, height, width) = output.dims3()?;
|
| 122 |
if channels != 3 {
|
|
|
|
| 127 |
.permute((1, 2, 0))?
|
| 128 |
.to_dtype(DType::U8)?;
|
| 129 |
let raw: Vec<u8> = output.flatten_all()?.to_vec1()?;
|
| 130 |
+
RgbImage::from_raw(width as u32, height as u32, raw)
|
| 131 |
+
.ok_or_else(|| anyhow::anyhow!("failed to create image buffer from model output"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
}
|
| 134 |
|
| 135 |
+
/// [`InpaintForward`] impl used by the HD-strategy dispatcher. Applies the
|
| 136 |
+
/// balloon-fill fast path on a per-crop basis before falling back to the
|
| 137 |
+
/// model forward — flat-background speech bubbles skip the model entirely.
|
| 138 |
+
struct LamaForward<'a> {
|
| 139 |
+
lama: &'a Lama,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
}
|
| 141 |
|
| 142 |
+
impl InpaintForward for LamaForward<'_> {
|
| 143 |
+
fn forward(&self, image: &RgbImage, mask: &GrayImage) -> Result<RgbImage> {
|
| 144 |
+
if mask.pixels().all(|p| p.0[0] == 0) {
|
| 145 |
+
return Ok(image.clone());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
}
|
| 147 |
+
if let Some(filled) = try_fill_balloon(image, mask) {
|
| 148 |
+
return Ok(filled);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
}
|
| 150 |
+
self.lama.inference_model(image, mask)
|
| 151 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
}
|
| 153 |
|
| 154 |
#[cfg(test)]
|
| 155 |
mod tests {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
use crate::inpainting::restore_alpha_channel;
|
|
|
|
| 157 |
use image::{GrayImage, Luma, Rgb, RgbImage};
|
|
|
|
|
|
|
| 158 |
|
| 159 |
const ALPHA_RING_RADIUS: u8 = 7;
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
#[test]
|
| 162 |
fn rgba_alpha_restore_uses_surrounding_ring() {
|
| 163 |
let image = RgbImage::from_pixel(32, 32, Rgb([20, 30, 40]));
|
|
|
|
| 181 |
assert_eq!(restored.get_pixel(15, 15).0[3], 64);
|
| 182 |
assert_eq!(restored.get_pixel(2, 2).0[3], 255);
|
| 183 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
}
|
koharu-ml/src/lib.rs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
mod hf_hub;
|
| 2 |
-
mod inpainting;
|
| 3 |
|
| 4 |
pub mod aot_inpainting;
|
| 5 |
pub mod comic_text_bubble_detector;
|
| 6 |
pub mod comic_text_detector;
|
| 7 |
pub mod font_detector;
|
|
|
|
| 8 |
pub mod lama;
|
| 9 |
pub mod loading;
|
| 10 |
pub mod manga_ocr;
|
|
|
|
| 1 |
mod hf_hub;
|
|
|
|
| 2 |
|
| 3 |
pub mod aot_inpainting;
|
| 4 |
pub mod comic_text_bubble_detector;
|
| 5 |
pub mod comic_text_detector;
|
| 6 |
pub mod font_detector;
|
| 7 |
+
pub mod inpainting;
|
| 8 |
pub mod lama;
|
| 9 |
pub mod loading;
|
| 10 |
pub mod manga_ocr;
|
koharu-ml/tests/inpaint.rs
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
use std::path::Path;
|
| 2 |
|
| 3 |
use image::GenericImageView;
|
| 4 |
-
use koharu_ml::TextRegion;
|
| 5 |
use koharu_ml::aot_inpainting::AotInpainting;
|
| 6 |
use koharu_ml::lama::Lama;
|
| 7 |
|
|
@@ -40,48 +39,6 @@ async fn lama_inpainting_updates_masked_region() -> anyhow::Result<()> {
|
|
| 40 |
Ok(())
|
| 41 |
}
|
| 42 |
|
| 43 |
-
#[tokio::test]
|
| 44 |
-
#[ignore]
|
| 45 |
-
async fn lama_block_aware_inpainting_returns_same_size() -> anyhow::Result<()> {
|
| 46 |
-
let fixtures = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures");
|
| 47 |
-
|
| 48 |
-
let runtime = support::cpu_runtime();
|
| 49 |
-
let lama = Lama::load(&runtime, false).await?;
|
| 50 |
-
let base = image::open(fixtures.join("image.jpg"))?;
|
| 51 |
-
let mask = image::open(fixtures.join("mask.png"))?;
|
| 52 |
-
let mask_luma = mask.to_luma8();
|
| 53 |
-
|
| 54 |
-
let mut min_x = mask_luma.width();
|
| 55 |
-
let mut min_y = mask_luma.height();
|
| 56 |
-
let mut max_x = 0;
|
| 57 |
-
let mut max_y = 0;
|
| 58 |
-
let mut found = false;
|
| 59 |
-
for (x, y, pixel) in mask_luma.enumerate_pixels() {
|
| 60 |
-
if pixel.0[0] == 0 {
|
| 61 |
-
continue;
|
| 62 |
-
}
|
| 63 |
-
found = true;
|
| 64 |
-
min_x = min_x.min(x);
|
| 65 |
-
min_y = min_y.min(y);
|
| 66 |
-
max_x = max_x.max(x);
|
| 67 |
-
max_y = max_y.max(y);
|
| 68 |
-
}
|
| 69 |
-
|
| 70 |
-
assert!(found, "mask fixture should contain a non-empty region");
|
| 71 |
-
|
| 72 |
-
let block = TextRegion {
|
| 73 |
-
x: min_x as f32,
|
| 74 |
-
y: min_y as f32,
|
| 75 |
-
width: max_x.saturating_sub(min_x).saturating_add(1) as f32,
|
| 76 |
-
height: max_y.saturating_sub(min_y).saturating_add(1) as f32,
|
| 77 |
-
..Default::default()
|
| 78 |
-
};
|
| 79 |
-
|
| 80 |
-
let output = lama.inference_with_blocks(&base, &mask, Some(&[block]))?;
|
| 81 |
-
assert_eq!(output.dimensions(), base.dimensions());
|
| 82 |
-
Ok(())
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
#[tokio::test]
|
| 86 |
#[ignore]
|
| 87 |
async fn aot_inpainting_updates_masked_region() -> anyhow::Result<()> {
|
|
|
|
| 1 |
use std::path::Path;
|
| 2 |
|
| 3 |
use image::GenericImageView;
|
|
|
|
| 4 |
use koharu_ml::aot_inpainting::AotInpainting;
|
| 5 |
use koharu_ml::lama::Lama;
|
| 6 |
|
|
|
|
| 39 |
Ok(())
|
| 40 |
}
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
#[tokio::test]
|
| 43 |
#[ignore]
|
| 44 |
async fn aot_inpainting_updates_masked_region() -> anyhow::Result<()> {
|
koharu-rpc/src/mcp/mod.rs
CHANGED
|
@@ -199,7 +199,7 @@ impl KoharuServer {
|
|
| 199 |
let cpu = app.cpu_only();
|
| 200 |
tokio::spawn(async move {
|
| 201 |
let _ = koharu_app::pipeline::run(
|
| 202 |
-
session, registry, runtime, cpu, llm, renderer, spec, cancel, None,
|
| 203 |
)
|
| 204 |
.await;
|
| 205 |
});
|
|
|
|
| 199 |
let cpu = app.cpu_only();
|
| 200 |
tokio::spawn(async move {
|
| 201 |
let _ = koharu_app::pipeline::run(
|
| 202 |
+
session, registry, runtime, cpu, llm, renderer, spec, cancel, None, None,
|
| 203 |
)
|
| 204 |
.await;
|
| 205 |
});
|
koharu-rpc/src/routes/pipelines.rs
CHANGED
|
@@ -9,10 +9,12 @@ use std::sync::atomic::AtomicBool;
|
|
| 9 |
|
| 10 |
use axum::Json;
|
| 11 |
use axum::extract::State;
|
| 12 |
-
use koharu_app::pipeline::{
|
|
|
|
|
|
|
| 13 |
use koharu_core::{
|
| 14 |
-
AppEvent, JobFinishedEvent, JobStatus, JobSummary, PageId, PipelineProgress,
|
| 15 |
-
Region,
|
| 16 |
};
|
| 17 |
use serde::{Deserialize, Serialize};
|
| 18 |
use utoipa_axum::{router::OpenApiRouter, routes};
|
|
@@ -123,6 +125,17 @@ async fn start_pipeline(
|
|
| 123 |
overall_percent: tick.overall_percent,
|
| 124 |
}));
|
| 125 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
tokio::spawn(async move {
|
| 127 |
let result = pipeline::run(
|
| 128 |
session_c,
|
|
@@ -134,12 +147,23 @@ async fn start_pipeline(
|
|
| 134 |
spec,
|
| 135 |
cancel,
|
| 136 |
Some(progress_sink),
|
|
|
|
| 137 |
)
|
| 138 |
.await;
|
| 139 |
let (status, error) = match &result {
|
| 140 |
-
Ok(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
Err(e) if e.to_string().contains("cancelled") => (JobStatus::Cancelled, None),
|
| 142 |
-
Err(e) =>
|
|
|
|
|
|
|
|
|
|
| 143 |
};
|
| 144 |
app_c.jobs.insert(
|
| 145 |
op_id_c.clone(),
|
|
|
|
| 9 |
|
| 10 |
use axum::Json;
|
| 11 |
use axum::extract::State;
|
| 12 |
+
use koharu_app::pipeline::{
|
| 13 |
+
self, PipelineRunOptions, PipelineSpec, ProgressTick, Scope, WarningTick,
|
| 14 |
+
};
|
| 15 |
use koharu_core::{
|
| 16 |
+
AppEvent, JobFinishedEvent, JobStatus, JobSummary, JobWarningEvent, PageId, PipelineProgress,
|
| 17 |
+
PipelineStatus, Region,
|
| 18 |
};
|
| 19 |
use serde::{Deserialize, Serialize};
|
| 20 |
use utoipa_axum::{router::OpenApiRouter, routes};
|
|
|
|
| 125 |
overall_percent: tick.overall_percent,
|
| 126 |
}));
|
| 127 |
});
|
| 128 |
+
let warning_bus = app.bus.clone();
|
| 129 |
+
let warning_op_id = operation_id.clone();
|
| 130 |
+
let warning_sink: pipeline::WarningSink = Arc::new(move |tick: WarningTick| {
|
| 131 |
+
warning_bus.publish(AppEvent::JobWarning(JobWarningEvent {
|
| 132 |
+
job_id: warning_op_id.clone(),
|
| 133 |
+
page_index: tick.page_index,
|
| 134 |
+
total_pages: tick.total_pages,
|
| 135 |
+
step_id: tick.step_id,
|
| 136 |
+
message: tick.message,
|
| 137 |
+
}));
|
| 138 |
+
});
|
| 139 |
tokio::spawn(async move {
|
| 140 |
let result = pipeline::run(
|
| 141 |
session_c,
|
|
|
|
| 147 |
spec,
|
| 148 |
cancel,
|
| 149 |
Some(progress_sink),
|
| 150 |
+
Some(warning_sink),
|
| 151 |
)
|
| 152 |
.await;
|
| 153 |
let (status, error) = match &result {
|
| 154 |
+
Ok(outcome) if outcome.warning_count == 0 => (JobStatus::Completed, None),
|
| 155 |
+
Ok(outcome) => (
|
| 156 |
+
JobStatus::CompletedWithErrors,
|
| 157 |
+
Some(format!(
|
| 158 |
+
"{} step(s) failed; see warnings for details",
|
| 159 |
+
outcome.warning_count
|
| 160 |
+
)),
|
| 161 |
+
),
|
| 162 |
Err(e) if e.to_string().contains("cancelled") => (JobStatus::Cancelled, None),
|
| 163 |
+
Err(e) => {
|
| 164 |
+
tracing::warn!(operation_id = %op_id_c, "pipeline run failed: {e:#}");
|
| 165 |
+
(JobStatus::Failed, Some(format!("{e:#}")))
|
| 166 |
+
}
|
| 167 |
};
|
| 168 |
app_c.jobs.insert(
|
| 169 |
op_id_c.clone(),
|
ui/components/ActivityBubble.tsx
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
'use client'
|
| 2 |
|
| 3 |
-
import { CircleXIcon } from 'lucide-react'
|
| 4 |
import { type ReactNode } from 'react'
|
| 5 |
import { useTranslation } from 'react-i18next'
|
| 6 |
|
| 7 |
import { Button } from '@/components/ui/button'
|
| 8 |
import { cancelOperation } from '@/lib/api/default/default'
|
| 9 |
-
import type {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
import { useDownloadsStore } from '@/lib/stores/downloadsStore'
|
| 11 |
import { useEditorUiStore } from '@/lib/stores/editorUiStore'
|
| 12 |
import { type JobEntry, useJobsStore } from '@/lib/stores/jobsStore'
|
|
@@ -112,6 +117,39 @@ function ErrorCard({
|
|
| 112 |
)
|
| 113 |
}
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t: TranslateFunc }) {
|
| 116 |
const progress: PipelineProgress | undefined = job.progress
|
| 117 |
const percent = clampProgress(progress?.overallPercent)
|
|
@@ -133,6 +171,7 @@ function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t:
|
|
| 133 |
: undefined
|
| 134 |
const subtitle =
|
| 135 |
[pageText, stepLabel].filter(Boolean).join(' \u00b7 ') || t('operations.inProgress')
|
|
|
|
| 136 |
|
| 137 |
return (
|
| 138 |
<BubbleCard>
|
|
@@ -148,6 +187,7 @@ function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t:
|
|
| 148 |
</div>
|
| 149 |
</div>
|
| 150 |
<ProgressBar percent={percent} />
|
|
|
|
| 151 |
<div className='mt-3 flex justify-end'>
|
| 152 |
<Button
|
| 153 |
data-testid='operation-cancel'
|
|
|
|
| 1 |
'use client'
|
| 2 |
|
| 3 |
+
import { AlertTriangleIcon, CircleXIcon } from 'lucide-react'
|
| 4 |
import { type ReactNode } from 'react'
|
| 5 |
import { useTranslation } from 'react-i18next'
|
| 6 |
|
| 7 |
import { Button } from '@/components/ui/button'
|
| 8 |
import { cancelOperation } from '@/lib/api/default/default'
|
| 9 |
+
import type {
|
| 10 |
+
DownloadProgress,
|
| 11 |
+
JobSummary,
|
| 12 |
+
JobWarningEvent,
|
| 13 |
+
PipelineProgress,
|
| 14 |
+
} from '@/lib/api/schemas'
|
| 15 |
import { useDownloadsStore } from '@/lib/stores/downloadsStore'
|
| 16 |
import { useEditorUiStore } from '@/lib/stores/editorUiStore'
|
| 17 |
import { type JobEntry, useJobsStore } from '@/lib/stores/jobsStore'
|
|
|
|
| 117 |
)
|
| 118 |
}
|
| 119 |
|
| 120 |
+
function JobWarnings({ warnings, t }: { warnings: JobWarningEvent[]; t: TranslateFunc }) {
|
| 121 |
+
const latest = warnings[warnings.length - 1]
|
| 122 |
+
const count = warnings.length
|
| 123 |
+
const pageLabel =
|
| 124 |
+
typeof latest.totalPages === 'number' && latest.totalPages > 1
|
| 125 |
+
? t('operations.imageProgress', {
|
| 126 |
+
current: latest.pageIndex + 1,
|
| 127 |
+
total: latest.totalPages,
|
| 128 |
+
})
|
| 129 |
+
: undefined
|
| 130 |
+
const header =
|
| 131 |
+
count === 1 ? t('operations.warningsOne') : t('operations.warningsOther', { count })
|
| 132 |
+
return (
|
| 133 |
+
<div
|
| 134 |
+
data-testid='operation-warnings'
|
| 135 |
+
className='mt-3 rounded-lg border border-amber-200/70 bg-amber-50/80 p-2.5 dark:border-amber-900/70 dark:bg-amber-950/40'
|
| 136 |
+
>
|
| 137 |
+
<div className='flex items-start gap-2 text-amber-900 dark:text-amber-200'>
|
| 138 |
+
<AlertTriangleIcon className='mt-0.5 size-3.5 shrink-0' />
|
| 139 |
+
<div className='min-w-0 flex-1'>
|
| 140 |
+
<div className='text-[11px] font-semibold'>{header}</div>
|
| 141 |
+
<div className='mt-0.5 truncate text-[11px] text-amber-800/90 dark:text-amber-200/80'>
|
| 142 |
+
{[latest.stepId, pageLabel].filter(Boolean).join(' \u00b7 ')}
|
| 143 |
+
</div>
|
| 144 |
+
<div className='mt-1 line-clamp-2 text-[11px] break-words text-amber-900/80 dark:text-amber-100/80'>
|
| 145 |
+
{latest.message}
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
</div>
|
| 149 |
+
</div>
|
| 150 |
+
)
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
function JobCard({ job, onCancel, t }: { job: JobEntry; onCancel: () => void; t: TranslateFunc }) {
|
| 154 |
const progress: PipelineProgress | undefined = job.progress
|
| 155 |
const percent = clampProgress(progress?.overallPercent)
|
|
|
|
| 171 |
: undefined
|
| 172 |
const subtitle =
|
| 173 |
[pageText, stepLabel].filter(Boolean).join(' \u00b7 ') || t('operations.inProgress')
|
| 174 |
+
const warnings = job.warnings ?? []
|
| 175 |
|
| 176 |
return (
|
| 177 |
<BubbleCard>
|
|
|
|
| 187 |
</div>
|
| 188 |
</div>
|
| 189 |
<ProgressBar percent={percent} />
|
| 190 |
+
{warnings.length > 0 && <JobWarnings warnings={warnings} t={t} />}
|
| 191 |
<div className='mt-3 flex justify-end'>
|
| 192 |
<Button
|
| 193 |
data-testid='operation-cancel'
|
ui/lib/api/default/default.msw.ts
CHANGED
|
@@ -323,6 +323,16 @@ export const getEventsResponseMock = (): AppEvent =>
|
|
| 323 |
},
|
| 324 |
...{ event: faker.helpers.arrayElement(['jobProgress'] as const) },
|
| 325 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
{
|
| 327 |
...{
|
| 328 |
error: faker.helpers.arrayElement([
|
|
|
|
| 323 |
},
|
| 324 |
...{ event: faker.helpers.arrayElement(['jobProgress'] as const) },
|
| 325 |
},
|
| 326 |
+
{
|
| 327 |
+
...{
|
| 328 |
+
jobId: faker.string.alpha({ length: { min: 10, max: 20 } }),
|
| 329 |
+
message: faker.string.alpha({ length: { min: 10, max: 20 } }),
|
| 330 |
+
pageIndex: faker.number.int({ min: 0 }),
|
| 331 |
+
stepId: faker.string.alpha({ length: { min: 10, max: 20 } }),
|
| 332 |
+
totalPages: faker.number.int({ min: 0 }),
|
| 333 |
+
},
|
| 334 |
+
...{ event: faker.helpers.arrayElement(['jobWarning'] as const) },
|
| 335 |
+
},
|
| 336 |
{
|
| 337 |
...{
|
| 338 |
error: faker.helpers.arrayElement([
|
ui/lib/api/schemas/appEvent.ts
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
*/
|
| 6 |
import type { DownloadProgress } from './downloadProgress'
|
| 7 |
import type { JobFinishedEvent } from './jobFinishedEvent'
|
|
|
|
| 8 |
import type { LlmTarget } from './llmTarget'
|
| 9 |
import type { PipelineProgress } from './pipelineProgress'
|
| 10 |
import type { SnapshotEvent } from './snapshotEvent'
|
|
@@ -18,6 +19,9 @@ export type AppEvent =
|
|
| 18 |
| (PipelineProgress & {
|
| 19 |
event: 'jobProgress'
|
| 20 |
})
|
|
|
|
|
|
|
|
|
|
| 21 |
| (JobFinishedEvent & {
|
| 22 |
event: 'jobFinished'
|
| 23 |
})
|
|
|
|
| 5 |
*/
|
| 6 |
import type { DownloadProgress } from './downloadProgress'
|
| 7 |
import type { JobFinishedEvent } from './jobFinishedEvent'
|
| 8 |
+
import type { JobWarningEvent } from './jobWarningEvent'
|
| 9 |
import type { LlmTarget } from './llmTarget'
|
| 10 |
import type { PipelineProgress } from './pipelineProgress'
|
| 11 |
import type { SnapshotEvent } from './snapshotEvent'
|
|
|
|
| 19 |
| (PipelineProgress & {
|
| 20 |
event: 'jobProgress'
|
| 21 |
})
|
| 22 |
+
| (JobWarningEvent & {
|
| 23 |
+
event: 'jobWarning'
|
| 24 |
+
})
|
| 25 |
| (JobFinishedEvent & {
|
| 26 |
event: 'jobFinished'
|
| 27 |
})
|
ui/lib/api/schemas/index.ts
CHANGED
|
@@ -35,6 +35,7 @@ export * from './imageRole'
|
|
| 35 |
export * from './jobFinishedEvent'
|
| 36 |
export * from './jobStatus'
|
| 37 |
export * from './jobSummary'
|
|
|
|
| 38 |
export * from './listDownloadsResponse'
|
| 39 |
export * from './listOperationsResponse'
|
| 40 |
export * from './listProjectsResponse'
|
|
|
|
| 35 |
export * from './jobFinishedEvent'
|
| 36 |
export * from './jobStatus'
|
| 37 |
export * from './jobSummary'
|
| 38 |
+
export * from './jobWarningEvent'
|
| 39 |
export * from './listDownloadsResponse'
|
| 40 |
export * from './listOperationsResponse'
|
| 41 |
export * from './listProjectsResponse'
|
ui/lib/api/schemas/jobWarningEvent.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Generated by orval v8.8.0 🍺
|
| 3 |
+
* Do not edit manually.
|
| 4 |
+
* OpenAPI spec version: 0.0.1
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
/**
|
| 8 |
+
* A non-fatal step failure during a pipeline run. The pipeline recovers by
|
| 9 |
+
skipping the rest of the current page's steps and moving on to the next
|
| 10 |
+
page; the UI accumulates these into a list during the job.
|
| 11 |
+
*/
|
| 12 |
+
export interface JobWarningEvent {
|
| 13 |
+
jobId: string
|
| 14 |
+
message: string
|
| 15 |
+
/**
|
| 16 |
+
* 0-based page index where the failure happened.
|
| 17 |
+
* @minimum 0
|
| 18 |
+
*/
|
| 19 |
+
pageIndex: number
|
| 20 |
+
/** Engine id (e.g. `"lama-manga"`) of the step that failed. */
|
| 21 |
+
stepId: string
|
| 22 |
+
/** @minimum 0 */
|
| 23 |
+
totalPages: number
|
| 24 |
+
}
|
ui/lib/events.ts
CHANGED
|
@@ -151,6 +151,10 @@ function dispatch(event: AppEvent): void {
|
|
| 151 |
}
|
| 152 |
return
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
case 'jobFinished':
|
| 155 |
useJobsStore.getState().finished(event.id, event.status, event.error)
|
| 156 |
lastPageByJob.delete(event.id)
|
|
|
|
| 151 |
}
|
| 152 |
return
|
| 153 |
|
| 154 |
+
case 'jobWarning':
|
| 155 |
+
useJobsStore.getState().warning(event)
|
| 156 |
+
return
|
| 157 |
+
|
| 158 |
case 'jobFinished':
|
| 159 |
useJobsStore.getState().finished(event.id, event.status, event.error)
|
| 160 |
lastPageByJob.delete(event.id)
|
ui/lib/stores/jobsStore.ts
CHANGED
|
@@ -3,14 +3,17 @@
|
|
| 3 |
import { create } from 'zustand'
|
| 4 |
import { immer } from 'zustand/middleware/immer'
|
| 5 |
|
| 6 |
-
import type { JobSummary, PipelineProgress } from '@/lib/api/schemas'
|
| 7 |
|
| 8 |
/**
|
| 9 |
* Live job registry, fed by SSE. Keyed by id. `progress` is attached when
|
| 10 |
-
* the backend streams `JobProgress` for a running pipeline job.
|
|
|
|
|
|
|
| 11 |
*/
|
| 12 |
export type JobEntry = JobSummary & {
|
| 13 |
progress?: PipelineProgress
|
|
|
|
| 14 |
}
|
| 15 |
|
| 16 |
type JobsState = {
|
|
@@ -18,6 +21,7 @@ type JobsState = {
|
|
| 18 |
setSnapshot: (jobs: JobSummary[]) => void
|
| 19 |
started: (id: string, kind: string) => void
|
| 20 |
progress: (p: PipelineProgress) => void
|
|
|
|
| 21 |
finished: (id: string, status: JobSummary['status'], error: string | null | undefined) => void
|
| 22 |
clear: () => void
|
| 23 |
byStatus: (status: JobSummary['status']) => JobEntry[]
|
|
@@ -44,6 +48,16 @@ export const useJobsStore = create<JobsState>()(
|
|
| 44 |
}
|
| 45 |
s.jobs[p.jobId] = { ...existing, progress: p }
|
| 46 |
}),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
finished: (id, status, error) =>
|
| 48 |
set((s) => {
|
| 49 |
const existing = s.jobs[id] ?? { id, kind: 'pipeline', status }
|
|
|
|
| 3 |
import { create } from 'zustand'
|
| 4 |
import { immer } from 'zustand/middleware/immer'
|
| 5 |
|
| 6 |
+
import type { JobSummary, JobWarningEvent, PipelineProgress } from '@/lib/api/schemas'
|
| 7 |
|
| 8 |
/**
|
| 9 |
* Live job registry, fed by SSE. Keyed by id. `progress` is attached when
|
| 10 |
+
* the backend streams `JobProgress` for a running pipeline job. `warnings`
|
| 11 |
+
* accumulates non-fatal step failures as they arrive; the pipeline keeps
|
| 12 |
+
* running past them.
|
| 13 |
*/
|
| 14 |
export type JobEntry = JobSummary & {
|
| 15 |
progress?: PipelineProgress
|
| 16 |
+
warnings?: JobWarningEvent[]
|
| 17 |
}
|
| 18 |
|
| 19 |
type JobsState = {
|
|
|
|
| 21 |
setSnapshot: (jobs: JobSummary[]) => void
|
| 22 |
started: (id: string, kind: string) => void
|
| 23 |
progress: (p: PipelineProgress) => void
|
| 24 |
+
warning: (w: JobWarningEvent) => void
|
| 25 |
finished: (id: string, status: JobSummary['status'], error: string | null | undefined) => void
|
| 26 |
clear: () => void
|
| 27 |
byStatus: (status: JobSummary['status']) => JobEntry[]
|
|
|
|
| 48 |
}
|
| 49 |
s.jobs[p.jobId] = { ...existing, progress: p }
|
| 50 |
}),
|
| 51 |
+
warning: (w) =>
|
| 52 |
+
set((s) => {
|
| 53 |
+
const existing = s.jobs[w.jobId] ?? {
|
| 54 |
+
id: w.jobId,
|
| 55 |
+
kind: 'pipeline',
|
| 56 |
+
status: 'running' as JobSummary['status'],
|
| 57 |
+
}
|
| 58 |
+
const warnings = existing.warnings ?? []
|
| 59 |
+
s.jobs[w.jobId] = { ...existing, warnings: [...warnings, w] }
|
| 60 |
+
}),
|
| 61 |
finished: (id, status, error) =>
|
| 62 |
set((s) => {
|
| 63 |
const existing = s.jobs[id] ?? { id, kind: 'pipeline', status }
|
ui/openapi.json
CHANGED
|
@@ -894,6 +894,25 @@
|
|
| 894 |
}
|
| 895 |
]
|
| 896 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 897 |
{
|
| 898 |
"allOf": [
|
| 899 |
{
|
|
@@ -1574,6 +1593,32 @@
|
|
| 1574 |
}
|
| 1575 |
}
|
| 1576 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1577 |
"ListDownloadsResponse": {
|
| 1578 |
"type": "object",
|
| 1579 |
"required": ["downloads"],
|
|
|
|
| 894 |
}
|
| 895 |
]
|
| 896 |
},
|
| 897 |
+
{
|
| 898 |
+
"allOf": [
|
| 899 |
+
{
|
| 900 |
+
"$ref": "#/components/schemas/JobWarningEvent",
|
| 901 |
+
"description": "A single step on one page failed but the pipeline kept running.\nEmitted per failed step so clients can show a non-fatal warning while\nthe job continues with the next page."
|
| 902 |
+
},
|
| 903 |
+
{
|
| 904 |
+
"type": "object",
|
| 905 |
+
"required": ["event"],
|
| 906 |
+
"properties": {
|
| 907 |
+
"event": {
|
| 908 |
+
"type": "string",
|
| 909 |
+
"enum": ["jobWarning"]
|
| 910 |
+
}
|
| 911 |
+
}
|
| 912 |
+
}
|
| 913 |
+
],
|
| 914 |
+
"description": "A single step on one page failed but the pipeline kept running.\nEmitted per failed step so clients can show a non-fatal warning while\nthe job continues with the next page."
|
| 915 |
+
},
|
| 916 |
{
|
| 917 |
"allOf": [
|
| 918 |
{
|
|
|
|
| 1593 |
}
|
| 1594 |
}
|
| 1595 |
},
|
| 1596 |
+
"JobWarningEvent": {
|
| 1597 |
+
"type": "object",
|
| 1598 |
+
"description": "A non-fatal step failure during a pipeline run. The pipeline recovers by\nskipping the rest of the current page's steps and moving on to the next\npage; the UI accumulates these into a list during the job.",
|
| 1599 |
+
"required": ["jobId", "pageIndex", "totalPages", "stepId", "message"],
|
| 1600 |
+
"properties": {
|
| 1601 |
+
"jobId": {
|
| 1602 |
+
"type": "string"
|
| 1603 |
+
},
|
| 1604 |
+
"message": {
|
| 1605 |
+
"type": "string"
|
| 1606 |
+
},
|
| 1607 |
+
"pageIndex": {
|
| 1608 |
+
"type": "integer",
|
| 1609 |
+
"description": "0-based page index where the failure happened.",
|
| 1610 |
+
"minimum": 0
|
| 1611 |
+
},
|
| 1612 |
+
"stepId": {
|
| 1613 |
+
"type": "string",
|
| 1614 |
+
"description": "Engine id (e.g. `\"lama-manga\"`) of the step that failed."
|
| 1615 |
+
},
|
| 1616 |
+
"totalPages": {
|
| 1617 |
+
"type": "integer",
|
| 1618 |
+
"minimum": 0
|
| 1619 |
+
}
|
| 1620 |
+
}
|
| 1621 |
+
},
|
| 1622 |
"ListDownloadsResponse": {
|
| 1623 |
"type": "object",
|
| 1624 |
"required": ["downloads"],
|
ui/public/locales/en-US/translation.json
CHANGED
|
@@ -75,6 +75,8 @@
|
|
| 75 |
"processAll": "Processing all images",
|
| 76 |
"imageProgress": "Image {{current}} / {{total}}",
|
| 77 |
"stepProgress": "Step {{current}} / {{total}}: {{step}}",
|
|
|
|
|
|
|
| 78 |
"cancel": "Stop",
|
| 79 |
"cancelling": "Stopping..."
|
| 80 |
},
|
|
|
|
| 75 |
"processAll": "Processing all images",
|
| 76 |
"imageProgress": "Image {{current}} / {{total}}",
|
| 77 |
"stepProgress": "Step {{current}} / {{total}}: {{step}}",
|
| 78 |
+
"warningsOne": "1 step failed, continuing",
|
| 79 |
+
"warningsOther": "{{count}} steps failed, continuing",
|
| 80 |
"cancel": "Stop",
|
| 81 |
"cancelling": "Stopping..."
|
| 82 |
},
|
ui/public/locales/es-ES/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "Procesando todas las imágenes",
|
| 62 |
"imageProgress": "Imagen {{current}} / {{total}}",
|
| 63 |
"stepProgress": "Paso {{current}} / {{total}}: {{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "Detener",
|
| 65 |
"cancelling": "Deteniendo..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "Procesando todas las imágenes",
|
| 62 |
"imageProgress": "Imagen {{current}} / {{total}}",
|
| 63 |
"stepProgress": "Paso {{current}} / {{total}}: {{step}}",
|
| 64 |
+
"warningsOne": "1 paso falló, continuando",
|
| 65 |
+
"warningsOther": "{{count}} pasos fallaron, continuando",
|
| 66 |
"cancel": "Detener",
|
| 67 |
"cancelling": "Deteniendo..."
|
| 68 |
},
|
ui/public/locales/ja-JP/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "すべての画像を一括処理中",
|
| 62 |
"imageProgress": "画像 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "ステップ {{current}} / {{total}}:{{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "停止",
|
| 65 |
"cancelling": "停止中..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "すべての画像を一括処理中",
|
| 62 |
"imageProgress": "画像 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "ステップ {{current}} / {{total}}:{{step}}",
|
| 64 |
+
"warningsOne": "1 つのステップが失敗しましたが、続行します",
|
| 65 |
+
"warningsOther": "{{count}} 個のステップが失敗しましたが、続行します",
|
| 66 |
"cancel": "停止",
|
| 67 |
"cancelling": "停止中..."
|
| 68 |
},
|
ui/public/locales/ko-KR/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "모든 이미지 처리 중",
|
| 62 |
"imageProgress": "이미지 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "단계 {{current}} / {{total}}: {{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "취소",
|
| 65 |
"cancelling": "취소 중..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "모든 이미지 처리 중",
|
| 62 |
"imageProgress": "이미지 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "단계 {{current}} / {{total}}: {{step}}",
|
| 64 |
+
"warningsOne": "1개 단계 실패, 계속 진행",
|
| 65 |
+
"warningsOther": "{{count}}개 단계 실패, 계속 진행",
|
| 66 |
"cancel": "취소",
|
| 67 |
"cancelling": "취소 중..."
|
| 68 |
},
|
ui/public/locales/pt-BR/translation.json
CHANGED
|
@@ -62,6 +62,8 @@
|
|
| 62 |
"processAll": "Processando todas as imagens",
|
| 63 |
"imageProgress": "Imagem {{current}} / {{total}}",
|
| 64 |
"stepProgress": "Etapa {{current}} / {{total}}: {{step}}",
|
|
|
|
|
|
|
| 65 |
"cancel": "Parar",
|
| 66 |
"cancelling": "Parando..."
|
| 67 |
},
|
|
|
|
| 62 |
"processAll": "Processando todas as imagens",
|
| 63 |
"imageProgress": "Imagem {{current}} / {{total}}",
|
| 64 |
"stepProgress": "Etapa {{current}} / {{total}}: {{step}}",
|
| 65 |
+
"warningsOne": "1 etapa falhou, continuando",
|
| 66 |
+
"warningsOther": "{{count}} etapas falharam, continuando",
|
| 67 |
"cancel": "Parar",
|
| 68 |
"cancelling": "Parando..."
|
| 69 |
},
|
ui/public/locales/ru-RU/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "Обработка всех изображений",
|
| 62 |
"imageProgress": "Изображение {{current}} / {{total}}",
|
| 63 |
"stepProgress": "Шаг {{current}} / {{total}}: {{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "Остановить",
|
| 65 |
"cancelling": "Остановка..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "Обработка всех изображений",
|
| 62 |
"imageProgress": "Изображение {{current}} / {{total}}",
|
| 63 |
"stepProgress": "Шаг {{current}} / {{total}}: {{step}}",
|
| 64 |
+
"warningsOne": "1 шаг не выполнен, продолжаем",
|
| 65 |
+
"warningsOther": "{{count}} шагов не выполнены, продолжаем",
|
| 66 |
"cancel": "Остановить",
|
| 67 |
"cancelling": "Остановка..."
|
| 68 |
},
|
ui/public/locales/tr-TR/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "Tüm görseller işleniyor",
|
| 62 |
"imageProgress": "Görsel {{current}} / {{total}}",
|
| 63 |
"stepProgress": "Adım {{current}} / {{total}}: {{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "Durdur",
|
| 65 |
"cancelling": "Durduruluyor..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "Tüm görseller işleniyor",
|
| 62 |
"imageProgress": "Görsel {{current}} / {{total}}",
|
| 63 |
"stepProgress": "Adım {{current}} / {{total}}: {{step}}",
|
| 64 |
+
"warningsOne": "1 adım başarısız oldu, devam ediliyor",
|
| 65 |
+
"warningsOther": "{{count}} adım başarısız oldu, devam ediliyor",
|
| 66 |
"cancel": "Durdur",
|
| 67 |
"cancelling": "Durduruluyor..."
|
| 68 |
},
|
ui/public/locales/zh-CN/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "正在批量处理所有图片",
|
| 62 |
"imageProgress": "图片 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "步骤 {{current}} / {{total}}:{{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "停止",
|
| 65 |
"cancelling": "正在停止..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "正在批量处理所有图片",
|
| 62 |
"imageProgress": "图片 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "步骤 {{current}} / {{total}}:{{step}}",
|
| 64 |
+
"warningsOne": "1 个步骤失败,继续处理",
|
| 65 |
+
"warningsOther": "{{count}} 个步骤失败,继续处理",
|
| 66 |
"cancel": "停止",
|
| 67 |
"cancelling": "正在停止..."
|
| 68 |
},
|
ui/public/locales/zh-TW/translation.json
CHANGED
|
@@ -61,6 +61,8 @@
|
|
| 61 |
"processAll": "正在批次處理所有圖片",
|
| 62 |
"imageProgress": "圖片 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "步驟 {{current}} / {{total}}:{{step}}",
|
|
|
|
|
|
|
| 64 |
"cancel": "停止",
|
| 65 |
"cancelling": "停止中..."
|
| 66 |
},
|
|
|
|
| 61 |
"processAll": "正在批次處理所有圖片",
|
| 62 |
"imageProgress": "圖片 {{current}} / {{total}}",
|
| 63 |
"stepProgress": "步驟 {{current}} / {{total}}:{{step}}",
|
| 64 |
+
"warningsOne": "1 個步驟失敗,繼續處理",
|
| 65 |
+
"warningsOther": "{{count}} 個步驟失敗,繼續處理",
|
| 66 |
"cancel": "停止",
|
| 67 |
"cancelling": "停止中..."
|
| 68 |
},
|