// Source: starry/backend/libs/predictJianpuPages.ts
// Last commit by k-l-lambda: "feat: add Python ML services (CPU mode) with model download" (2b7aae2)
import { Canvas, Image, loadImage } from 'skia-canvas';
import sha1 from 'sha1';
import { WeakLRUCache } from 'weak-lru-cache';
import * as starry from '../../src/starry';
import { LayoutResult, PyClients } from './predictors';
import { constructSystem, convertImage } from './util';
import { SemanticGraph } from '../../src/starry';
// Install fallbacks on globalThis, each only when the global is not already defined:
// - OffscreenCanvas falls back to skia-canvas' Canvas,
// - Image falls back to skia-canvas' Image,
// - btoa falls back to a Buffer-based implementation that treats the input as a binary string.
globalThis.OffscreenCanvas = (globalThis as any).OffscreenCanvas || Canvas;
globalThis.Image = globalThis.Image || Image;
globalThis.btoa = globalThis.btoa || ((str: string) => Buffer.from(str, 'binary').toString('base64'));
// Left padding, in pixels, passed as `paddingLeft` when shooting a staff image; it is divided by a
// spec's `viewportUnit` to express the same offset in staff image-position units.
const STAFF_PADDING_LEFT = 32;
// NOTE(review): not referenced anywhere in the visible part of this file — confirm it is still needed.
const MAX_PAGE_WIDTH = 1200;
// Viewport specs used when shooting staff images:
// - viewportHeight: pixel height of the staff canvas before `scaling` is applied,
// - viewportUnit: multiplier that converts system/staff position units into canvas pixels.
// Spec used by gaugeStaff.
const GAUGE_VISION_SPEC = {
viewportHeight: 256,
viewportUnit: 8,
};
// Spec used by maskStaff.
const MASK_VISION_SPEC = {
viewportHeight: 192,
viewportUnit: 8,
};
// Spec used by shootStaffBackgroundImage and semanticStaff.
const SEMANTIC_VISION_SPEC = {
viewportHeight: 192,
viewportUnit: 8,
};
/**
 * Timing statistics returned by `predictPages` as `stat`.
 * NOTE(review): the commented-out return in `predictPages` suggests `cost` is the whole-run time
 * (`t3 - t0`) and `pagesCost` the page-processing time (`t2 - t1`) — confirm against the implementation.
 */
interface OMRStat {
cost: number; // in milliseconds
pagesCost: number; // in milliseconds
pages: number;
}
/**
 * Aggregated cost figures.
 * NOTE(review): not referenced in the visible part of this file; presumably accumulates
 * `OMRStat` values over `scoreN` scores — confirm against the caller.
 */
interface OMRSummary {
costTotal: number; // in milliseconds
costPerPage: number; // in milliseconds
pagesTotal: number;
scoreN: number;
}
/**
 * Normalise an image for layout detection by drawing it onto a new canvas of the requested
 * width while keeping the image's aspect ratio.
 * @param image source image; its `width` and `height` define the aspect ratio
 * @param width width of the returned canvas, in pixels
 * @returns a new canvas of size `width` x `(image.height / image.width) * width` holding the scaled image
 */
function scaleForLayout(image: Image, width: number): Canvas {
  // Compute the height once so the canvas and the drawn image always share exactly the same size.
  const height = (image.height / image.width) * width;

  const canvas = new Canvas(width, height);
  const ctx = canvas.getContext('2d');
  ctx.drawImage(image, 0, 0, width, height);

  return canvas;
}
/**
 * Choose a global unit size and page display size that fit the layout detections of all pages.
 *
 * For every detected page the logical page width (`vw`) is the source width divided by the
 * smallest staff interval found in the page's areas. The widest logical page determines
 * `score.unitSize`; the tallest height/width ratio determines the page height.
 *
 * @param score score whose `unitSize` and `pageSize` are assigned
 * @param detections per-page layout results; falsy entries and entries without `detection` are ignored
 * @param outputWidth page width of the output, in pixels
 * @throws Error with message 'empty result' when no page yields a usable staff interval
 */
function setGlobalPageSize(score: starry.Score, detections: LayoutResult[], outputWidth: number) {
  const sizeRatios = detections
    .filter((item) => item && item.detection)
    .map(({ detection, sourceSize }) => {
      const intervals = detection.areas.filter((area) => area.staves?.middleRhos?.length).map((area) => area.staves.interval);
      // Math.min() of an empty list is Infinity, which makes `vw` 0 for pages without any staff area.
      const staffInterval = Math.min(...intervals);

      return {
        vw: sourceSize.width / staffInterval, // page width in logical units
        hwr: sourceSize.height / sourceSize.width, // page height/width ratio
      };
    });

  // Only pages with a real staff interval can define the unit size; a zero or non-finite `vw`
  // would otherwise turn `unitSize` into Infinity, 0 or NaN.
  const usableWidths = sizeRatios.map((ratio) => ratio.vw).filter((vw) => Number.isFinite(vw) && vw > 0);
  if (!usableWidths.length) {
    throw new Error('empty result');
  }

  const maxVW = Math.max(...usableWidths);
  const maxAspect = Math.max(...sizeRatios.map((ratio) => ratio.hwr));

  score.unitSize = outputWidth / maxVW;

  // Display size of a page.
  score.pageSize = {
    width: outputWidth,
    height: outputWidth * maxAspect,
  };
}
/** Start a single deferred task right away and hand back the promise it returns. */
const batchTask = (fn: () => Promise<any>) => {
  return fn();
};
/**
 * Start every deferred task immediately, in array order, and resolve with all of their
 * results once every task has finished.
 */
const concurrencyTask = (fns: (() => Promise<any>)[]) => {
  const running = fns.map((start) => start());
  return Promise.all(running);
};
/**
 * Render the background image of a system onto a white canvas positioned for one staff.
 *
 * The canvas is `spec.viewportHeight * scaling` pixels high, and the image is shifted
 * vertically by the staff's `top + staffY` relative to the middle of the viewport.
 *
 * @param system system providing `backgroundImage`, `imagePosition` and `staves`
 * @param staffIndex index of the staff inside `system.staves`
 * @param options `paddingLeft` (pixels added left of the image, default 0), `scaling`
 *   (multiplier applied to all canvas sizes, default 1) and the viewport `spec`
 * @returns the rendered canvas, or `null` when the system, its background image or the staff is missing
 */
const shootStaffImage = async (
  system: starry.System,
  staffIndex: number,
  { paddingLeft = 0, scaling = 1, spec }: { paddingLeft?: number; scaling?: number; spec: { viewportHeight: number; viewportUnit: number } }
): Promise<Canvas> => {
  if (!system?.backgroundImage) {
    return null;
  }

  const staff = system.staves[staffIndex];
  if (!staff) {
    return null;
  }

  const { viewportHeight, viewportUnit } = spec;
  const halfViewportUnits = viewportHeight / viewportUnit / 2;

  // Placement of the system image, converted from system units to canvas pixels.
  const drawWidth = system.imagePosition.width * viewportUnit;
  const drawHeight = system.imagePosition.height * viewportUnit;
  const drawX = system.imagePosition.x * viewportUnit + paddingLeft;
  const drawY = (system.imagePosition.y - (staff.top + staff.staffY - halfViewportUnits)) * viewportUnit;

  const canvas = new Canvas(Math.round(drawWidth + drawX) * scaling, viewportHeight * scaling);
  const context = canvas.getContext('2d');

  // Start from a white background so uncovered regions are not transparent.
  context.fillStyle = 'white';
  context.fillRect(0, 0, canvas.width, canvas.height);

  const background = await loadImage(system.backgroundImage);
  context.drawImage(background, drawX * scaling, drawY * scaling, drawWidth * scaling, drawHeight * scaling);

  return canvas;
};
/**
 * Cut one image per detected layout area out of a page and build the page's systems from them.
 *
 * The page canvas is first redrawn through `page.source.matrix` onto a canvas of the same
 * size; every layout area is then copied from that canvas into its own canvas and passed to
 * `constructSystem`, whose result is stored in `page.systems` at the area's index.
 * `page.width` and `page.height` are set from the score's page size and unit size.
 *
 * @param params.page page whose `layout.areas` are shot and whose `systems` are filled
 * @param params.score score providing `pageSize` and `unitSize`
 * @param params.pageCanvas canvas onto which the original page image has been drawn
 * @returns the transformed page canvas, or `null` when the page has no layout areas
 */
async function shootImageByDetection({
  page,
  score,
  pageCanvas,
}: {
  score: starry.Score;
  page: starry.Page;
  pageCanvas: Canvas; // canvas with the original image already drawn on it
}) {
  const areas = page?.layout?.areas;
  if (!areas?.length) {
    return null;
  }

  page.width = score.pageSize.width / score.unitSize;
  page.height = score.pageSize.height / score.unitSize;

  const correctCanvas = new Canvas(pageCanvas.width, pageCanvas.height);
  const ctx = correctCanvas.getContext('2d');

  ctx.save();
  const { width, height } = correctCanvas;
  const [a, b, c, d] = page.source.matrix;
  // Translation terms that accompany the 2x2 matrix (a, b, c, d).
  const translateX = -0.5 * width + 0.5 * a * width + 0.5 * b * height;
  const translateY = -0.5 * height + 0.5 * c * width + 0.5 * d * height;
  ctx.setTransform(a, b, c, d, translateX, translateY);
  ctx.drawImage(pageCanvas, 0, 0);
  ctx.restore();

  const interval = page.source.interval;
  // Centre of the source canvas, in units of `interval`.
  const sourceCenter = {
    x: pageCanvas.width / 2 / interval,
    y: pageCanvas.height / 2 / interval,
  };

  areas.forEach((area, systemIndex) => {
    console.assert(area.staves?.middleRhos?.length, '[shootImageByDetection] empty area:', area);

    // Copy the area's pixels into a canvas of their own.
    const pixels = ctx.getImageData(area.x, area.y, area.width, area.height);
    const systemCanvas = new Canvas(area.width, area.height);
    systemCanvas.getContext('2d').putImageData(pixels, 0, 0);

    const imageSize = { width: area.width, height: area.height };
    const position = {
      x: (area.x + area.staves.phi1) / interval - sourceCenter.x + page.width / 2,
      y: area.y / interval - sourceCenter.y + page.height / 2,
    };

    page.systems[systemIndex] = constructSystem({
      page,
      backgroundImage: systemCanvas.toBufferSync('png'),
      detection: area.staves,
      imageSize,
      position,
    });
  });

  return correctCanvas;
}
/**
 * Shoot the staff image with the semantic viewport spec and store it on the staff as its
 * PNG background image, together with the matching image position.
 * @param params.system system the staff belongs to
 * @param params.staff staff that receives `backgroundImage` and `imagePosition`
 * @param params.staffIndex index of the staff inside the system
 */
async function shootStaffBackgroundImage({ system, staff, staffIndex }: { system: starry.System; staff: starry.Staff; staffIndex: number }) {
  const { viewportHeight, viewportUnit } = SEMANTIC_VISION_SPEC;

  const sourceCanvas = await shootStaffImage(system, staffIndex, {
    paddingLeft: STAFF_PADDING_LEFT,
    spec: SEMANTIC_VISION_SPEC,
  });

  staff.backgroundImage = sourceCanvas.toBufferSync('png');

  // Pixel sizes are converted back to units by dividing by the viewport unit.
  staff.imagePosition = {
    x: -STAFF_PADDING_LEFT / viewportUnit,
    y: staff.staffY - viewportHeight / 2 / viewportUnit,
    width: sourceCanvas.width / viewportUnit,
    height: sourceCanvas.height / viewportUnit,
  };
}
/**
 * Deformation correction of a single staff.
 *
 * Shoots the staff image with the gauge viewport spec at 2x scaling, sends it together with
 * the gauge image and a base line position to the `gaugeRenderer` predictor, then stores the
 * returned buffer as the staff's background image and clears its mask image.
 *
 * @param params.system system the staff belongs to; its `middleY` is used for the base line
 * @param params.staff staff that receives `backgroundImage`, `imagePosition` and `maskImage`
 * @param params.staffIndex index of the staff inside the system
 * @param params.gaugeImage gauge image forwarded to the predictor
 * @param params.pyClients predictor clients providing `predictScoreImages`
 */
async function gaugeStaff({
  system,
  staff,
  staffIndex,
  gaugeImage,
  pyClients,
}: {
  system: starry.System;
  staff: starry.Staff;
  staffIndex: number;
  gaugeImage: Buffer;
  pyClients: PyClients;
}) {
  const { viewportHeight, viewportUnit } = GAUGE_VISION_SPEC;

  const sourceCanvas = await shootStaffImage(system, staffIndex, {
    paddingLeft: STAFF_PADDING_LEFT,
    spec: GAUGE_VISION_SPEC,
    scaling: 2,
  });
  const sourceBuffer = sourceCanvas.toBufferSync('png');

  // Vertical position of the system's middle line inside the staff viewport, in pixels.
  const baseY = (system.middleY - (staff.top + staff.staffY)) * viewportUnit + viewportHeight / 2;

  const rendered = await pyClients.predictScoreImages('gaugeRenderer', [sourceBuffer, gaugeImage, baseY]);
  const { buffer, size } = rendered;

  staff.backgroundImage = buffer;
  staff.imagePosition = {
    x: -STAFF_PADDING_LEFT / viewportUnit,
    y: staff.staffY - viewportHeight / 2 / viewportUnit,
    width: size.width / viewportUnit,
    height: size.height / viewportUnit,
  };
  // Clear the staff's mask image.
  staff.maskImage = null;
}
/**
 * Denoising of a single staff: attach the mask image to the staff and derive the staff's
 * image position from the mask's pixel size and the mask viewport spec.
 * @param params.staff staff that receives `maskImage` and `imagePosition`
 * @param params.staffIndex index of the staff (not used by this function)
 * @param params.maskImage encoded mask image; loaded only to read its dimensions
 */
async function maskStaff({ staff, staffIndex, maskImage }: { staff: starry.Staff; staffIndex: number; maskImage: Buffer }) {
  const { viewportHeight, viewportUnit } = MASK_VISION_SPEC;

  const mask = await loadImage(maskImage);

  staff.maskImage = maskImage;
  staff.imagePosition = {
    x: -STAFF_PADDING_LEFT / viewportUnit,
    y: staff.staffY - viewportHeight / 2 / viewportUnit,
    width: mask.width / viewportUnit,
    height: mask.height / viewportUnit,
  };
}
/**
 * Semantic recognition of a single staff: apply a predicted semantic graph to the staff and
 * re-assemble its system.
 *
 * The graph is first shifted left by the staff padding (converted to units), then assigned
 * to both the system and the staff; the staff's predicted tokens are cleared and the system
 * is assembled with the score's semantic confidence threshold.
 *
 * @param params.score score providing `settings` and `assembleSystem`
 * @param params.staffIndex index of the staff inside the system
 * @param params.system system the staff belongs to
 * @param params.staff staff receiving the semantics
 * @param params.graph predicted semantic graph; it is offset in place
 */
async function semanticStaff({
  score,
  staffIndex,
  system,
  staff,
  graph,
}: {
  score: starry.Score;
  staffIndex: number;
  system: starry.System;
  staff: starry.Staff;
  graph: SemanticGraph;
}) {
  // Undo the left padding that was added when the staff image was shot.
  graph.offset(-STAFF_PADDING_LEFT / SEMANTIC_VISION_SPEC.viewportUnit, 0);

  system.assignSemantics(staffIndex, graph);
  staff.assignSemantics(graph);
  staff.clearPredictedTokens();

  // Fall back to 1 only when no threshold is configured; `||` would also replace an explicit 0.
  score.assembleSystem(system, score.settings?.semanticConfidenceThreshold ?? 1);
}
/**
 * Replace every image reference held by a page with the value returned by `onReplaceImageKey`.
 *
 * The slots are visited in this order: the page source `url`, then for each system its
 * `backgroundImage` followed by each staff's `backgroundImage` and `maskImage`.
 * All slots are collected before the first replacement is made.
 *
 * @param page page whose source, systems and staves are updated in place
 * @param onReplaceImageKey called with the current value of each slot; its return value is stored back
 */
function replacePageImages(page: starry.Page, onReplaceImageKey: (src: string) => any) {
  const slots: [any, string][] = [[page.source, 'url']];

  page.systems.forEach((system) => {
    slots.push([system, 'backgroundImage']);
    system.staves.forEach((staff) => {
      slots.push([staff, 'backgroundImage'], [staff, 'maskImage']);
    });
  });

  for (const [target, key] of slots) {
    target[key] = onReplaceImageKey(target[key]);
  }
}
/** Progress counters of one stage: `total` as set by `OMRProgress.setTotal`, `finished` as advanced by `OMRProgress.increase`. */
export type TaskProgress = { total?: number; finished?: number };
/** One input page handed to `predictPages`. */
export interface OMRPage {
// Page image, given either as a string or as a Buffer.
url: string | Buffer;
// NOTE(review): presumably the key under which the page result is cached in the page store — confirm against predictPages.
key?: string;
// Layout result supplied for this page, if any.
layout?: LayoutResult;
// NOTE(review): presumably forces this page to be recomputed instead of read from the store — confirm.
renew?: boolean;
// NOTE(review): presumably toggles the gauge stage (see gaugeStaff) for this page — confirm.
enableGauge?: boolean;
}
/**
 * Progress of every recognition stage, keyed by stage name.
 * The keys double as the stage identifiers accepted by `OMRProgress` and by `OMROption.processes`.
 */
export interface ProgressState {
layout?: TaskProgress;
text?: TaskProgress;
gauge?: TaskProgress;
mask?: TaskProgress;
semantic?: TaskProgress;
regulate?: TaskProgress;
brackets?: TaskProgress;
}
/**
 * Tracks per-stage progress and notifies a listener whenever a stage advances.
 */
class OMRProgress {
  /** Current progress of every stage that has been registered or advanced. */
  state: ProgressState = {};
  /** Listener invoked with the full state after each `increase`; may be absent. */
  onChange: (evt: ProgressState) => void;

  /**
   * @param onChange listener called with the whole progress state after every `increase`
   */
  constructor(onChange: (evt: ProgressState) => void) {
    this.onChange = onChange;
  }

  /**
   * Register the total amount of work of a stage.
   * A total that has already been set is kept; a stage that was only advanced so far
   * (and therefore has no total yet) receives the total without losing its `finished` count.
   * @param stage stage name
   * @param total number of work items of the stage
   */
  setTotal(stage: keyof ProgressState, total: number) {
    const info = this.state[stage];
    if (!info) {
      this.state[stage] = { total, finished: 0 };
    } else if (info.total == null) {
      info.total = total;
    }
  }

  /**
   * Advance a stage and notify the listener.
   * @param stage stage name
   * @param step number of work items finished, 1 by default
   */
  increase(stage: keyof ProgressState, step = 1) {
    // Store the entry back into the state so progress on a not-yet-registered stage is not lost.
    const info: TaskProgress = (this.state[stage] = this.state[stage] || { finished: 0 });
    info.finished = (info.finished ?? 0) + step;

    // The listener is optional at the call site (OMROption.onProgress), so guard the call.
    this.onChange?.(this.state);
  }
}
/** An image reference: either a string or the raw image bytes. */
type SourceImage = string | Buffer;

/** Options accepted by `predictPages`. */
export interface OMROption {
  /** Width of the output pages. */
  outputWidth?: number;
  /** Title of the score. */
  title?: string;
  /** Key/value store for page results. */
  pageStore?: {
    has?: (key: string) => Promise<boolean>;
    get: (key: string) => Promise<string>;
    set: (key: string, val: string) => Promise<void>;
  };
  // NOTE(review): presumably forces recomputation instead of reading from `pageStore` — confirm against predictPages.
  renew?: boolean;
  /** Stages to run, chosen among the keys of `ProgressState`. */
  processes?: (keyof ProgressState)[];
  /** Progress listener receiving the progress state. */
  onProgress?: (progress: ProgressState) => void;
  /** Replaces every image reference; used for uploading or format conversion. */
  onReplaceImage?: (src: SourceImage) => Promise<string>;
}
// In-memory cache backing the default page store.
const lruCache = new WeakLRUCache();

/** Default page store: reads and writes go straight to the in-memory `lruCache`. */
const pageStore = {
  get: async (key: string) => lruCache.getValue(key) as string,
  set: async (key: string, val: string) => {
    lruCache.setValue(key, val);
  },
};
/**
 * Default image replacer: converts an image into a base64 `data:image/webp` URL.
 *
 * Buffers, `http(s)://` URLs and `data:image/` URLs are passed through `convertImage`;
 * any other value is returned unchanged.
 * @param src image reference to replace
 * @returns the WebP data URL, or `src` itself when it is not convertible
 */
const onReplaceImage = async (src: SourceImage) => {
  // Anything that is neither raw bytes nor an http(s)/data-image URL is left untouched.
  if (!(src instanceof Buffer) && !(typeof src === 'string' && (/^https?:\/\//.test(src) || /^data:image\//.test(src)))) {
    return src;
  }

  const converted = await convertImage(src);
  const webpBuffer = converted.buffer;

  return `data:image/webp;base64,${webpBuffer.toString('base64')}`;
};
/**
 * Recognize all page images of a score.
 *
 * NOTE(review): in this view the body contains only a commented-out `return`, so the function
 * resolves to `undefined` despite its declared return type — confirm whether the
 * implementation was removed intentionally.
 *
 * @param pyClients prediction clients (type `PyClients`)
 * @param images pages to recognize
 * @param option recognition options; only when the argument is omitted entirely does it default
 *   to an output width of 1200, the in-memory `pageStore` and the default `onReplaceImage`
 * @returns declared as the recognized score, the indices of omitted pages and timing statistics
 */
export const predictPages = async (
pyClients: PyClients,
images: OMRPage[],
option: OMROption = { outputWidth: 1200, pageStore, onReplaceImage }
): Promise<{ score: starry.Score; omitPages: number[]; stat: OMRStat }> => {
// return {
// score,
// omitPages,
// stat: {
// cost: t3 - t0,
// pagesCost: t2 - t1,
// pages: n_page,
// },
// };
};