Spaces:
Running
Running
File size: 1,937 Bytes
ca97aa9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import { AutoProcessor, VLChatProcessor } from "../../../src/transformers.js";
import { MAX_PROCESSOR_LOAD_TIME, MAX_TEST_EXECUTION_TIME } from "../../init.js";
/**
 * Registers the `VLChatProcessor` test suite.
 *
 * The processor for `onnx-community/Janus-1.3B-ONNX` is loaded in `beforeAll`; the single test then
 * runs it on a one-message conversation that references one image and checks the returned outputs.
 */
export default () => {
  describe("VLChatProcessor", () => {
    const checkpoint = "onnx-community/Janus-1.3B-ONNX";

    /** @type {VLChatProcessor} */
    let vl_processor;

    beforeAll(async () => {
      vl_processor = await AutoProcessor.from_pretrained(checkpoint);
    }, MAX_PROCESSOR_LOAD_TIME);

    /** Casts a mask tensor to float32 and returns its mean as a plain number. */
    const mask_mean = (mask) => mask.to("float32").mean().item();

    /** Runs the processor on an image + text prompt and verifies every returned field. */
    const image_and_text = async () => {
      // Prepare inputs
      const image_url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/quadratic_formula.png";
      const messages = [
        {
          role: "User",
          content: "<image_placeholder>\nConvert the formula into latex code.",
          images: [image_url],
        },
      ];

      const outputs = await vl_processor(messages);

      // Reference values the outputs are compared against.
      const expected_tokens = 631;
      const image_tokens = vl_processor.config.num_image_tokens; // 576
      const side = vl_processor.image_processor.config.image_size; // 384
      const sequence_dims = [1, expected_tokens];

      // Text-side tensors all share the [batch, sequence] shape.
      expect(outputs.input_ids.dims).toEqual(sequence_dims);
      expect(outputs.attention_mask.dims).toEqual(sequence_dims);

      // The fraction of sequence positions flagged as image tokens must match the config.
      expect(outputs.images_seq_mask.dims).toEqual(sequence_dims);
      expect(mask_mean(outputs.images_seq_mask)).toBeCloseTo(image_tokens / expected_tokens, 6);

      // A mean of 1 means every entry of the embedding mask is set.
      expect(outputs.images_emb_mask.dims).toEqual([1, 1, image_tokens]);
      expect(mask_mean(outputs.images_emb_mask)).toBeCloseTo(1);

      // Image-side outputs.
      expect(outputs.pixel_values.dims).toEqual([1, 1, 3, side, side]);
      expect(outputs.pixel_values.mean().item()).toBeCloseTo(0.5999642610549927, 6);
      expect(outputs.original_sizes).toEqual([[206, 767]]);
      expect(outputs.reshaped_input_sizes).toEqual([[103, side]]);
    };

    it("Image and text", image_and_text, MAX_TEST_EXECUTION_TIME);
  });
};
|