// Hugging Face Space: ConradLax/doc-classifier (status: Running)
// File size: 3,998 bytes

import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.10.1';

// Since we will download the model from the Hugging Face Hub, we can skip the local model check
env.allowLocalModels = false;

// Reference the elements that we will need.
// NOTE(review): getElementById returns null when no element carries the given id, and the
// code further down dereferences these without a null check, so the page markup must
// define all four ids.
const status = document.getElementById('status'); // shows progress text such as 'Loading model...' / 'Analysing...'
const fileUpload = document.getElementById('upload'); // its 'change' handler reads e.target.files
const imageContainer = document.getElementById('container'); // gets the background image and the bounding boxes
const example = document.getElementById('example'); // clicking it runs detection on EXAMPLE_URL

// Sample image that is analysed when the example element is clicked
const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';

// Create a new object detection pipeline.
// The status element tracks progress. If loading fails, the message is updated so the page
// does not stay on 'Loading model...' forever; the original error is then rethrown so that
// module evaluation stops exactly as it did before this handling was added.
status.textContent = 'Loading model...';
let loadedDetector;
try {
    loadedDetector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
} catch (err) {
    status.textContent = 'Failed to load model';
    throw err;
}
// Keep the public binding a const so later code cannot reassign it
const detector = loadedDetector;
status.textContent = 'Ready';

// Analyse the sample image whenever the example element is clicked
example.addEventListener('click', function onExampleClick(event) {
    // Suppress the element's default click behaviour before starting detection
    event.preventDefault();
    detect(EXAMPLE_URL);
});

// Run detection on an image the user picks through the upload element
fileUpload.addEventListener('change', function (e) {
    const file = e.target.files[0];
    if (!file) {
        // Nothing in the selection, so there is nothing to analyse
        return;
    }

    const reader = new FileReader();

    // Once the file has been read as a data URL, hand that URL to detect()
    reader.onload = (e2) => detect(e2.target.result);

    // Without an error handler a failed read is silent and the page simply sits idle
    reader.onerror = () => {
        status.textContent = 'Could not read the selected file';
        console.error(reader.error);
    };

    reader.readAsDataURL(file);
});


/**
 * Show an image in the container, run the detector on it and draw every result.
 *
 * A detector failure is reported through the status element and the console instead of
 * leaving the status on 'Analysing...'. Because the failure is handled here, the returned
 * promise resolves (without drawing anything) even when detection fails.
 *
 * @param {string} img - Image reference; used both inside the CSS `url(...)` background and
 *   as the first argument to the detector (callers pass a URL or a data URL).
 * @returns {Promise<void>} Settles once the results are drawn or the failure is reported.
 */
async function detect(img) {
    // Remove boxes left over from a previous run before showing the new image
    imageContainer.innerHTML = '';
    imageContainer.style.backgroundImage = `url(${img})`;

    status.textContent = 'Analysing...';
    let output;
    try {
        // NOTE(review): `percentage: true` presumably yields box coordinates as fractions
        // (renderBox multiplies them by 100) — confirm against the library documentation.
        output = await detector(img, {
            threshold: 0.5,
            percentage: true,
        });
    } catch (err) {
        // Callers do not await this function, so surface the problem here
        status.textContent = 'Detection failed';
        console.error(err);
        return;
    }
    status.textContent = '';
    output.forEach(renderBox);
}

/**
 * Draw one detection as a coloured rectangle with a caption inside the image container.
 *
 * @param {object} detection - A single detector result.
 * @param {{xmin: number, xmax: number, ymin: number, ymax: number}} detection.box -
 *   Box edges; each value is multiplied by 100 and applied as a CSS percentage.
 * @param {string} detection.label - Text shown in the caption.
 * @returns {void}
 */
function renderBox({ box, label }) {
    const { xmax: x1, xmin: x0, ymax: y1, ymin: y0 } = box;

    // One random colour is shared by the outline and the caption background
    const rgb = Math.floor(Math.random() * 0xFFFFFF);
    const color = `#${rgb.toString(16).padStart(6, '0')}`;

    // Scale a box value by 100 and format it as a CSS percentage
    const toPercent = (value) => `${100 * value}%`;

    // Outline element, positioned and sized relative to the container
    const frame = document.createElement('div');
    frame.className = 'bounding-box';
    frame.style.borderColor = color;
    frame.style.left = toPercent(x0);
    frame.style.top = toPercent(y0);
    frame.style.width = toPercent(x1 - x0);
    frame.style.height = toPercent(y1 - y0);

    // Caption element carrying the label text
    const caption = document.createElement('span');
    caption.textContent = label;
    caption.className = 'bounding-box-label';
    caption.style.backgroundColor = color;

    frame.appendChild(caption);
    imageContainer.appendChild(frame);
}


// NOTE(review): these two statements are Python-style `from_pretrained` calls on
// `VisionEncoderDecoderModel` / `DonutProcessor`, neither of which is imported in the
// visible part of this file (only `pipeline` and `env` are). `model` and `processor` are
// also never declared, and this file is an ES module (it uses `import` and top-level
// `await`), so assigning to them throws a ReferenceError instead of creating globals.
// TODO confirm whether this section is leftover from a Python port and should be removed
// or rewritten against the JavaScript library.
model = VisionEncoderDecoderModel.from_pretrained("calumpianojericho/donutclassifier_acctdocs_by_doctype")
processor = DonutProcessor.from_pretrained("calumpianojericho/donutclassifier_acctdocs_by_doctype")

// Calls `inference` with the "<s_classifier_acct>" task prompt and returns the result of
// `seq.get('class')`. The `filename` parameter is accepted but never used in the body.
//
// NOTE(review): the body is Python written inside JS function syntax and does not behave
// the way it reads:
//   - `classifier_doctype_model` / `classifier_doctype_processor` are not defined in the
//     visible source (the top-level statements assign to `model` / `processor` instead).
//   - `seq, is_confident = inference(...)` is a comma expression, not tuple unpacking:
//     `seq` is only read, and the call's return value is assigned to `is_confident`.
//   - `threshold=0.90`, `task_prompt=...`, `get_confidence=True` are assignment
//     expressions passed positionally, not keyword arguments, and `True` is an
//     undeclared identifier (the JS literal is `true`).
//   - `model`, `processor`, `seq` and `is_confident` are not declared in this function.
function doctype_classify(image_input, filename) {
    // Assigns to undeclared names; see the review note above
    model = classifier_doctype_model
    processor = classifier_doctype_processor
    seq, is_confident = inference(image_input, model, processor, threshold=0.90, task_prompt="<s_classifier_acct>", get_confidence=True)
    return seq.get('class')    
}

// Calls `model.generate`, decodes the first generated sequence with `processor.batch_decode`,
// strips the tokenizer's EOS and PAD token strings from it, logs it, and returns `seq`.
//
// Parameters as declared: `input` (copied to `pil_img`), `model`, `processor`,
// `threshold` (default 1.0), `task_prompt` (default ""), `get_confidence` (default `False`).
//
// NOTE(review): the body is Python written inside JS function syntax:
//   - `True` / `False` are undeclared identifiers here (the JS literals are `true` /
//     `false`), and `np` is not defined in the visible source.
//   - Arguments written as `name=value` are assignment expressions passed positionally,
//     not keyword arguments.
//   - `decoder_input_ids` and `pixel_values` are computed but never passed to
//     `model.generate`.
//   - `threshold`, `get_confidence` and `is_confident` are never read in this function.
//   - `seq` is returned, but the only line that would define it is commented out.
function inference(input, model, processor, threshold=1.0, task_prompt="", get_confidence=False){
    is_confident = True
    // Tokenizes the task prompt; the result is not used later in this function
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    pil_img=input

    // Runs the input through the processor; the result is not used later in this function
    image = np.array(pil_img)
    pixel_values = processor(image, return_tensors="pt").pixel_values

    // NOTE(review): no image or prompt tensors are passed in this call
    outputs = model.generate(
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id= processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
        output_scores=True,
    )

    // Decode the first sequence and drop the EOS / PAD token text (first occurrence of each)
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    console.log(sequence)
    //  sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()

    // seq = processor.token2json(sequence)

    // NOTE(review): `seq` is never assigned above because the line that sets it is commented out
    return seq
}