import {
  pipeline,
  env,
  AutoProcessor,
  AutoModelForVision2Seq,
} from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.10.1';

// Since we download the models from the Hugging Face Hub, we can skip the
// local model check.
env.allowLocalModels = false;

// Reference the elements that we will need.
const status = document.getElementById('status');
const fileUpload = document.getElementById('upload');
const imageContainer = document.getElementById('container');
const example = document.getElementById('example');

const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';

// Create a new object detection pipeline.
status.textContent = 'Loading model...';
const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
status.textContent = 'Ready';

example.addEventListener('click', (e) => {
  e.preventDefault();
  detect(EXAMPLE_URL);
});

fileUpload.addEventListener('change', (e) => {
  const file = e.target.files[0];
  if (!file) {
    return;
  }
  const reader = new FileReader();
  // Run detection once the file has been read as a data URL.
  reader.onload = (e2) => detect(e2.target.result);
  reader.readAsDataURL(file);
});

/**
 * Detect objects in the image and render one bounding box per detection.
 * @param {string} img - Image URL or data URL.
 */
async function detect(img) {
  imageContainer.innerHTML = '';
  imageContainer.style.backgroundImage = `url(${img})`;

  status.textContent = 'Analysing...';
  const output = await detector(img, {
    threshold: 0.5,
    percentage: true, // box coordinates as fractions of image size
  });
  status.textContent = '';
  output.forEach(renderBox);
}

/**
 * Render a bounding box and label on the image.
 * @param {{box: {xmin: number, ymin: number, xmax: number, ymax: number},
 *          label: string}} detection - One pipeline detection result.
 */
function renderBox({ box, label }) {
  const { xmax, xmin, ymax, ymin } = box;

  // Generate a random color for the box.
  // BUGFIX: padStart's pad argument is a string — '0', not the number 0.
  const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, '0');

  // Draw the box (coordinates are fractions, so scale to CSS percentages).
  const boxElement = document.createElement('div');
  boxElement.className = 'bounding-box';
  Object.assign(boxElement.style, {
    borderColor: color,
    left: 100 * xmin + '%',
    top: 100 * ymin + '%',
    width: 100 * (xmax - xmin) + '%',
    height: 100 * (ymax - ymin) + '%',
  });

  // Draw the label.
  const labelElement = document.createElement('span');
  labelElement.textContent = label;
  labelElement.className = 'bounding-box-label';
  labelElement.style.backgroundColor = color;

  boxElement.appendChild(labelElement);
  imageContainer.appendChild(boxElement);
}

// ---------------------------------------------------------------------------
// Donut document-type classifier.
// NOTE(review): this section arrived as Python (🤗 transformers) code pasted
// into the JS file; it is ported here to transformers.js. The checkpoint must
// provide ONNX weights for transformers.js to load it — confirm before use.
// ---------------------------------------------------------------------------
const DOCTYPE_MODEL_ID = 'calumpianojericho/donutclassifier_acctdocs_by_doctype';
// BUGFIX: from_pretrained is async — the original did not await it, and it
// assigned to implicit globals (a ReferenceError in a strict-mode module).
const doctypeProcessor = await AutoProcessor.from_pretrained(DOCTYPE_MODEL_ID);
const doctypeModel = await AutoModelForVision2Seq.from_pretrained(DOCTYPE_MODEL_ID);

/**
 * Classify a document image by type using the Donut classifier.
 * BUGFIX: the original read `classifier_doctype_model` /
 * `classifier_doctype_processor`, names that exist nowhere in the file.
 * @param {any} image_input - Image accepted by the processor.
 * @param {string} [filename] - Unused; kept for caller compatibility.
 * @returns {Promise<string|undefined>} The predicted document class.
 */
async function doctype_classify(image_input, filename) {
  const seq = await inference(doctypeModel && image_input, doctypeModel, doctypeProcessor, 0.90, '', true);
  return seq['class'];
}

/**
 * Run Donut generation on an image and parse the token sequence to JSON.
 *
 * BUGFIXES vs. the pasted original:
 * - `generate` was called without `pixel_values` or `decoder_input_ids`, so
 *   the preprocessed image was never used.
 * - the decoded sequence was never parsed (the `token2json` lines were
 *   commented out), so the returned `seq` was undefined.
 *
 * @param {any} input - Image accepted by the processor.
 * @param {object} model - A vision-encoder-decoder (vision2seq) model.
 * @param {object} processor - The matching Donut processor/tokenizer.
 * @param {number} [threshold=1.0] - Kept for interface compatibility; the
 *   original never used it (confidence was hard-coded to true).
 * @param {string} [task_prompt=''] - Decoder start prompt.
 * @param {boolean} [get_confidence=false] - Kept for interface compatibility;
 *   the original returned only `seq` regardless.
 * @returns {Promise<object>} Parsed key/value output, e.g. { class: '...' }.
 */
async function inference(input, model, processor, threshold = 1.0, task_prompt = '', get_confidence = false) {
  const { input_ids: decoder_input_ids } = processor.tokenizer(task_prompt, {
    add_special_tokens: false,
  });
  const { pixel_values } = await processor(input);

  const outputs = await model.generate(pixel_values, {
    decoder_input_ids,
    max_length: model.config.decoder.max_position_embeddings,
    early_stopping: true,
    pad_token_id: processor.tokenizer.pad_token_id,
    eos_token_id: processor.tokenizer.eos_token_id,
    use_cache: true,
    num_beams: 1,
    bad_words_ids: [[processor.tokenizer.unk_token_id]],
  });

  // transformers.js generate returns the output token ids directly;
  // `.sequences` is handled defensively for return-dict-style results.
  let sequence = processor.batch_decode(outputs.sequences ?? outputs)[0];
  sequence = sequence
    .replace(processor.tokenizer.eos_token, '')
    .replace(processor.tokenizer.pad_token, '')
    .replace(/<.*?>/, '') // drop the leading task-start token, as in the Python original
    .trim();
  console.log(sequence);
  return tokenSequenceToJson(sequence);
}

/**
 * Minimal port of Python's `DonutProcessor.token2json` for flat outputs such
 * as "<s_class>invoice</s_class>" → { class: 'invoice' }.
 * NOTE(review): transformers.js ships no token2json; confirm the tag layout
 * against the Python training code — nested structures are NOT handled here.
 * @param {string} sequence - Decoded, cleaned token sequence.
 * @returns {object} Flat key/value mapping of tag name to inner text.
 */
function tokenSequenceToJson(sequence) {
  const result = {};
  const tagPattern = /<s_([^>/]+)>([\s\S]*?)<\/s_\1>/g;
  let match;
  while ((match = tagPattern.exec(sequence)) !== null) {
    result[match[1]] = match[2].trim();
  }
  return result;
}