| <!DOCTYPE html> |
| <html lang="en"> |
|
|
| <head> |
|   <meta charset="UTF-8"> |
|   <!-- Bootstrap's responsive grid and utilities expect a viewport declaration; without it mobile browsers render the page at desktop width. --> |
|   <meta name="viewport" content="width=device-width, initial-scale=1"> |
|   <title>Image to Text - Hugging Face Transformers.js</title> |
|
|   <!-- Load Transformers.js as an ES module and publish `pipeline` on `window`, because the classic script at the end of the body cannot use `import` itself. --> |
|   <script type="module"> |
|     import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.5.4'; |
|
|     window.pipeline = pipeline; |
|   </script> |
|
|   <!-- Bootstrap 5 styles, followed by the site stylesheet (loaded second). --> |
|   <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.0/dist/css/bootstrap.min.css" rel="stylesheet"> |
|   <link rel="stylesheet" href="css/styles.css"> |
| </head> |
| <body> |
| <div class="container-main"> |
|
|
| |
| <!-- Top navigation row: link back to the main page (index.html). --> |
| <div class="row mt-5"> |
|   <div class="col-md-12 text-center"> |
|     <a |
|       class="btn btn-outline-secondary" |
|       href="index.html" |
|       style="color: #3c650b; border-color: #3c650b;" |
|     >Back to Main Page</a> |
|   </div> |
| </div> |
|
|
| |
| <div class="container mt-5"> |
| |
| <!-- Page title: topic area followed by the task demonstrated on this page. --> |
| <div class="text-center"> |
|   <h2>Computer Vision</h2> |
|   <h4>Image to Text</h4> |
| </div> |
|
|
| |
| <!-- Demo 1: caption an image referenced by URL. The button calls captionImage(), which writes its result into #outputArea. --> |
| <div id="image-to-text-container" class="container mt-4"> |
|   <h5>Generate a Caption for an Image w/ Xenova/vit-gpt2-image-captioning:</h5> |
|   <div class="d-flex align-items-center"> |
|     <label for="imageToTextURLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter image to Caption URL:</label> |
|     <!-- type="url" gives URL-oriented keyboards on touch devices; the field is not inside a form, so no native validation is triggered. --> |
|     <input type="url" class="form-control flex-grow-1" id="imageToTextURLText" |
|       value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg" |
|       placeholder="Enter image" style="margin-right: 15px; margin-left: 15px;"> |
|     <button type="button" id="ImagetoTextButton" class="btn btn-primary" onclick="captionImage()">Caption</button> |
|   </div> |
|   <div class="mt-4"> |
|     <h4>Output:</h4> |
|     <!-- The result arrives asynchronously; the live region lets assistive technology announce it when it is written. --> |
|     <pre id="outputArea" aria-live="polite"></pre> |
|   </div> |
| </div> |
|
|
| <hr> |
|
|
| <!-- Demo 2: caption an image picked from the local disk. The button calls captionImageLocal(), which writes its result into #outputAreaLocal. --> |
| <div id="image-to-text-local-container" class="container mt-4"> |
|   <h5>Generate a Caption for a Local Image:</h5> |
|   <div class="d-flex align-items-center"> |
|     <label for="imagetoTextLocalFile" class="mb-0 text-nowrap" |
|       style="margin-right: 15px;">Select Local Image:</label> |
|     <input type="file" id="imagetoTextLocalFile" accept="image/*"> |
|     <button type="button" id="CaptionButtonLocal" class="btn btn-primary" |
|       onclick="captionImageLocal()">Caption</button> |
|   </div> |
|   <div class="mt-4"> |
|     <h4>Output:</h4> |
|     <!-- The result arrives asynchronously; the live region lets assistive technology announce it when it is written. --> |
|     <pre id="outputAreaLocal" aria-live="polite"></pre> |
|   </div> |
| </div> |
|
|
| |
| <!-- Bottom navigation row: link back to the main page (index.html). --> |
| <div class="row mt-5"> |
|   <div class="col-md-12 text-center"> |
|     <a |
|       class="btn btn-outline-secondary" |
|       href="index.html" |
|       style="color: #3c650b; border-color: #3c650b;" |
|     >Back to Main Page</a> |
|   </div> |
| </div> |
| </div> |
| </div> |
|
|
| <script> |
| |
| let captioner; |
| |
| |
| async function initializeModel() { |
| captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning'); |
| |
| } |
| |
| async function captionImage() { |
| const textFieldValue = document.getElementById("imageToTextURLText").value.trim(); |
| |
| const result = await captioner(textFieldValue); |
| |
| document.getElementById("outputArea").innerText = JSON.stringify(result, null, 2); |
| } |
| |
| async function captionImageLocal() { |
| const fileInput = document.getElementById("imagetoTextLocalFile"); |
| const file = fileInput.files[0]; |
| |
| if (!file) { |
| alert('Please select an image file first.'); |
| return; |
| } |
| |
| |
| const url = URL.createObjectURL(file); |
| |
| const result = await captioner(url); |
| |
| document.getElementById("outputAreaLocal").innerText = JSON.stringify(result, null, 2); |
| } |
| |
| |
| window.addEventListener("DOMContentLoaded", initializeModel); |
| </script> |
| </body> |
|
|
| </html> |