File size: 6,584 Bytes
b6999e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import { VectorSearch } from '/assets/vectorsearch-min.js';

// DOM references.
// Every element is looked up once by id at module evaluation time.
// document.getElementById returns null when no element carries the id, so the
// rest of this file assumes the page markup defines all of the ids below.

// Database name typed for storing, and the selector used to pick a database to search.
const DB_NAME_INPUT = document.getElementById('db-name-input');
const DB_SELECT = document.getElementById('db-select');
// Receives progress, completion and error messages.
const STATUS_EL = document.getElementById('status');
// Outputs for the search query: its tokens and its embedding (visual + text).
const QUERY_EMBEDDING_TEXT = document.getElementById('query-embedding-text');
const QUERY_TOKENS_OUTPUT = document.getElementById('query-tokens-output');
const QUERY_EMBEDDING_VIZ = document.getElementById('query-embedding-viz');
// Outputs for the embedding of the best matching stored entry.
const BEST_MATCH_EMBEDDING_VIZ = document.getElementById('best-match-embedding-viz');
const BEST_MATCH_EMBEDDING_TEXT = document.getElementById('best-match-embedding-text');
// INPUT_TEXT holds the text to store; TARGET_TEXT holds the search query.
const INPUT_TEXT = document.getElementById('input-text');
const TARGET_TEXT = document.getElementById('target-text');
const STORE_BTN = document.getElementById('store-btn');
const PREDICT_BTN = document.getElementById('predict-btn');
// Similarity threshold control and the element that mirrors its current value.
const THRESHOLD_INPUT = document.getElementById('threshold-input');
const THRESHOLD_VALUE = document.getElementById('threshold-value');
// Search results text plus the best-score similarity panel.
const RESULTS_TEXT = document.getElementById('results-text');
const SIMILARITY_CONTAINER = document.getElementById('similarity-container');
const SIMILARITY_SCORE_EL = document.getElementById('similarity-score');
const SIMILARITY_LABEL_EL = document.getElementById('similarity-label');


// Embedding model configuration handed to the VectorSearch constructor below.

// Runtime used to execute the model. Options: 'transformersjs' or 'litertjs'.
const MODEL_RUNTIME = 'transformersjs';
// Model to load. Documented options: 'Xenova/all-MiniLM-L6-v2' for the
// transformersjs runtime, or
// 'model/embeddinggemma-300M_seq1024_mixed-precision.tflite' for the litertjs runtime.
// NOTE(review): the configured value uses the 'huggingworld/' namespace rather
// than the 'Xenova/' one listed above — confirm this is the intended source.
const MODEL_URL = 'huggingworld/all-MiniLM-L6-v2';
// NOTE(review): SEQ_LENGTH and TOKENIZER look tailored to the embeddinggemma
// model (seq1024 / embeddinggemma-300m) rather than to all-MiniLM; presumably
// they only matter for the litertjs runtime — verify against VectorSearch.
const SEQ_LENGTH = 1024;
const TOKENIZER = 'huggingworld/embeddinggemma-300m-ONNX';
const EMBEDDING_MODEL_CONFIG = {
  runtime: MODEL_RUNTIME,
  url: MODEL_URL,
  sequenceLength: SEQ_LENGTH,
  tokenizer: TOKENIZER
};

// Single shared VectorSearch instance used by every handler in this file for
// embedding, storing, searching and rendering.
const VECTOR_SEARCH = new VectorSearch(EMBEDDING_MODEL_CONFIG);


/**
 * Click handler for the search button.
 *
 * Reads the query text, similarity threshold and selected database from the
 * page, runs predict() against that database and reports progress in the
 * status element. Does nothing when the query or the database selection is
 * empty.
 *
 * Failures are logged and shown in the status element (same format as load()),
 * and the button is always re-enabled so the user can retry.
 *
 * @returns {Promise<void>}
 */
async function predictBtnClickHandler() {
  const queryText = TARGET_TEXT.value;
  const selectedDb = DB_SELECT.value;
  if (!queryText || !selectedDb) {
    return;
  }

  // Fall back to 0.5 only when the input is not a number. A plain `|| 0.5`
  // would also replace a deliberately chosen threshold of 0.
  const parsedThreshold = Number.parseFloat(THRESHOLD_INPUT.value);
  const threshold = Number.isNaN(parsedThreshold) ? 0.5 : parsedThreshold;

  PREDICT_BTN.disabled = true;
  try {
    VECTOR_SEARCH.setDb(selectedDb);
    STATUS_EL.innerText = `Searching VectorDB (${selectedDb})...`;

    const startMs = performance.now();
    await predict(queryText, threshold);
    const elapsedMs = performance.now() - startMs;

    console.log(`Total search time (query embedding + vector search) took ${elapsedMs} milliseconds.`);
    STATUS_EL.innerText = 'Search complete';
  } catch (e) {
    console.error(e);
    STATUS_EL.innerText = `Error: ${e.message}`;
  } finally {
    // Without this, a failed search would leave the button disabled for good.
    PREDICT_BTN.disabled = false;
  }
}


/**
 * Click handler for the store button.
 *
 * Splits the input text into paragraphs (runs of text separated by at least
 * one blank line), stores them in the database named in the database-name
 * input, then refreshes the database selector. Does nothing when either the
 * text or the database name is empty after trimming.
 *
 * On failure the error is logged and shown in the status element, the input
 * text is kept so nothing the user typed is lost, and the button is always
 * re-enabled.
 *
 * @returns {Promise<void>}
 */
async function storeBtnClickHandler() {
  const text = INPUT_TEXT.value.trim();
  const dbName = DB_NAME_INPUT.value.trim();
  if (!text || !dbName) return;

  const paragraphs = text
    .split(/\n\s*\n/)
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > 0);

  STORE_BTN.disabled = true;
  try {
    await VECTOR_SEARCH.storeTexts(paragraphs, dbName, STATUS_EL);
    STATUS_EL.innerText = `Stored ${paragraphs.length} paragraphs.`;
    // Only clear the input once the store has actually succeeded.
    INPUT_TEXT.value = '';
  } catch (e) {
    console.error(e);
    STATUS_EL.innerText = `Error: ${e.message}`;
  } finally {
    // Without this, a rejected storeTexts() would leave the button disabled.
    STORE_BTN.disabled = false;
  }

  await updateDbList();
}


/**
 * Page bootstrap: refreshes the database selector, loads the embedding
 * runtime/model through VECTOR_SEARCH, then enables the buttons and attaches
 * the UI event listeners.
 *
 * Any error raised along the way is logged and written to the status element
 * instead of being rethrown.
 *
 * @returns {Promise<void>}
 */
async function load() {
  // Keeps the threshold read-out in step with the threshold input.
  const mirrorThreshold = () => {
    THRESHOLD_VALUE.innerText = THRESHOLD_INPUT.value;
  };

  try {
    await updateDbList();

    // Fetch the runtime and model up front so the first store/search is ready to run.
    await VECTOR_SEARCH.load(STATUS_EL);

    STATUS_EL.innerText = 'Ready to store and search';
    for (const button of [STORE_BTN, PREDICT_BTN]) {
      button.disabled = false;
    }

    const listeners = [
      [STORE_BTN, 'click', storeBtnClickHandler],
      [PREDICT_BTN, 'click', predictBtnClickHandler],
      [THRESHOLD_INPUT, 'input', mirrorThreshold],
    ];
    for (const [element, eventName, handler] of listeners) {
      element.addEventListener(eventName, handler);
    }
  } catch (e) {
    console.error(e);
    STATUS_EL.innerText = `Error: ${e.message}`;
  }
}


/**
 * Embeds the query text, visualizes it, searches the selected vector database
 * and renders the outcome.
 *
 * With at least one result: writes every match (score + text) to the results
 * box, updates the similarity panel with the best score and, when the entry at
 * the best index carries a vector, renders that vector too.
 * With no results: shows a "no matches" message, hides the similarity panel
 * and clears the best-match embedding outputs.
 *
 * @param {string} queryText - Text to embed and search for.
 * @param {number} threshold - Threshold forwarded to VECTOR_SEARCH.search().
 * @returns {Promise<void>}
 */
async function predict(queryText, threshold) {
  // Embed the query, then show its tokens (when provided) and its embedding.
  const queryInfo = await VECTOR_SEARCH.getEmbedding(queryText);
  const queryVector = queryInfo.embedding;
  const queryTokens = queryInfo.tokens;
  if (queryTokens) {
    VECTOR_SEARCH.renderTokens(queryTokens, QUERY_TOKENS_OUTPUT);
  }
  await VECTOR_SEARCH.renderEmbedding(queryVector, QUERY_EMBEDDING_VIZ, QUERY_EMBEDDING_TEXT);

  // Run the actual vector search against the currently selected database.
  const outcome = await VECTOR_SEARCH.search(queryVector, threshold, DB_SELECT.value);
  const matches = outcome.results;
  const topScore = outcome.bestScore;
  const topIndex = outcome.bestIndex;

  const hasMatches = matches.length > 0;
  if (!hasMatches) {
    RESULTS_TEXT.value = "No matches found above threshold.";
    SIMILARITY_CONTAINER.classList.add('hidden');
    BEST_MATCH_EMBEDDING_VIZ.innerHTML = '';
    BEST_MATCH_EMBEDDING_TEXT.innerText = '';
    return;
  }

  const describeMatch = (match) => `[Score: ${match.score.toFixed(4)}]\n${match.text}`;
  RESULTS_TEXT.value = matches.map(describeMatch).join('\n\n');
  updateSimilarityUI(topScore);

  const topVector = matches[topIndex].vector;
  if (topVector) {
    await VECTOR_SEARCH.renderEmbedding(topVector, BEST_MATCH_EMBEDDING_VIZ, BEST_MATCH_EMBEDDING_TEXT);
  }
}


/**
 * Shows the similarity panel for the given score: prints the score with four
 * decimals, tints the panel and sets a textual label.
 *
 * The hue is score * 120 clamped to the range 0..120. The label is chosen by
 * strict "greater than" comparisons against 0.8, 0.6 and 0.4.
 *
 * @param {number} score - Similarity score to display.
 * @returns {void}
 */
function updateSimilarityUI(score) {
  SIMILARITY_CONTAINER.classList.remove('hidden');
  SIMILARITY_SCORE_EL.innerText = score.toFixed(4);

  const hue = Math.min(120, Math.max(0, score * 120));
  Object.assign(SIMILARITY_CONTAINER.style, {
    backgroundColor: `hsla(${hue}, 70%, 20%, 0.4)`,
    borderColor: `hsla(${hue}, 70%, 50%, 0.6)`,
  });

  // Ordered from the highest bar down; the first bar the score exceeds wins.
  const tiers = [
    [0.8, 'Very High Similarity'],
    [0.6, 'High Similarity'],
    [0.4, 'Moderate Similarity'],
  ];
  const matchedTier = tiers.find(([minimum]) => score > minimum);

  SIMILARITY_LABEL_EL.innerText = matchedTier ? matchedTier[1] : 'Low Similarity';
}


/**
 * Rebuilds the database selector.
 *
 * Option names come from indexedDB.databases() when the browser provides it.
 * When it does not, the options already present in the selector are kept
 * instead of leaving the selector untouched. In both cases the name currently
 * typed in the database-name input is added if missing, so that a database
 * that was just stored can still be selected for searching even where
 * databases cannot be enumerated.
 *
 * The previous selection is preserved when it is still listed; when nothing
 * was selected, the typed name is selected. Errors while listing or rendering
 * are logged and not rethrown.
 *
 * @returns {Promise<void>}
 */
async function updateDbList() {
  const canListDatabases = Boolean(window.indexedDB.databases);

  try {
    let names;
    if (canListDatabases) {
      const dbs = await window.indexedDB.databases();
      names = dbs.map((db) => db.name).filter((name) => name !== undefined);
    } else {
      console.warn('indexedDB.databases() is not supported in this browser.');
      // Cannot enumerate: keep whatever the selector already offers.
      names = Array.from(DB_SELECT.options, (option) => option.value);
    }

    const currentSelection = DB_SELECT.value;
    const currentInputName = DB_NAME_INPUT.value.trim();
    if (currentInputName && !names.includes(currentInputName)) {
      names.push(currentInputName);
    }
    names.sort();

    DB_SELECT.innerHTML = '';
    for (const name of names) {
      const option = document.createElement('option');
      option.value = name;
      option.text = name;
      if (name === currentSelection || (currentSelection === '' && name === currentInputName)) {
        option.selected = true;
      }
      DB_SELECT.appendChild(option);
    }
  } catch (e) {
    console.error('Error fetching databases:', e);
  }
}


// Start initialization as soon as the module is evaluated. load() catches and
// reports its own errors, so the returned promise is not awaited here.
load();