fhueni committed on
Commit
3a94de3
·
1 Parent(s): 9b487db

feat: add imdb and ag news dataset to datasets

Browse files
README.md CHANGED
@@ -22,3 +22,10 @@ Furthermore, it investigates two different scheduling policies to send request t
22
  ## Getting Started
23
  To run the experiments, open `index.html` in a web browser. Ensure that you have an API key for the OpenRouter service to run the cloud inference.
24
  You can then download the models and run them in the browser by leveraging the transformers.js library.
 
 
 
 
 
 
 
 
22
  ## Getting Started
23
  To run the experiments, open `index.html` in a web browser. Ensure that you have an API key for the OpenRouter service to run the cloud inference.
24
  You can then download the models and run them in the browser by leveraging the transformers.js library.
25
+
26
+
27
+ ## Dataset preparation
28
+ To prepare the dataset for the experiments, we follow these steps:
29
+ - Download the dataset from Kaggle or Hugging Face
30
+ - Add row indexes to each entry in the dataset for easy reference
31
+ - Save the prepared dataset in the `dataset/` directory
dataset/ag_news_test.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/preprocess_datasets.ipynb ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2025-12-25T10:54:53.050054Z",
10
+ "start_time": "2025-12-25T10:54:49.296474Z"
11
+ }
12
+ },
13
+ "source": "import pandas as pd",
14
+ "outputs": [],
15
+ "execution_count": 1
16
+ },
17
+ {
18
+ "metadata": {},
19
+ "cell_type": "markdown",
20
+ "source": "# Prepare the spam/ham email dataset",
21
+ "id": "fd1d0f3beb9aa893"
22
+ },
23
+ {
24
+ "metadata": {
25
+ "ExecuteTime": {
26
+ "end_time": "2025-12-21T16:20:15.450476Z",
27
+ "start_time": "2025-12-21T16:20:15.389394Z"
28
+ }
29
+ },
30
+ "cell_type": "code",
31
+ "source": "dataset = pd.read_csv('./emails.csv')",
32
+ "id": "f72a26d75f7a2588",
33
+ "outputs": [],
34
+ "execution_count": 11
35
+ },
36
+ {
37
+ "metadata": {
38
+ "ExecuteTime": {
39
+ "end_time": "2025-12-21T16:20:15.562018Z",
40
+ "start_time": "2025-12-21T16:20:15.550178Z"
41
+ }
42
+ },
43
+ "cell_type": "code",
44
+ "source": [
45
+ "# add column with row id\n",
46
+ "dataset['ID'] = range(1, len(dataset) + 1)"
47
+ ],
48
+ "id": "e6a938ba1a501431",
49
+ "outputs": [],
50
+ "execution_count": 12
51
+ },
52
+ {
53
+ "metadata": {
54
+ "ExecuteTime": {
55
+ "end_time": "2025-12-21T16:20:15.714419Z",
56
+ "start_time": "2025-12-21T16:20:15.710137Z"
57
+ }
58
+ },
59
+ "cell_type": "code",
60
+ "source": [
61
+ "# add the column to the first position\n",
62
+ "cols = dataset.columns.tolist()\n",
63
+ "cols = cols[-1:] + cols[:-1]\n",
64
+ "dataset = dataset[cols]"
65
+ ],
66
+ "id": "9d56f16fc000b2e2",
67
+ "outputs": [],
68
+ "execution_count": 13
69
+ },
70
+ {
71
+ "metadata": {
72
+ "ExecuteTime": {
73
+ "end_time": "2025-12-21T16:20:15.896626Z",
74
+ "start_time": "2025-12-21T16:20:15.883918Z"
75
+ }
76
+ },
77
+ "cell_type": "code",
78
+ "source": "dataset.head()",
79
+ "id": "c853a292fe6d4d01",
80
+ "outputs": [
81
+ {
82
+ "data": {
83
+ "text/plain": [
84
+ " ID Text Spam\n",
85
+ "0 1 Subject: naturally irresistible your corporate... 1\n",
86
+ "1 2 Subject: the stock trading gunslinger fanny i... 1\n",
87
+ "2 3 Subject: unbelievable new homes made easy im ... 1\n",
88
+ "3 4 Subject: 4 color printing special request add... 1\n",
89
+ "4 5 Subject: do not have money , get software cds ... 1"
90
+ ],
91
+ "text/html": [
92
+ "<div>\n",
93
+ "<style scoped>\n",
94
+ " .dataframe tbody tr th:only-of-type {\n",
95
+ " vertical-align: middle;\n",
96
+ " }\n",
97
+ "\n",
98
+ " .dataframe tbody tr th {\n",
99
+ " vertical-align: top;\n",
100
+ " }\n",
101
+ "\n",
102
+ " .dataframe thead th {\n",
103
+ " text-align: right;\n",
104
+ " }\n",
105
+ "</style>\n",
106
+ "<table border=\"1\" class=\"dataframe\">\n",
107
+ " <thead>\n",
108
+ " <tr style=\"text-align: right;\">\n",
109
+ " <th></th>\n",
110
+ " <th>ID</th>\n",
111
+ " <th>Text</th>\n",
112
+ " <th>Spam</th>\n",
113
+ " </tr>\n",
114
+ " </thead>\n",
115
+ " <tbody>\n",
116
+ " <tr>\n",
117
+ " <th>0</th>\n",
118
+ " <td>1</td>\n",
119
+ " <td>Subject: naturally irresistible your corporate...</td>\n",
120
+ " <td>1</td>\n",
121
+ " </tr>\n",
122
+ " <tr>\n",
123
+ " <th>1</th>\n",
124
+ " <td>2</td>\n",
125
+ " <td>Subject: the stock trading gunslinger fanny i...</td>\n",
126
+ " <td>1</td>\n",
127
+ " </tr>\n",
128
+ " <tr>\n",
129
+ " <th>2</th>\n",
130
+ " <td>3</td>\n",
131
+ " <td>Subject: unbelievable new homes made easy im ...</td>\n",
132
+ " <td>1</td>\n",
133
+ " </tr>\n",
134
+ " <tr>\n",
135
+ " <th>3</th>\n",
136
+ " <td>4</td>\n",
137
+ " <td>Subject: 4 color printing special request add...</td>\n",
138
+ " <td>1</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>4</th>\n",
142
+ " <td>5</td>\n",
143
+ " <td>Subject: do not have money , get software cds ...</td>\n",
144
+ " <td>1</td>\n",
145
+ " </tr>\n",
146
+ " </tbody>\n",
147
+ "</table>\n",
148
+ "</div>"
149
+ ]
150
+ },
151
+ "execution_count": 14,
152
+ "metadata": {},
153
+ "output_type": "execute_result"
154
+ }
155
+ ],
156
+ "execution_count": 14
157
+ },
158
+ {
159
+ "metadata": {
160
+ "ExecuteTime": {
161
+ "end_time": "2025-12-21T16:20:33.041536Z",
162
+ "start_time": "2025-12-21T16:20:32.900818Z"
163
+ }
164
+ },
165
+ "cell_type": "code",
166
+ "source": [
167
+ "# store csv in dataset folder\n",
168
+ "dataset.to_csv('./spam_ham_dataset.csv', index=False)"
169
+ ],
170
+ "id": "2bc331812feedf10",
171
+ "outputs": [],
172
+ "execution_count": 15
173
+ },
174
+ {
175
+ "metadata": {},
176
+ "cell_type": "markdown",
177
+ "source": "# Prepare the AG news dataset",
178
+ "id": "ec3f208efcc1eeda"
179
+ },
180
+ {
181
+ "metadata": {
182
+ "ExecuteTime": {
183
+ "end_time": "2025-12-25T10:56:01.120606Z",
184
+ "start_time": "2025-12-25T10:56:00.990306Z"
185
+ }
186
+ },
187
+ "cell_type": "code",
188
+ "source": "dataset = pd.read_csv('./ag_news_test.csv')",
189
+ "id": "6396678647ac4f8",
190
+ "outputs": [],
191
+ "execution_count": 2
192
+ },
193
+ {
194
+ "metadata": {
195
+ "ExecuteTime": {
196
+ "end_time": "2025-12-25T10:56:05.735124Z",
197
+ "start_time": "2025-12-25T10:56:05.722137Z"
198
+ }
199
+ },
200
+ "cell_type": "code",
201
+ "source": [
202
+ "# add column with row id\n",
203
+ "dataset['ID'] = range(1, len(dataset) + 1)"
204
+ ],
205
+ "id": "445a51aa7a9d6de9",
206
+ "outputs": [],
207
+ "execution_count": 3
208
+ },
209
+ {
210
+ "metadata": {
211
+ "ExecuteTime": {
212
+ "end_time": "2025-12-25T10:56:10.368440Z",
213
+ "start_time": "2025-12-25T10:56:10.320560Z"
214
+ }
215
+ },
216
+ "cell_type": "code",
217
+ "source": [
218
+ "# add the column to the first position\n",
219
+ "cols = dataset.columns.tolist()\n",
220
+ "cols = cols[-1:] + cols[:-1]\n",
221
+ "dataset = dataset[cols]"
222
+ ],
223
+ "id": "a945120ba657ceaa",
224
+ "outputs": [],
225
+ "execution_count": 4
226
+ },
227
+ {
228
+ "metadata": {
229
+ "ExecuteTime": {
230
+ "end_time": "2025-12-25T10:56:12.070628Z",
231
+ "start_time": "2025-12-25T10:56:12.039268Z"
232
+ }
233
+ },
234
+ "cell_type": "code",
235
+ "source": "dataset.head()",
236
+ "id": "3c95d325ace568ef",
237
+ "outputs": [
238
+ {
239
+ "data": {
240
+ "text/plain": [
241
+ " ID Class Index Title \\\n",
242
+ "0 1 3 Fears for T N pension after talks \n",
243
+ "1 2 4 The Race is On: Second Private Team Sets Launc... \n",
244
+ "2 3 4 Ky. Company Wins Grant to Study Peptides (AP) \n",
245
+ "3 4 4 Prediction Unit Helps Forecast Wildfires (AP) \n",
246
+ "4 5 4 Calif. Aims to Limit Farm-Related Smog (AP) \n",
247
+ "\n",
248
+ " Description \n",
249
+ "0 Unions representing workers at Turner Newall... \n",
250
+ "1 SPACE.com - TORONTO, Canada -- A second\\team o... \n",
251
+ "2 AP - A company founded by a chemistry research... \n",
252
+ "3 AP - It's barely dawn when Mike Fitzpatrick st... \n",
253
+ "4 AP - Southern California's smog-fighting agenc... "
254
+ ],
255
+ "text/html": [
256
+ "<div>\n",
257
+ "<style scoped>\n",
258
+ " .dataframe tbody tr th:only-of-type {\n",
259
+ " vertical-align: middle;\n",
260
+ " }\n",
261
+ "\n",
262
+ " .dataframe tbody tr th {\n",
263
+ " vertical-align: top;\n",
264
+ " }\n",
265
+ "\n",
266
+ " .dataframe thead th {\n",
267
+ " text-align: right;\n",
268
+ " }\n",
269
+ "</style>\n",
270
+ "<table border=\"1\" class=\"dataframe\">\n",
271
+ " <thead>\n",
272
+ " <tr style=\"text-align: right;\">\n",
273
+ " <th></th>\n",
274
+ " <th>ID</th>\n",
275
+ " <th>Class Index</th>\n",
276
+ " <th>Title</th>\n",
277
+ " <th>Description</th>\n",
278
+ " </tr>\n",
279
+ " </thead>\n",
280
+ " <tbody>\n",
281
+ " <tr>\n",
282
+ " <th>0</th>\n",
283
+ " <td>1</td>\n",
284
+ " <td>3</td>\n",
285
+ " <td>Fears for T N pension after talks</td>\n",
286
+ " <td>Unions representing workers at Turner Newall...</td>\n",
287
+ " </tr>\n",
288
+ " <tr>\n",
289
+ " <th>1</th>\n",
290
+ " <td>2</td>\n",
291
+ " <td>4</td>\n",
292
+ " <td>The Race is On: Second Private Team Sets Launc...</td>\n",
293
+ " <td>SPACE.com - TORONTO, Canada -- A second\\team o...</td>\n",
294
+ " </tr>\n",
295
+ " <tr>\n",
296
+ " <th>2</th>\n",
297
+ " <td>3</td>\n",
298
+ " <td>4</td>\n",
299
+ " <td>Ky. Company Wins Grant to Study Peptides (AP)</td>\n",
300
+ " <td>AP - A company founded by a chemistry research...</td>\n",
301
+ " </tr>\n",
302
+ " <tr>\n",
303
+ " <th>3</th>\n",
304
+ " <td>4</td>\n",
305
+ " <td>4</td>\n",
306
+ " <td>Prediction Unit Helps Forecast Wildfires (AP)</td>\n",
307
+ " <td>AP - It's barely dawn when Mike Fitzpatrick st...</td>\n",
308
+ " </tr>\n",
309
+ " <tr>\n",
310
+ " <th>4</th>\n",
311
+ " <td>5</td>\n",
312
+ " <td>4</td>\n",
313
+ " <td>Calif. Aims to Limit Farm-Related Smog (AP)</td>\n",
314
+ " <td>AP - Southern California's smog-fighting agenc...</td>\n",
315
+ " </tr>\n",
316
+ " </tbody>\n",
317
+ "</table>\n",
318
+ "</div>"
319
+ ]
320
+ },
321
+ "execution_count": 5,
322
+ "metadata": {},
323
+ "output_type": "execute_result"
324
+ }
325
+ ],
326
+ "execution_count": 5
327
+ },
328
+ {
329
+ "metadata": {
330
+ "ExecuteTime": {
331
+ "end_time": "2025-12-25T10:56:34.202882Z",
332
+ "start_time": "2025-12-25T10:56:34.156909Z"
333
+ }
334
+ },
335
+ "cell_type": "code",
336
+ "source": [
337
+ "# store csv in dataset folder\n",
338
+ "dataset.to_csv('./ag_news_test.csv', index=False)"
339
+ ],
340
+ "id": "d62c782d061d0427",
341
+ "outputs": [],
342
+ "execution_count": 6
343
+ },
344
+ {
345
+ "metadata": {},
346
+ "cell_type": "markdown",
347
+ "source": "# Prepare the IMDB reviews dataset",
348
+ "id": "d6d26cc073ed9c20"
349
+ },
350
+ {
351
+ "metadata": {
352
+ "ExecuteTime": {
353
+ "end_time": "2025-12-25T10:57:20.086793Z",
354
+ "start_time": "2025-12-25T10:57:19.626919Z"
355
+ }
356
+ },
357
+ "cell_type": "code",
358
+ "source": "dataset = pd.read_csv('./imdb_dataset.csv')",
359
+ "id": "d428b1226c135eeb",
360
+ "outputs": [],
361
+ "execution_count": 7
362
+ },
363
+ {
364
+ "metadata": {
365
+ "ExecuteTime": {
366
+ "end_time": "2025-12-25T10:57:21.530556Z",
367
+ "start_time": "2025-12-25T10:57:21.516704Z"
368
+ }
369
+ },
370
+ "cell_type": "code",
371
+ "source": [
372
+ "# add column with row id\n",
373
+ "dataset['ID'] = range(1, len(dataset) + 1)"
374
+ ],
375
+ "id": "fcd59bb4a4faaee9",
376
+ "outputs": [],
377
+ "execution_count": 8
378
+ },
379
+ {
380
+ "metadata": {
381
+ "ExecuteTime": {
382
+ "end_time": "2025-12-25T10:57:23.603144Z",
383
+ "start_time": "2025-12-25T10:57:23.588206Z"
384
+ }
385
+ },
386
+ "cell_type": "code",
387
+ "source": [
388
+ "# add the column to the first position\n",
389
+ "cols = dataset.columns.tolist()\n",
390
+ "cols = cols[-1:] + cols[:-1]\n",
391
+ "dataset = dataset[cols]"
392
+ ],
393
+ "id": "4f484cca1f2663f3",
394
+ "outputs": [],
395
+ "execution_count": 9
396
+ },
397
+ {
398
+ "metadata": {
399
+ "ExecuteTime": {
400
+ "end_time": "2025-12-25T10:57:24.466785Z",
401
+ "start_time": "2025-12-25T10:57:24.448227Z"
402
+ }
403
+ },
404
+ "cell_type": "code",
405
+ "source": "dataset.head()",
406
+ "id": "c8a8d1415d414af3",
407
+ "outputs": [
408
+ {
409
+ "data": {
410
+ "text/plain": [
411
+ " ID review sentiment\n",
412
+ "0 1 One of the other reviewers has mentioned that ... positive\n",
413
+ "1 2 A wonderful little production. <br /><br />The... positive\n",
414
+ "2 3 I thought this was a wonderful way to spend ti... positive\n",
415
+ "3 4 Basically there's a family where a little boy ... negative\n",
416
+ "4 5 Petter Mattei's \"Love in the Time of Money\" is... positive"
417
+ ],
418
+ "text/html": [
419
+ "<div>\n",
420
+ "<style scoped>\n",
421
+ " .dataframe tbody tr th:only-of-type {\n",
422
+ " vertical-align: middle;\n",
423
+ " }\n",
424
+ "\n",
425
+ " .dataframe tbody tr th {\n",
426
+ " vertical-align: top;\n",
427
+ " }\n",
428
+ "\n",
429
+ " .dataframe thead th {\n",
430
+ " text-align: right;\n",
431
+ " }\n",
432
+ "</style>\n",
433
+ "<table border=\"1\" class=\"dataframe\">\n",
434
+ " <thead>\n",
435
+ " <tr style=\"text-align: right;\">\n",
436
+ " <th></th>\n",
437
+ " <th>ID</th>\n",
438
+ " <th>review</th>\n",
439
+ " <th>sentiment</th>\n",
440
+ " </tr>\n",
441
+ " </thead>\n",
442
+ " <tbody>\n",
443
+ " <tr>\n",
444
+ " <th>0</th>\n",
445
+ " <td>1</td>\n",
446
+ " <td>One of the other reviewers has mentioned that ...</td>\n",
447
+ " <td>positive</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <th>1</th>\n",
451
+ " <td>2</td>\n",
452
+ " <td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n",
453
+ " <td>positive</td>\n",
454
+ " </tr>\n",
455
+ " <tr>\n",
456
+ " <th>2</th>\n",
457
+ " <td>3</td>\n",
458
+ " <td>I thought this was a wonderful way to spend ti...</td>\n",
459
+ " <td>positive</td>\n",
460
+ " </tr>\n",
461
+ " <tr>\n",
462
+ " <th>3</th>\n",
463
+ " <td>4</td>\n",
464
+ " <td>Basically there's a family where a little boy ...</td>\n",
465
+ " <td>negative</td>\n",
466
+ " </tr>\n",
467
+ " <tr>\n",
468
+ " <th>4</th>\n",
469
+ " <td>5</td>\n",
470
+ " <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n",
471
+ " <td>positive</td>\n",
472
+ " </tr>\n",
473
+ " </tbody>\n",
474
+ "</table>\n",
475
+ "</div>"
476
+ ]
477
+ },
478
+ "execution_count": 10,
479
+ "metadata": {},
480
+ "output_type": "execute_result"
481
+ }
482
+ ],
483
+ "execution_count": 10
484
+ },
485
+ {
486
+ "metadata": {
487
+ "ExecuteTime": {
488
+ "end_time": "2025-12-25T10:57:38.303913Z",
489
+ "start_time": "2025-12-25T10:57:37.271989Z"
490
+ }
491
+ },
492
+ "cell_type": "code",
493
+ "source": [
494
+ "# store csv in dataset folder\n",
495
+ "dataset.to_csv('./imdb_dataset.csv', index=False)"
496
+ ],
497
+ "id": "df642952f75ee514",
498
+ "outputs": [],
499
+ "execution_count": 11
500
+ }
501
+ ],
502
+ "metadata": {
503
+ "kernelspec": {
504
+ "display_name": "Python 3",
505
+ "language": "python",
506
+ "name": "python3"
507
+ },
508
+ "language_info": {
509
+ "codemirror_mode": {
510
+ "name": "ipython",
511
+ "version": 2
512
+ },
513
+ "file_extension": ".py",
514
+ "mimetype": "text/x-python",
515
+ "name": "python",
516
+ "nbconvert_exporter": "python",
517
+ "pygments_lexer": "ipython2",
518
+ "version": "2.7.6"
519
+ }
520
+ },
521
+ "nbformat": 4,
522
+ "nbformat_minor": 5
523
+ }
index.html CHANGED
@@ -125,6 +125,8 @@
125
  class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
126
  <option value="boolq_validation">BoolQ</option>
127
  <option value="spam_ham_dataset">Spam</option>
 
 
128
  </select>
129
  </label>
130
 
 
125
  class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
126
  <option value="boolq_validation">BoolQ</option>
127
  <option value="spam_ham_dataset">Spam</option>
128
+ <option value="imdb_dataset">IMDB</option>
129
+ <option value="ag_news_test">AG News</option>
130
  </select>
131
  </label>
132
 
src/datasetLoader.js CHANGED
@@ -36,29 +36,24 @@ export class DatasetLoader {
36
  this._dataset = lines
37
  .filter(l => l.trim().length > 0)
38
  .map(line => {
39
- let id, answer, full_prompt, question, context, text;
40
 
41
- // load different datasets based on name
42
- if (name === 'boolq_validation') {
43
- // parse line into fields handling quoted commas
44
- [id, question, answer, context] = this._parseCSVLine(line);
45
-
46
- // set the prompt
47
- full_prompt = `Question: ${question}
48
- Context: ${context}
49
- Instructions: Answer with ONLY the word "true" or "false". Do not provide any explanation or additional text.
50
- Answer:`;
51
- } else if (name === 'spam_ham_dataset') {
52
- [id, text, answer] = this._parseCSVLine(line);
53
-
54
- // convert answer to string boolean
55
- answer = (answer.toLowerCase() === 'spam') ? 'true' : 'false';
56
-
57
- // set the prompt
58
- full_prompt = `Task: Determine whether the following message is spam or not.
59
- Instructions: Answer with ONLY the word "true" or "false". Do not provide any explanation or additional text.
60
- Message: ${text}
61
- Answer:`;
62
  }
63
 
64
  return {id: id, prompt: full_prompt, groundTruth: answer};
@@ -73,6 +68,94 @@ export class DatasetLoader {
73
  });
74
  }
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  /**
77
  * Parse a single CSV line into fields, handling quoted fields with commas
78
  *
 
36
  this._dataset = lines
37
  .filter(l => l.trim().length > 0)
38
  .map(line => {
39
+ let id, answer, full_prompt;
40
 
41
+ // load different datasets based on the dataset name
42
+ switch (name) {
43
+ case 'boolq_validation':
44
+ ({id, full_prompt, answer} = this._loadBoolQLine(line));
45
+ break;
46
+ case 'spam_ham_dataset':
47
+ ({id, full_prompt, answer} = this._loadSpamHamLine(line));
48
+ break;
49
+ case 'imdb_dataset':
50
+ ({id, full_prompt, answer} = this._loadIMDBLine(line));
51
+ break;
52
+ case 'ag_news_test':
53
+ ({id, full_prompt, answer} = this._loadAGNewsLine(line));
54
+ break;
55
+ default:
56
+ throw new Error(`DatasetLoader: Unsupported dataset name '${name}'`);
 
 
 
 
 
57
  }
58
 
59
  return {id: id, prompt: full_prompt, groundTruth: answer};
 
68
  });
69
  }
70
 
71
+
72
+ /**
73
+ * Load a single line from the BoolQ dataset and prepare the prompt
74
+ *
75
+ * @param line - A single line from the BoolQ CSV dataset
76
+ * @returns {{full_prompt: string, answer: *, id: *}}
77
+ * @private
78
+ */
79
+ _loadBoolQLine(line) {
80
+ // parse line into fields handling quoted commas
81
+ const [id, question, answer, context] = this._parseCSVLine(line);
82
+
83
+ // set the prompt
84
+ const full_prompt = `Question: ${question}
85
+ Context: ${context}
86
+ Instructions: Answer with ONLY the word "true" or "false". Do not provide any explanation or additional text.
87
+ Answer:`;
88
+
89
+ return {id, full_prompt, answer}
90
+ }
91
+
92
+
93
+ /**
94
+ * Load a single line from the SpamHam dataset and prepare the prompt
95
+ *
96
+ * @param line - A single line from the SpamHam CSV dataset
97
+ * @returns {{full_prompt: string, answer: (string), id: *}}
98
+ * @private
99
+ */
100
+ _loadSpamHamLine(line) {
101
+ let [id, text, answer] = this._parseCSVLine(line);
102
+
103
+ // convert answer to string boolean
104
+ answer = (answer.toLowerCase() === 'spam') ? 'true' : 'false';
105
+
106
+ // set the prompt
107
+ const full_prompt = `Task: Determine whether the following message is spam or not.
108
+ Instructions: Answer with ONLY the word "true" or "false". Do not provide any explanation or additional text.
109
+ Message: ${text}
110
+ Answer:`;
111
+
112
+ return {id, full_prompt, answer}
113
+ }
114
+
115
+
116
+ /**
117
+ * Load a single line from the IMDB dataset and prepare the prompt
118
+ *
119
+ * @param line - A single line from the IMDB CSV dataset
120
+ * @returns {{full_prompt: string, answer: *, id: *}}
121
+ * @private
122
+ */
123
+ _loadIMDBLine(line) {
124
+ let [id, review, answer] = this._parseCSVLine(line);
125
+
126
+ // set the prompt
127
+ const full_prompt = `Task: Determine whether the sentiment of the following review is positive or negative.
128
+ Instructions: Answer with ONLY the word "positive" or "negative". Do not provide any explanation or additional text.
129
+ Review: ${review}
130
+ Sentiment:`;
131
+
132
+ return {id, full_prompt, answer}
133
+ }
134
+
135
+
136
+ /**
137
+ * Load a single line from the AG News dataset and prepare the prompt
138
+ *
139
+ * @param line - A single line from the AG News CSV dataset
140
+ * @returns {{full_prompt: string, answer: *, id: *}}
141
+ * @private
142
+ */
143
+ _loadAGNewsLine(line) {
144
+ let [id, answer, title, description] = this._parseCSVLine(line);
145
+
146
+ // set the prompt
147
+ const full_prompt = `Task: Determine whether the following news article belong to world, sports, business or Sci/Tech category.
148
+ Categories: World (1), Sports (2), Business (3), Sci/Tech (4).
149
+ Instructions: Answer with ONLY the id (1,2,3 or 4) of the class. Do not provide any explanation or additional text.
150
+ News Title: ${title}
151
+ News Description: ${description}
152
+ `;
153
+
154
+ return {id, full_prompt, answer}
155
+ }
156
+
157
+
158
+
159
  /**
160
  * Parse a single CSV line into fields, handling quoted fields with commas
161
  *
src/utils.js CHANGED
@@ -33,7 +33,7 @@ export function logTo(el, evt) {
33
  <td>${evt.totalLatency?.toFixed(2) || evt.latency?.toFixed(2) || 0}ms</td>
34
  <td>${evt.queueingTime?.toFixed(2) || 0}ms</td>
35
  <td>${evt.inferenceTime?.toFixed(2) || evt.latency?.toFixed(2) || 0}ms</td>
36
- <td title="${evt.job.prompt}">${evt.job.prompt.substring(0, 30)}...</td>
37
  <td title="${evt.response || ''}">${(evt.response || '').substring(0, 30)}</td>
38
  <td>${evt.evalRes.exactMatch}</td>
39
  `;
@@ -41,6 +41,24 @@ export function logTo(el, evt) {
41
  el.scrollTop = el.scrollHeight;
42
  }
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  /**
46
  * Approximates the number of words in a given text string
 
33
  <td>${evt.totalLatency?.toFixed(2) || evt.latency?.toFixed(2) || 0}ms</td>
34
  <td>${evt.queueingTime?.toFixed(2) || 0}ms</td>
35
  <td>${evt.inferenceTime?.toFixed(2) || evt.latency?.toFixed(2) || 0}ms</td>
36
+ <td title="${escapeHtml(evt.job.prompt)}">${escapeHtml(evt.job.prompt.substring(0, 30))}...</td>
37
  <td title="${evt.response || ''}">${(evt.response || '').substring(0, 30)}</td>
38
  <td>${evt.evalRes.exactMatch}</td>
39
  `;
 
41
  el.scrollTop = el.scrollHeight;
42
  }
43
 
44
/**
 * Escapes the HTML special characters `&`, `<`, `>`, `"` and `'` in a value
 * so that it can be placed inside markup or a quoted attribute without being
 * interpreted as HTML.
 *
 * Non-string input is converted with `String()`; `null` and `undefined`
 * produce an empty string instead of throwing.
 *
 * @param {*} str - Input value (normally a string)
 * @returns {string} - Escaped string
 */
function escapeHtml(str) {
    // lookup table is created once per call, not once per matched character
    const escapeMap = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;',
    };

    return String(str ?? '').replace(/[&<>"']/g, (char) => escapeMap[char]);
}
62
 
63
  /**
64
  * Approximates the number of words in a given text string