Added fugashi sample code.
Browse files- fine-tune-whisper-streaming.ipynb +303 -10
fine-tune-whisper-streaming.ipynb
CHANGED
|
@@ -306,7 +306,7 @@
|
|
| 306 |
},
|
| 307 |
{
|
| 308 |
"cell_type": "code",
|
| 309 |
-
"execution_count":
|
| 310 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
| 311 |
"metadata": {},
|
| 312 |
"outputs": [],
|
|
@@ -328,7 +328,8 @@
|
|
| 328 |
" transcription = normalizer(transcription).strip()\n",
|
| 329 |
" \n",
|
| 330 |
" # encode target text to label ids\n",
|
| 331 |
-
"
|
|
|
|
| 332 |
" return batch"
|
| 333 |
]
|
| 334 |
},
|
|
@@ -342,7 +343,7 @@
|
|
| 342 |
},
|
| 343 |
{
|
| 344 |
"cell_type": "code",
|
| 345 |
-
"execution_count":
|
| 346 |
"id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
|
| 347 |
"metadata": {},
|
| 348 |
"outputs": [],
|
|
@@ -360,7 +361,7 @@
|
|
| 360 |
},
|
| 361 |
{
|
| 362 |
"cell_type": "code",
|
| 363 |
-
"execution_count":
|
| 364 |
"id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
|
| 365 |
"metadata": {},
|
| 366 |
"outputs": [],
|
|
@@ -381,7 +382,7 @@
|
|
| 381 |
},
|
| 382 |
{
|
| 383 |
"cell_type": "code",
|
| 384 |
-
"execution_count":
|
| 385 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
| 386 |
"metadata": {},
|
| 387 |
"outputs": [],
|
|
@@ -402,7 +403,7 @@
|
|
| 402 |
},
|
| 403 |
{
|
| 404 |
"cell_type": "code",
|
| 405 |
-
"execution_count":
|
| 406 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
| 407 |
"metadata": {},
|
| 408 |
"outputs": [],
|
|
@@ -413,14 +414,252 @@
|
|
| 413 |
")"
|
| 414 |
]
|
| 415 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
{
|
| 417 |
"cell_type": "code",
|
| 418 |
"execution_count": null,
|
| 419 |
-
"id": "
|
| 420 |
"metadata": {},
|
| 421 |
"outputs": [],
|
| 422 |
"source": [
|
| 423 |
-
"
|
| 424 |
]
|
| 425 |
},
|
| 426 |
{
|
|
@@ -895,7 +1134,7 @@
|
|
| 895 |
"execution_count": 26,
|
| 896 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
| 897 |
"metadata": {
|
| 898 |
-
"scrolled":
|
| 899 |
},
|
| 900 |
"outputs": [
|
| 901 |
{
|
|
@@ -1139,7 +1378,7 @@
|
|
| 1139 |
},
|
| 1140 |
{
|
| 1141 |
"cell_type": "code",
|
| 1142 |
-
"execution_count":
|
| 1143 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
| 1144 |
"metadata": {},
|
| 1145 |
"outputs": [
|
|
@@ -1155,6 +1394,60 @@
|
|
| 1155 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 1156 |
"added tokens file saved in ./added_tokens.json\n"
|
| 1157 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1158 |
}
|
| 1159 |
],
|
| 1160 |
"source": [
|
|
|
|
| 306 |
},
|
| 307 |
{
|
| 308 |
"cell_type": "code",
|
| 309 |
+
"execution_count": 44,
|
| 310 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
| 311 |
"metadata": {},
|
| 312 |
"outputs": [],
|
|
|
|
| 328 |
" transcription = normalizer(transcription).strip()\n",
|
| 329 |
" \n",
|
| 330 |
" # encode target text to label ids\n",
|
| 331 |
+
"# batch[\"labels\"] = processor.tokenizer(transcription).input_ids\n",
|
| 332 |
+
" batch['labels'] = transcription\n",
|
| 333 |
" return batch"
|
| 334 |
]
|
| 335 |
},
|
|
|
|
| 343 |
},
|
| 344 |
{
|
| 345 |
"cell_type": "code",
|
| 346 |
+
"execution_count": 45,
|
| 347 |
"id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
|
| 348 |
"metadata": {},
|
| 349 |
"outputs": [],
|
|
|
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"cell_type": "code",
|
| 364 |
+
"execution_count": 46,
|
| 365 |
"id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
|
| 366 |
"metadata": {},
|
| 367 |
"outputs": [],
|
|
|
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"cell_type": "code",
|
| 385 |
+
"execution_count": 47,
|
| 386 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
| 387 |
"metadata": {},
|
| 388 |
"outputs": [],
|
|
|
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"cell_type": "code",
|
| 406 |
+
"execution_count": 48,
|
| 407 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
| 408 |
"metadata": {},
|
| 409 |
"outputs": [],
|
|
|
|
| 414 |
")"
|
| 415 |
]
|
| 416 |
},
|
| 417 |
+
{
|
| 418 |
+
"cell_type": "code",
|
| 419 |
+
"execution_count": 49,
|
| 420 |
+
"id": "bede1184",
|
| 421 |
+
"metadata": {},
|
| 422 |
+
"outputs": [
|
| 423 |
+
{
|
| 424 |
+
"name": "stderr",
|
| 425 |
+
"output_type": "stream",
|
| 426 |
+
"text": [
|
| 427 |
+
"Reading metadata...: 6505it [00:00, 35406.66it/s]\n",
|
| 428 |
+
"Reading metadata...: 4485it [00:00, 19930.24it/s]\n"
|
| 429 |
+
]
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"data": {
|
| 433 |
+
"text/plain": [
|
| 434 |
+
"'多から一へというのは、世界を因果的に決定論的に考えることである、過去から考えることである、機械的に考えることである。'"
|
| 435 |
+
]
|
| 436 |
+
},
|
| 437 |
+
"execution_count": 49,
|
| 438 |
+
"metadata": {},
|
| 439 |
+
"output_type": "execute_result"
|
| 440 |
+
}
|
| 441 |
+
],
|
| 442 |
+
"source": [
|
| 443 |
+
"xb = next(iter(vectorized_datasets['train']))\n",
|
| 444 |
+
"xb['labels']"
|
| 445 |
+
]
|
| 446 |
+
},
|
| 447 |
+
{
|
| 448 |
+
"cell_type": "code",
|
| 449 |
+
"execution_count": 59,
|
| 450 |
+
"id": "ac1e8d5b",
|
| 451 |
+
"metadata": {},
|
| 452 |
+
"outputs": [
|
| 453 |
+
{
|
| 454 |
+
"name": "stdout",
|
| 455 |
+
"output_type": "stream",
|
| 456 |
+
"text": [
|
| 457 |
+
"<|startoftranscript|>\n",
|
| 458 |
+
"<|ja|>\n",
|
| 459 |
+
"<|transcribe|>\n",
|
| 460 |
+
"<|notimestamps|>\n",
|
| 461 |
+
"多\n",
|
| 462 |
+
"から\n",
|
| 463 |
+
"一\n",
|
| 464 |
+
"へ\n",
|
| 465 |
+
"という\n",
|
| 466 |
+
"のは\n",
|
| 467 |
+
"、\n",
|
| 468 |
+
"世界\n",
|
| 469 |
+
"を\n",
|
| 470 |
+
"因\n",
|
| 471 |
+
"果\n",
|
| 472 |
+
"的\n",
|
| 473 |
+
"に\n",
|
| 474 |
+
"決\n",
|
| 475 |
+
"定\n",
|
| 476 |
+
"論\n",
|
| 477 |
+
"的\n",
|
| 478 |
+
"に\n",
|
| 479 |
+
"考\n",
|
| 480 |
+
"える\n",
|
| 481 |
+
"こと\n",
|
| 482 |
+
"で\n",
|
| 483 |
+
"ある\n",
|
| 484 |
+
"、\n",
|
| 485 |
+
"過去\n",
|
| 486 |
+
"から\n",
|
| 487 |
+
"考\n",
|
| 488 |
+
"える\n",
|
| 489 |
+
"こと\n",
|
| 490 |
+
"で\n",
|
| 491 |
+
"ある\n",
|
| 492 |
+
"、\n",
|
| 493 |
+
"機\n",
|
| 494 |
+
"�\n",
|
| 495 |
+
"�\n",
|
| 496 |
+
"的\n",
|
| 497 |
+
"に\n",
|
| 498 |
+
"考\n",
|
| 499 |
+
"える\n",
|
| 500 |
+
"こと\n",
|
| 501 |
+
"で\n",
|
| 502 |
+
"ある\n",
|
| 503 |
+
"。\n",
|
| 504 |
+
"<|endoftext|>\n"
|
| 505 |
+
]
|
| 506 |
+
}
|
| 507 |
+
],
|
| 508 |
+
"source": [
|
| 509 |
+
"idxs = processor.tokenizer(xb['labels']).input_ids\n",
|
| 510 |
+
"for idx in idxs:\n",
|
| 511 |
+
" print(processor.tokenizer.decode(idx))"
|
| 512 |
+
]
|
| 513 |
+
},
|
| 514 |
+
{
|
| 515 |
+
"cell_type": "code",
|
| 516 |
+
"execution_count": 60,
|
| 517 |
+
"id": "d33cefc4",
|
| 518 |
+
"metadata": {},
|
| 519 |
+
"outputs": [
|
| 520 |
+
{
|
| 521 |
+
"data": {
|
| 522 |
+
"text/plain": [
|
| 523 |
+
"[多から,\n",
|
| 524 |
+
" 一,\n",
|
| 525 |
+
" へ,\n",
|
| 526 |
+
" と,\n",
|
| 527 |
+
" いう,\n",
|
| 528 |
+
" の,\n",
|
| 529 |
+
" は,\n",
|
| 530 |
+
" 、,\n",
|
| 531 |
+
" 世界,\n",
|
| 532 |
+
" を,\n",
|
| 533 |
+
" 因果,\n",
|
| 534 |
+
" 的,\n",
|
| 535 |
+
" に,\n",
|
| 536 |
+
" 決定,\n",
|
| 537 |
+
" 論,\n",
|
| 538 |
+
" 的,\n",
|
| 539 |
+
" に,\n",
|
| 540 |
+
" 考える,\n",
|
| 541 |
+
" こと,\n",
|
| 542 |
+
" で,\n",
|
| 543 |
+
" ある,\n",
|
| 544 |
+
" 、,\n",
|
| 545 |
+
" 過去,\n",
|
| 546 |
+
" から,\n",
|
| 547 |
+
" 考える,\n",
|
| 548 |
+
" こと,\n",
|
| 549 |
+
" で,\n",
|
| 550 |
+
" ある,\n",
|
| 551 |
+
" 、,\n",
|
| 552 |
+
" 機械,\n",
|
| 553 |
+
" 的,\n",
|
| 554 |
+
" に,\n",
|
| 555 |
+
" 考える,\n",
|
| 556 |
+
" こと,\n",
|
| 557 |
+
" で,\n",
|
| 558 |
+
" ある,\n",
|
| 559 |
+
" 。]"
|
| 560 |
+
]
|
| 561 |
+
},
|
| 562 |
+
"execution_count": 60,
|
| 563 |
+
"metadata": {},
|
| 564 |
+
"output_type": "execute_result"
|
| 565 |
+
}
|
| 566 |
+
],
|
| 567 |
+
"source": [
|
| 568 |
+
"tagger(xb['labels'])"
|
| 569 |
+
]
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"cell_type": "code",
|
| 573 |
+
"execution_count": 55,
|
| 574 |
+
"id": "2cbb82ef",
|
| 575 |
+
"metadata": {},
|
| 576 |
+
"outputs": [
|
| 577 |
+
{
|
| 578 |
+
"name": "stdout",
|
| 579 |
+
"output_type": "stream",
|
| 580 |
+
"text": [
|
| 581 |
+
"Help on method decode in module transformers.tokenization_utils_base:\n",
|
| 582 |
+
"\n",
|
| 583 |
+
"decode(token_ids: Union[int, List[int], ForwardRef('np.ndarray'), ForwardRef('torch.Tensor'), ForwardRef('tf.Tensor')], skip_special_tokens: bool = False, clean_up_tokenization_spaces: bool = True, **kwargs) -> str method of transformers.models.whisper.tokenization_whisper.WhisperTokenizer instance\n",
|
| 584 |
+
" Converts a sequence of ids in a string, using the tokenizer and vocabulary with options to remove special\n",
|
| 585 |
+
" tokens and clean up tokenization spaces.\n",
|
| 586 |
+
" \n",
|
| 587 |
+
" Similar to doing `self.convert_tokens_to_string(self.convert_ids_to_tokens(token_ids))`.\n",
|
| 588 |
+
" \n",
|
| 589 |
+
" Args:\n",
|
| 590 |
+
" token_ids (`Union[int, List[int], np.ndarray, torch.Tensor, tf.Tensor]`):\n",
|
| 591 |
+
" List of tokenized input ids. Can be obtained using the `__call__` method.\n",
|
| 592 |
+
" skip_special_tokens (`bool`, *optional*, defaults to `False`):\n",
|
| 593 |
+
" Whether or not to remove special tokens in the decoding.\n",
|
| 594 |
+
" clean_up_tokenization_spaces (`bool`, *optional*, defaults to `True`):\n",
|
| 595 |
+
" Whether or not to clean up the tokenization spaces.\n",
|
| 596 |
+
" kwargs (additional keyword arguments, *optional*):\n",
|
| 597 |
+
" Will be passed to the underlying model specific decode method.\n",
|
| 598 |
+
" \n",
|
| 599 |
+
" Returns:\n",
|
| 600 |
+
" `str`: The decoded sentence.\n",
|
| 601 |
+
"\n"
|
| 602 |
+
]
|
| 603 |
+
}
|
| 604 |
+
],
|
| 605 |
+
"source": [
|
| 606 |
+
"help(processor.tokenizer.decode)"
|
| 607 |
+
]
|
| 608 |
+
},
|
| 609 |
+
{
|
| 610 |
+
"cell_type": "code",
|
| 611 |
+
"execution_count": 41,
|
| 612 |
+
"id": "b4b9bbfc",
|
| 613 |
+
"metadata": {},
|
| 614 |
+
"outputs": [
|
| 615 |
+
{
|
| 616 |
+
"data": {
|
| 617 |
+
"text/plain": [
|
| 618 |
+
"'麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'"
|
| 619 |
+
]
|
| 620 |
+
},
|
| 621 |
+
"execution_count": 41,
|
| 622 |
+
"metadata": {},
|
| 623 |
+
"output_type": "execute_result"
|
| 624 |
+
}
|
| 625 |
+
],
|
| 626 |
+
"source": [
|
| 627 |
+
"from fugashi import Tagger\n",
|
| 628 |
+
"\n",
|
| 629 |
+
"tagger = Tagger('-Owakati')\n",
|
| 630 |
+
"text = \"麩菓子は、麩を主材料とした日本の菓子。\"\n",
|
| 631 |
+
"tagger.parse(text)"
|
| 632 |
+
]
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"cell_type": "code",
|
| 636 |
+
"execution_count": 43,
|
| 637 |
+
"id": "833ca62d",
|
| 638 |
+
"metadata": {},
|
| 639 |
+
"outputs": [
|
| 640 |
+
{
|
| 641 |
+
"data": {
|
| 642 |
+
"text/plain": [
|
| 643 |
+
"[麩, 菓子, は, 、, 麩, を, 主材, 料, と, し, た, 日本, の, 菓子, 。]"
|
| 644 |
+
]
|
| 645 |
+
},
|
| 646 |
+
"execution_count": 43,
|
| 647 |
+
"metadata": {},
|
| 648 |
+
"output_type": "execute_result"
|
| 649 |
+
}
|
| 650 |
+
],
|
| 651 |
+
"source": [
|
| 652 |
+
"tagger(text)"
|
| 653 |
+
]
|
| 654 |
+
},
|
| 655 |
{
|
| 656 |
"cell_type": "code",
|
| 657 |
"execution_count": null,
|
| 658 |
+
"id": "7b7854d6",
|
| 659 |
"metadata": {},
|
| 660 |
"outputs": [],
|
| 661 |
"source": [
|
| 662 |
+
"raw_datasets['']"
|
| 663 |
]
|
| 664 |
},
|
| 665 |
{
|
|
|
|
| 1134 |
"execution_count": 26,
|
| 1135 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
| 1136 |
"metadata": {
|
| 1137 |
+
"scrolled": true
|
| 1138 |
},
|
| 1139 |
"outputs": [
|
| 1140 |
{
|
|
|
|
| 1378 |
},
|
| 1379 |
{
|
| 1380 |
"cell_type": "code",
|
| 1381 |
+
"execution_count": 28,
|
| 1382 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
| 1383 |
"metadata": {},
|
| 1384 |
"outputs": [
|
|
|
|
| 1394 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 1395 |
"added tokens file saved in ./added_tokens.json\n"
|
| 1396 |
]
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"data": {
|
| 1400 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 1401 |
+
"model_id": "a47d7e61b9144723a4208cc4cc492eee",
|
| 1402 |
+
"version_major": 2,
|
| 1403 |
+
"version_minor": 0
|
| 1404 |
+
},
|
| 1405 |
+
"text/plain": [
|
| 1406 |
+
"Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
|
| 1407 |
+
]
|
| 1408 |
+
},
|
| 1409 |
+
"metadata": {},
|
| 1410 |
+
"output_type": "display_data"
|
| 1411 |
+
},
|
| 1412 |
+
{
|
| 1413 |
+
"data": {
|
| 1414 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 1415 |
+
"model_id": "a7eb0d82c2fd4f978981915aa2314463",
|
| 1416 |
+
"version_major": 2,
|
| 1417 |
+
"version_minor": 0
|
| 1418 |
+
},
|
| 1419 |
+
"text/plain": [
|
| 1420 |
+
"Upload file runs/Dec12_04-37-47_150-136-44-233/events.out.tfevents.1670819878.150-136-44-233.69039.0: 100%|###…"
|
| 1421 |
+
]
|
| 1422 |
+
},
|
| 1423 |
+
"metadata": {},
|
| 1424 |
+
"output_type": "display_data"
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"name": "stderr",
|
| 1428 |
+
"output_type": "stream",
|
| 1429 |
+
"text": [
|
| 1430 |
+
"remote: Scanning LFS files for validity, may be slow... \n",
|
| 1431 |
+
"remote: LFS file scan complete. \n",
|
| 1432 |
+
"To https://huggingface.co/kimbochen/whisper-small-jp\n",
|
| 1433 |
+
" d83a98f..0ff52f0 main -> main\n",
|
| 1434 |
+
"\n",
|
| 1435 |
+
"Dropping the following result as it does not have all the necessary fields:\n",
|
| 1436 |
+
"{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'ja', 'split': 'test', 'args': 'ja'}}\n",
|
| 1437 |
+
"To https://huggingface.co/kimbochen/whisper-small-jp\n",
|
| 1438 |
+
" 0ff52f0..22e3a01 main -> main\n",
|
| 1439 |
+
"\n"
|
| 1440 |
+
]
|
| 1441 |
+
},
|
| 1442 |
+
{
|
| 1443 |
+
"data": {
|
| 1444 |
+
"text/plain": [
|
| 1445 |
+
"'https://huggingface.co/kimbochen/whisper-small-jp/commit/0ff52f0f1d63daf816427096a83f7bbf8f3892eb'"
|
| 1446 |
+
]
|
| 1447 |
+
},
|
| 1448 |
+
"execution_count": 28,
|
| 1449 |
+
"metadata": {},
|
| 1450 |
+
"output_type": "execute_result"
|
| 1451 |
}
|
| 1452 |
],
|
| 1453 |
"source": [
|