Spaces:
Build error
Build error
feat: Implement alternative solution achieving functional goals
Browse files
This commit represents a mixed outcome. While the implementation successfully achieves the intended functionality, it diverges from the original plan. The solution, though unconventional, meets the necessary requirements and ensures operational effectiveness. Future revisions may align it more closely with the initial strategy.
- Cargo.lock +245 -18
- Cargo.toml +7 -1
- config/dev.yaml +17 -9
- ggml-metal.metal +0 -0
- src/config.rs +70 -32
- src/main.rs +3 -3
- src/whisper.rs +208 -106
Cargo.lock
CHANGED
|
@@ -37,6 +37,15 @@ dependencies = [
|
|
| 37 |
"memchr",
|
| 38 |
]
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
[[package]]
|
| 41 |
name = "anyhow"
|
| 42 |
version = "1.0.75"
|
|
@@ -62,7 +71,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
|
| 62 |
dependencies = [
|
| 63 |
"proc-macro2",
|
| 64 |
"quote",
|
| 65 |
-
"syn",
|
| 66 |
]
|
| 67 |
|
| 68 |
[[package]]
|
|
@@ -73,7 +82,18 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
|
|
| 73 |
dependencies = [
|
| 74 |
"proc-macro2",
|
| 75 |
"quote",
|
| 76 |
-
"syn",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
]
|
| 78 |
|
| 79 |
[[package]]
|
|
@@ -511,6 +531,29 @@ dependencies = [
|
|
| 511 |
"vsimd",
|
| 512 |
]
|
| 513 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
[[package]]
|
| 515 |
name = "bindgen"
|
| 516 |
version = "0.68.1"
|
|
@@ -518,7 +561,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
| 518 |
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
|
| 519 |
dependencies = [
|
| 520 |
"bitflags 2.4.1",
|
| 521 |
-
"cexpr",
|
| 522 |
"clang-sys",
|
| 523 |
"lazy_static",
|
| 524 |
"lazycell",
|
|
@@ -529,9 +572,9 @@ dependencies = [
|
|
| 529 |
"quote",
|
| 530 |
"regex",
|
| 531 |
"rustc-hash",
|
| 532 |
-
"shlex",
|
| 533 |
-
"syn",
|
| 534 |
-
"which",
|
| 535 |
]
|
| 536 |
|
| 537 |
[[package]]
|
|
@@ -586,13 +629,22 @@ dependencies = [
|
|
| 586 |
"libc",
|
| 587 |
]
|
| 588 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
[[package]]
|
| 590 |
name = "cexpr"
|
| 591 |
version = "0.6.0"
|
| 592 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 593 |
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
| 594 |
dependencies = [
|
| 595 |
-
"nom",
|
| 596 |
]
|
| 597 |
|
| 598 |
[[package]]
|
|
@@ -612,6 +664,21 @@ dependencies = [
|
|
| 612 |
"libloading",
|
| 613 |
]
|
| 614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
[[package]]
|
| 616 |
name = "cmake"
|
| 617 |
version = "0.1.50"
|
|
@@ -630,7 +697,7 @@ dependencies = [
|
|
| 630 |
"async-trait",
|
| 631 |
"json5",
|
| 632 |
"lazy_static",
|
| 633 |
-
"nom",
|
| 634 |
"pathdiff",
|
| 635 |
"ron",
|
| 636 |
"rust-ini",
|
|
@@ -722,6 +789,19 @@ version = "1.9.0"
|
|
| 722 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 723 |
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
| 724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
[[package]]
|
| 726 |
name = "equivalent"
|
| 727 |
version = "1.0.1"
|
|
@@ -788,7 +868,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
|
|
| 788 |
dependencies = [
|
| 789 |
"proc-macro2",
|
| 790 |
"quote",
|
| 791 |
-
"syn",
|
| 792 |
]
|
| 793 |
|
| 794 |
[[package]]
|
|
@@ -818,6 +898,15 @@ dependencies = [
|
|
| 818 |
"slab",
|
| 819 |
]
|
| 820 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 821 |
[[package]]
|
| 822 |
name = "generic-array"
|
| 823 |
version = "0.14.7"
|
|
@@ -909,6 +998,15 @@ dependencies = [
|
|
| 909 |
"http",
|
| 910 |
]
|
| 911 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
[[package]]
|
| 913 |
name = "hermit-abi"
|
| 914 |
version = "0.3.3"
|
|
@@ -939,6 +1037,12 @@ dependencies = [
|
|
| 939 |
"windows-sys",
|
| 940 |
]
|
| 941 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
[[package]]
|
| 943 |
name = "http"
|
| 944 |
version = "0.2.9"
|
|
@@ -973,6 +1077,12 @@ version = "1.0.3"
|
|
| 973 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 974 |
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
| 975 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 976 |
[[package]]
|
| 977 |
name = "hyper"
|
| 978 |
version = "0.14.27"
|
|
@@ -1078,6 +1188,16 @@ version = "0.2.150"
|
|
| 1078 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1079 |
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
| 1080 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1081 |
[[package]]
|
| 1082 |
name = "libloading"
|
| 1083 |
version = "0.7.4"
|
|
@@ -1173,6 +1293,16 @@ dependencies = [
|
|
| 1173 |
"windows-sys",
|
| 1174 |
]
|
| 1175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1176 |
[[package]]
|
| 1177 |
name = "nom"
|
| 1178 |
version = "7.1.3"
|
|
@@ -1218,7 +1348,7 @@ version = "1.16.0"
|
|
| 1218 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1219 |
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
| 1220 |
dependencies = [
|
| 1221 |
-
"hermit-abi",
|
| 1222 |
"libc",
|
| 1223 |
]
|
| 1224 |
|
|
@@ -1337,7 +1467,7 @@ dependencies = [
|
|
| 1337 |
"pest_meta",
|
| 1338 |
"proc-macro2",
|
| 1339 |
"quote",
|
| 1340 |
-
"syn",
|
| 1341 |
]
|
| 1342 |
|
| 1343 |
[[package]]
|
|
@@ -1405,7 +1535,7 @@ dependencies = [
|
|
| 1405 |
"proc-macro-crate",
|
| 1406 |
"proc-macro2",
|
| 1407 |
"quote",
|
| 1408 |
-
"syn",
|
| 1409 |
]
|
| 1410 |
|
| 1411 |
[[package]]
|
|
@@ -1420,6 +1550,8 @@ dependencies = [
|
|
| 1420 |
"aws-sdk-translate",
|
| 1421 |
"config",
|
| 1422 |
"futures-util",
|
|
|
|
|
|
|
| 1423 |
"once_cell",
|
| 1424 |
"poem",
|
| 1425 |
"serde",
|
|
@@ -1429,6 +1561,7 @@ dependencies = [
|
|
| 1429 |
"tokio-stream",
|
| 1430 |
"tracing",
|
| 1431 |
"tracing-subscriber",
|
|
|
|
| 1432 |
"whisper-rs",
|
| 1433 |
"whisper-rs-sys",
|
| 1434 |
]
|
|
@@ -1452,7 +1585,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
| 1452 |
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
|
| 1453 |
dependencies = [
|
| 1454 |
"proc-macro2",
|
| 1455 |
-
"syn",
|
| 1456 |
]
|
| 1457 |
|
| 1458 |
[[package]]
|
|
@@ -1764,7 +1897,7 @@ checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1"
|
|
| 1764 |
dependencies = [
|
| 1765 |
"proc-macro2",
|
| 1766 |
"quote",
|
| 1767 |
-
"syn",
|
| 1768 |
]
|
| 1769 |
|
| 1770 |
[[package]]
|
|
@@ -1834,6 +1967,12 @@ dependencies = [
|
|
| 1834 |
"lazy_static",
|
| 1835 |
]
|
| 1836 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1837 |
[[package]]
|
| 1838 |
name = "shlex"
|
| 1839 |
version = "1.2.0"
|
|
@@ -1890,12 +2029,29 @@ version = "0.9.8"
|
|
| 1890 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1891 |
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
| 1892 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1893 |
[[package]]
|
| 1894 |
name = "subtle"
|
| 1895 |
version = "2.5.0"
|
| 1896 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1897 |
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
| 1898 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1899 |
[[package]]
|
| 1900 |
name = "syn"
|
| 1901 |
version = "2.0.39"
|
|
@@ -1907,6 +2063,24 @@ dependencies = [
|
|
| 1907 |
"unicode-ident",
|
| 1908 |
]
|
| 1909 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1910 |
[[package]]
|
| 1911 |
name = "thiserror"
|
| 1912 |
version = "1.0.50"
|
|
@@ -1924,7 +2098,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
|
|
| 1924 |
dependencies = [
|
| 1925 |
"proc-macro2",
|
| 1926 |
"quote",
|
| 1927 |
-
"syn",
|
| 1928 |
]
|
| 1929 |
|
| 1930 |
[[package]]
|
|
@@ -2006,7 +2180,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
|
|
| 2006 |
dependencies = [
|
| 2007 |
"proc-macro2",
|
| 2008 |
"quote",
|
| 2009 |
-
"syn",
|
| 2010 |
]
|
| 2011 |
|
| 2012 |
[[package]]
|
|
@@ -2107,7 +2281,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
|
|
| 2107 |
dependencies = [
|
| 2108 |
"proc-macro2",
|
| 2109 |
"quote",
|
| 2110 |
-
"syn",
|
| 2111 |
]
|
| 2112 |
|
| 2113 |
[[package]]
|
|
@@ -2149,6 +2323,29 @@ dependencies = [
|
|
| 2149 |
"tracing-log",
|
| 2150 |
]
|
| 2151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2152 |
[[package]]
|
| 2153 |
name = "try-lock"
|
| 2154 |
version = "0.2.4"
|
|
@@ -2225,6 +2422,12 @@ dependencies = [
|
|
| 2225 |
"tinyvec",
|
| 2226 |
]
|
| 2227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2228 |
[[package]]
|
| 2229 |
name = "unsafe-libyaml"
|
| 2230 |
version = "0.2.9"
|
|
@@ -2272,6 +2475,12 @@ version = "0.1.0"
|
|
| 2272 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2273 |
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
| 2274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2275 |
[[package]]
|
| 2276 |
name = "version_check"
|
| 2277 |
version = "0.9.4"
|
|
@@ -2299,6 +2508,15 @@ version = "0.11.0+wasi-snapshot-preview1"
|
|
| 2299 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2300 |
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
| 2301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2302 |
[[package]]
|
| 2303 |
name = "which"
|
| 2304 |
version = "4.4.2"
|
|
@@ -2324,7 +2542,7 @@ name = "whisper-rs-sys"
|
|
| 2324 |
version = "0.7.3"
|
| 2325 |
source = "git+https://github.com/mingyang91/whisper-rs.git#f8e424a19b13cc348395afd862f0dcb864fcb1fc"
|
| 2326 |
dependencies = [
|
| 2327 |
-
"bindgen",
|
| 2328 |
"cfg-if",
|
| 2329 |
"cmake",
|
| 2330 |
"fs_extra",
|
|
@@ -2346,6 +2564,15 @@ version = "0.4.0"
|
|
| 2346 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2347 |
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
| 2348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2349 |
[[package]]
|
| 2350 |
name = "winapi-x86_64-pc-windows-gnu"
|
| 2351 |
version = "0.4.0"
|
|
|
|
| 37 |
"memchr",
|
| 38 |
]
|
| 39 |
|
| 40 |
+
[[package]]
|
| 41 |
+
name = "ansi_term"
|
| 42 |
+
version = "0.12.1"
|
| 43 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 44 |
+
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
|
| 45 |
+
dependencies = [
|
| 46 |
+
"winapi",
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
[[package]]
|
| 50 |
name = "anyhow"
|
| 51 |
version = "1.0.75"
|
|
|
|
| 71 |
dependencies = [
|
| 72 |
"proc-macro2",
|
| 73 |
"quote",
|
| 74 |
+
"syn 2.0.39",
|
| 75 |
]
|
| 76 |
|
| 77 |
[[package]]
|
|
|
|
| 82 |
dependencies = [
|
| 83 |
"proc-macro2",
|
| 84 |
"quote",
|
| 85 |
+
"syn 2.0.39",
|
| 86 |
+
]
|
| 87 |
+
|
| 88 |
+
[[package]]
|
| 89 |
+
name = "atty"
|
| 90 |
+
version = "0.2.14"
|
| 91 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 92 |
+
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
| 93 |
+
dependencies = [
|
| 94 |
+
"hermit-abi 0.1.19",
|
| 95 |
+
"libc",
|
| 96 |
+
"winapi",
|
| 97 |
]
|
| 98 |
|
| 99 |
[[package]]
|
|
|
|
| 531 |
"vsimd",
|
| 532 |
]
|
| 533 |
|
| 534 |
+
[[package]]
|
| 535 |
+
name = "bindgen"
|
| 536 |
+
version = "0.56.0"
|
| 537 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 538 |
+
checksum = "2da379dbebc0b76ef63ca68d8fc6e71c0f13e59432e0987e508c1820e6ab5239"
|
| 539 |
+
dependencies = [
|
| 540 |
+
"bitflags 1.3.2",
|
| 541 |
+
"cexpr 0.4.0",
|
| 542 |
+
"clang-sys",
|
| 543 |
+
"clap",
|
| 544 |
+
"env_logger",
|
| 545 |
+
"lazy_static",
|
| 546 |
+
"lazycell",
|
| 547 |
+
"log",
|
| 548 |
+
"peeking_take_while",
|
| 549 |
+
"proc-macro2",
|
| 550 |
+
"quote",
|
| 551 |
+
"regex",
|
| 552 |
+
"rustc-hash",
|
| 553 |
+
"shlex 0.1.1",
|
| 554 |
+
"which 3.1.1",
|
| 555 |
+
]
|
| 556 |
+
|
| 557 |
[[package]]
|
| 558 |
name = "bindgen"
|
| 559 |
version = "0.68.1"
|
|
|
|
| 561 |
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
|
| 562 |
dependencies = [
|
| 563 |
"bitflags 2.4.1",
|
| 564 |
+
"cexpr 0.6.0",
|
| 565 |
"clang-sys",
|
| 566 |
"lazy_static",
|
| 567 |
"lazycell",
|
|
|
|
| 572 |
"quote",
|
| 573 |
"regex",
|
| 574 |
"rustc-hash",
|
| 575 |
+
"shlex 1.2.0",
|
| 576 |
+
"syn 2.0.39",
|
| 577 |
+
"which 4.4.2",
|
| 578 |
]
|
| 579 |
|
| 580 |
[[package]]
|
|
|
|
| 629 |
"libc",
|
| 630 |
]
|
| 631 |
|
| 632 |
+
[[package]]
|
| 633 |
+
name = "cexpr"
|
| 634 |
+
version = "0.4.0"
|
| 635 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 636 |
+
checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
|
| 637 |
+
dependencies = [
|
| 638 |
+
"nom 5.1.3",
|
| 639 |
+
]
|
| 640 |
+
|
| 641 |
[[package]]
|
| 642 |
name = "cexpr"
|
| 643 |
version = "0.6.0"
|
| 644 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 645 |
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
| 646 |
dependencies = [
|
| 647 |
+
"nom 7.1.3",
|
| 648 |
]
|
| 649 |
|
| 650 |
[[package]]
|
|
|
|
| 664 |
"libloading",
|
| 665 |
]
|
| 666 |
|
| 667 |
+
[[package]]
|
| 668 |
+
name = "clap"
|
| 669 |
+
version = "2.34.0"
|
| 670 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 671 |
+
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
|
| 672 |
+
dependencies = [
|
| 673 |
+
"ansi_term",
|
| 674 |
+
"atty",
|
| 675 |
+
"bitflags 1.3.2",
|
| 676 |
+
"strsim",
|
| 677 |
+
"textwrap",
|
| 678 |
+
"unicode-width",
|
| 679 |
+
"vec_map",
|
| 680 |
+
]
|
| 681 |
+
|
| 682 |
[[package]]
|
| 683 |
name = "cmake"
|
| 684 |
version = "0.1.50"
|
|
|
|
| 697 |
"async-trait",
|
| 698 |
"json5",
|
| 699 |
"lazy_static",
|
| 700 |
+
"nom 7.1.3",
|
| 701 |
"pathdiff",
|
| 702 |
"ron",
|
| 703 |
"rust-ini",
|
|
|
|
| 789 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 790 |
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
| 791 |
|
| 792 |
+
[[package]]
|
| 793 |
+
name = "env_logger"
|
| 794 |
+
version = "0.8.4"
|
| 795 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 796 |
+
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
|
| 797 |
+
dependencies = [
|
| 798 |
+
"atty",
|
| 799 |
+
"humantime",
|
| 800 |
+
"log",
|
| 801 |
+
"regex",
|
| 802 |
+
"termcolor",
|
| 803 |
+
]
|
| 804 |
+
|
| 805 |
[[package]]
|
| 806 |
name = "equivalent"
|
| 807 |
version = "1.0.1"
|
|
|
|
| 868 |
dependencies = [
|
| 869 |
"proc-macro2",
|
| 870 |
"quote",
|
| 871 |
+
"syn 2.0.39",
|
| 872 |
]
|
| 873 |
|
| 874 |
[[package]]
|
|
|
|
| 898 |
"slab",
|
| 899 |
]
|
| 900 |
|
| 901 |
+
[[package]]
|
| 902 |
+
name = "fvad"
|
| 903 |
+
version = "0.1.3"
|
| 904 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 905 |
+
checksum = "8b8e04cf8731da968d9456575a0ae44cb8760dee46169a5289a0e87d4cc4743a"
|
| 906 |
+
dependencies = [
|
| 907 |
+
"libfvad-sys",
|
| 908 |
+
]
|
| 909 |
+
|
| 910 |
[[package]]
|
| 911 |
name = "generic-array"
|
| 912 |
version = "0.14.7"
|
|
|
|
| 998 |
"http",
|
| 999 |
]
|
| 1000 |
|
| 1001 |
+
[[package]]
|
| 1002 |
+
name = "hermit-abi"
|
| 1003 |
+
version = "0.1.19"
|
| 1004 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1005 |
+
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
| 1006 |
+
dependencies = [
|
| 1007 |
+
"libc",
|
| 1008 |
+
]
|
| 1009 |
+
|
| 1010 |
[[package]]
|
| 1011 |
name = "hermit-abi"
|
| 1012 |
version = "0.3.3"
|
|
|
|
| 1037 |
"windows-sys",
|
| 1038 |
]
|
| 1039 |
|
| 1040 |
+
[[package]]
|
| 1041 |
+
name = "hound"
|
| 1042 |
+
version = "3.5.1"
|
| 1043 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1044 |
+
checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
|
| 1045 |
+
|
| 1046 |
[[package]]
|
| 1047 |
name = "http"
|
| 1048 |
version = "0.2.9"
|
|
|
|
| 1077 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1078 |
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
| 1079 |
|
| 1080 |
+
[[package]]
|
| 1081 |
+
name = "humantime"
|
| 1082 |
+
version = "2.1.0"
|
| 1083 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1084 |
+
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
| 1085 |
+
|
| 1086 |
[[package]]
|
| 1087 |
name = "hyper"
|
| 1088 |
version = "0.14.27"
|
|
|
|
| 1188 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1189 |
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
| 1190 |
|
| 1191 |
+
[[package]]
|
| 1192 |
+
name = "libfvad-sys"
|
| 1193 |
+
version = "1.0.0"
|
| 1194 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1195 |
+
checksum = "473b5389760c65fab561600c78f609ee5779799ae8d29818eccea95d8a8c94d8"
|
| 1196 |
+
dependencies = [
|
| 1197 |
+
"bindgen 0.56.0",
|
| 1198 |
+
"cc",
|
| 1199 |
+
]
|
| 1200 |
+
|
| 1201 |
[[package]]
|
| 1202 |
name = "libloading"
|
| 1203 |
version = "0.7.4"
|
|
|
|
| 1293 |
"windows-sys",
|
| 1294 |
]
|
| 1295 |
|
| 1296 |
+
[[package]]
|
| 1297 |
+
name = "nom"
|
| 1298 |
+
version = "5.1.3"
|
| 1299 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1300 |
+
checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b"
|
| 1301 |
+
dependencies = [
|
| 1302 |
+
"memchr",
|
| 1303 |
+
"version_check",
|
| 1304 |
+
]
|
| 1305 |
+
|
| 1306 |
[[package]]
|
| 1307 |
name = "nom"
|
| 1308 |
version = "7.1.3"
|
|
|
|
| 1348 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1349 |
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
| 1350 |
dependencies = [
|
| 1351 |
+
"hermit-abi 0.3.3",
|
| 1352 |
"libc",
|
| 1353 |
]
|
| 1354 |
|
|
|
|
| 1467 |
"pest_meta",
|
| 1468 |
"proc-macro2",
|
| 1469 |
"quote",
|
| 1470 |
+
"syn 2.0.39",
|
| 1471 |
]
|
| 1472 |
|
| 1473 |
[[package]]
|
|
|
|
| 1535 |
"proc-macro-crate",
|
| 1536 |
"proc-macro2",
|
| 1537 |
"quote",
|
| 1538 |
+
"syn 2.0.39",
|
| 1539 |
]
|
| 1540 |
|
| 1541 |
[[package]]
|
|
|
|
| 1550 |
"aws-sdk-translate",
|
| 1551 |
"config",
|
| 1552 |
"futures-util",
|
| 1553 |
+
"fvad",
|
| 1554 |
+
"hound",
|
| 1555 |
"once_cell",
|
| 1556 |
"poem",
|
| 1557 |
"serde",
|
|
|
|
| 1561 |
"tokio-stream",
|
| 1562 |
"tracing",
|
| 1563 |
"tracing-subscriber",
|
| 1564 |
+
"tracing-test",
|
| 1565 |
"whisper-rs",
|
| 1566 |
"whisper-rs-sys",
|
| 1567 |
]
|
|
|
|
| 1585 |
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
|
| 1586 |
dependencies = [
|
| 1587 |
"proc-macro2",
|
| 1588 |
+
"syn 2.0.39",
|
| 1589 |
]
|
| 1590 |
|
| 1591 |
[[package]]
|
|
|
|
| 1897 |
dependencies = [
|
| 1898 |
"proc-macro2",
|
| 1899 |
"quote",
|
| 1900 |
+
"syn 2.0.39",
|
| 1901 |
]
|
| 1902 |
|
| 1903 |
[[package]]
|
|
|
|
| 1967 |
"lazy_static",
|
| 1968 |
]
|
| 1969 |
|
| 1970 |
+
[[package]]
|
| 1971 |
+
name = "shlex"
|
| 1972 |
+
version = "0.1.1"
|
| 1973 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1974 |
+
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
|
| 1975 |
+
|
| 1976 |
[[package]]
|
| 1977 |
name = "shlex"
|
| 1978 |
version = "1.2.0"
|
|
|
|
| 2029 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2030 |
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
| 2031 |
|
| 2032 |
+
[[package]]
|
| 2033 |
+
name = "strsim"
|
| 2034 |
+
version = "0.8.0"
|
| 2035 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2036 |
+
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
| 2037 |
+
|
| 2038 |
[[package]]
|
| 2039 |
name = "subtle"
|
| 2040 |
version = "2.5.0"
|
| 2041 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2042 |
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
| 2043 |
|
| 2044 |
+
[[package]]
|
| 2045 |
+
name = "syn"
|
| 2046 |
+
version = "1.0.109"
|
| 2047 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2048 |
+
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
| 2049 |
+
dependencies = [
|
| 2050 |
+
"proc-macro2",
|
| 2051 |
+
"quote",
|
| 2052 |
+
"unicode-ident",
|
| 2053 |
+
]
|
| 2054 |
+
|
| 2055 |
[[package]]
|
| 2056 |
name = "syn"
|
| 2057 |
version = "2.0.39"
|
|
|
|
| 2063 |
"unicode-ident",
|
| 2064 |
]
|
| 2065 |
|
| 2066 |
+
[[package]]
|
| 2067 |
+
name = "termcolor"
|
| 2068 |
+
version = "1.4.0"
|
| 2069 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2070 |
+
checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449"
|
| 2071 |
+
dependencies = [
|
| 2072 |
+
"winapi-util",
|
| 2073 |
+
]
|
| 2074 |
+
|
| 2075 |
+
[[package]]
|
| 2076 |
+
name = "textwrap"
|
| 2077 |
+
version = "0.11.0"
|
| 2078 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2079 |
+
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
| 2080 |
+
dependencies = [
|
| 2081 |
+
"unicode-width",
|
| 2082 |
+
]
|
| 2083 |
+
|
| 2084 |
[[package]]
|
| 2085 |
name = "thiserror"
|
| 2086 |
version = "1.0.50"
|
|
|
|
| 2098 |
dependencies = [
|
| 2099 |
"proc-macro2",
|
| 2100 |
"quote",
|
| 2101 |
+
"syn 2.0.39",
|
| 2102 |
]
|
| 2103 |
|
| 2104 |
[[package]]
|
|
|
|
| 2180 |
dependencies = [
|
| 2181 |
"proc-macro2",
|
| 2182 |
"quote",
|
| 2183 |
+
"syn 2.0.39",
|
| 2184 |
]
|
| 2185 |
|
| 2186 |
[[package]]
|
|
|
|
| 2281 |
dependencies = [
|
| 2282 |
"proc-macro2",
|
| 2283 |
"quote",
|
| 2284 |
+
"syn 2.0.39",
|
| 2285 |
]
|
| 2286 |
|
| 2287 |
[[package]]
|
|
|
|
| 2323 |
"tracing-log",
|
| 2324 |
]
|
| 2325 |
|
| 2326 |
+
[[package]]
|
| 2327 |
+
name = "tracing-test"
|
| 2328 |
+
version = "0.2.4"
|
| 2329 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2330 |
+
checksum = "3a2c0ff408fe918a94c428a3f2ad04e4afd5c95bbc08fcf868eff750c15728a4"
|
| 2331 |
+
dependencies = [
|
| 2332 |
+
"lazy_static",
|
| 2333 |
+
"tracing-core",
|
| 2334 |
+
"tracing-subscriber",
|
| 2335 |
+
"tracing-test-macro",
|
| 2336 |
+
]
|
| 2337 |
+
|
| 2338 |
+
[[package]]
|
| 2339 |
+
name = "tracing-test-macro"
|
| 2340 |
+
version = "0.2.4"
|
| 2341 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2342 |
+
checksum = "258bc1c4f8e2e73a977812ab339d503e6feeb92700f6d07a6de4d321522d5c08"
|
| 2343 |
+
dependencies = [
|
| 2344 |
+
"lazy_static",
|
| 2345 |
+
"quote",
|
| 2346 |
+
"syn 1.0.109",
|
| 2347 |
+
]
|
| 2348 |
+
|
| 2349 |
[[package]]
|
| 2350 |
name = "try-lock"
|
| 2351 |
version = "0.2.4"
|
|
|
|
| 2422 |
"tinyvec",
|
| 2423 |
]
|
| 2424 |
|
| 2425 |
+
[[package]]
|
| 2426 |
+
name = "unicode-width"
|
| 2427 |
+
version = "0.1.11"
|
| 2428 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2429 |
+
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
|
| 2430 |
+
|
| 2431 |
[[package]]
|
| 2432 |
name = "unsafe-libyaml"
|
| 2433 |
version = "0.2.9"
|
|
|
|
| 2475 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2476 |
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
| 2477 |
|
| 2478 |
+
[[package]]
|
| 2479 |
+
name = "vec_map"
|
| 2480 |
+
version = "0.8.2"
|
| 2481 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2482 |
+
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
| 2483 |
+
|
| 2484 |
[[package]]
|
| 2485 |
name = "version_check"
|
| 2486 |
version = "0.9.4"
|
|
|
|
| 2508 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2509 |
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
| 2510 |
|
| 2511 |
+
[[package]]
|
| 2512 |
+
name = "which"
|
| 2513 |
+
version = "3.1.1"
|
| 2514 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2515 |
+
checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
|
| 2516 |
+
dependencies = [
|
| 2517 |
+
"libc",
|
| 2518 |
+
]
|
| 2519 |
+
|
| 2520 |
[[package]]
|
| 2521 |
name = "which"
|
| 2522 |
version = "4.4.2"
|
|
|
|
| 2542 |
version = "0.7.3"
|
| 2543 |
source = "git+https://github.com/mingyang91/whisper-rs.git#f8e424a19b13cc348395afd862f0dcb864fcb1fc"
|
| 2544 |
dependencies = [
|
| 2545 |
+
"bindgen 0.68.1",
|
| 2546 |
"cfg-if",
|
| 2547 |
"cmake",
|
| 2548 |
"fs_extra",
|
|
|
|
| 2564 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2565 |
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
| 2566 |
|
| 2567 |
+
[[package]]
|
| 2568 |
+
name = "winapi-util"
|
| 2569 |
+
version = "0.1.6"
|
| 2570 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2571 |
+
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
|
| 2572 |
+
dependencies = [
|
| 2573 |
+
"winapi",
|
| 2574 |
+
]
|
| 2575 |
+
|
| 2576 |
[[package]]
|
| 2577 |
name = "winapi-x86_64-pc-windows-gnu"
|
| 2578 |
version = "0.4.0"
|
Cargo.toml
CHANGED
|
@@ -18,8 +18,9 @@ serde_json = "1.0"
|
|
| 18 |
serde_yaml = "0.9"
|
| 19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync", "signal"] }
|
| 20 |
tokio-stream = "0.1"
|
| 21 |
-
tracing = "0.1"
|
| 22 |
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
|
|
| 23 |
|
| 24 |
[dependencies.poem]
|
| 25 |
version = "1.3"
|
|
@@ -27,6 +28,11 @@ features = ["websocket", "static-files"]
|
|
| 27 |
|
| 28 |
[dependencies.whisper-rs]
|
| 29 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
|
|
|
| 30 |
[dependencies.whisper-rs-sys]
|
| 31 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
| 32 |
package = "whisper-rs-sys"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
serde_yaml = "0.9"
|
| 19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync", "signal"] }
|
| 20 |
tokio-stream = "0.1"
|
| 21 |
+
tracing = { version = "0.1", features = [] }
|
| 22 |
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
| 23 |
+
fvad = "0.1"
|
| 24 |
|
| 25 |
[dependencies.poem]
|
| 26 |
version = "1.3"
|
|
|
|
| 28 |
|
| 29 |
[dependencies.whisper-rs]
|
| 30 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
| 31 |
+
features = ["coreml", "metal"]
|
| 32 |
[dependencies.whisper-rs-sys]
|
| 33 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
| 34 |
package = "whisper-rs-sys"
|
| 35 |
+
|
| 36 |
+
[dev-dependencies]
|
| 37 |
+
hound = "3.5.1"
|
| 38 |
+
tracing-test = "*"
|
config/dev.yaml
CHANGED
|
@@ -2,24 +2,32 @@ server:
|
|
| 2 |
port: 8080
|
| 3 |
host: 0.0.0.0
|
| 4 |
whisper:
|
| 5 |
-
length_ms:
|
| 6 |
keep_ms: 200
|
| 7 |
step_ms: 5000
|
| 8 |
-
model: "models/ggml-large-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
| 10 |
params:
|
| 11 |
-
|
| 12 |
-
max_tokens: 0
|
| 13 |
-
audio_ctx: 0
|
| 14 |
speed_up: false
|
| 15 |
-
single_segment:
|
| 16 |
translate: false
|
| 17 |
-
|
| 18 |
-
temperature_inc:
|
|
|
|
|
|
|
|
|
|
| 19 |
print_special: false
|
| 20 |
print_progress: false
|
| 21 |
print_realtime: false
|
|
|
|
| 22 |
no_context: false
|
| 23 |
no_timestamps: false
|
|
|
|
| 24 |
tinydiarize: false
|
| 25 |
language: "en"
|
|
|
|
| 2 |
port: 8080
|
| 3 |
host: 0.0.0.0
|
| 4 |
whisper:
|
| 5 |
+
length_ms: 5000
|
| 6 |
keep_ms: 200
|
| 7 |
step_ms: 5000
|
| 8 |
+
model: "models/ggml-large-v3.bin"
|
| 9 |
+
# model: "models/ggml-base.bin"
|
| 10 |
+
# model: "models/ggml-medium.en.bin"
|
| 11 |
+
max_prompt_tokens: 32
|
| 12 |
+
context_confidence_threshold: 0.5
|
| 13 |
params:
|
| 14 |
+
# n_threads: 8
|
| 15 |
+
# max_tokens: 0
|
| 16 |
+
# audio_ctx: 0
|
| 17 |
speed_up: false
|
| 18 |
+
single_segment: true
|
| 19 |
translate: false
|
| 20 |
+
# temperature_inc: 0.2 #0.4
|
| 21 |
+
# temperature_inc: 0
|
| 22 |
+
# entropy_threshold: 2.5
|
| 23 |
+
# entropy_threshold: 2.8
|
| 24 |
+
# n_max_text_ctx: 64 #16384
|
| 25 |
print_special: false
|
| 26 |
print_progress: false
|
| 27 |
print_realtime: false
|
| 28 |
+
token_timestamps: false
|
| 29 |
no_context: false
|
| 30 |
no_timestamps: false
|
| 31 |
+
suppress_non_speech_tokens: false
|
| 32 |
tinydiarize: false
|
| 33 |
language: "en"
|
ggml-metal.metal
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/config.rs
CHANGED
|
@@ -3,7 +3,7 @@ use std::{env, ffi::c_int, net::IpAddr};
|
|
| 3 |
use config::{Config, Environment, File};
|
| 4 |
use once_cell::sync::Lazy;
|
| 5 |
use serde::Deserialize;
|
| 6 |
-
use whisper_rs::FullParams;
|
| 7 |
use tracing::debug;
|
| 8 |
|
| 9 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
|
@@ -12,28 +12,33 @@ pub(crate) static SETTINGS: Lazy<Settings> =
|
|
| 12 |
#[derive(Debug, Deserialize, Clone)]
|
| 13 |
pub(crate) struct WhisperConfig {
|
| 14 |
pub(crate) params: WhisperParams,
|
| 15 |
-
pub(crate) step_ms:
|
| 16 |
-
pub(crate) length_ms:
|
| 17 |
-
pub(crate) keep_ms:
|
| 18 |
pub(crate) model: String,
|
| 19 |
pub(crate) max_prompt_tokens: usize,
|
|
|
|
| 20 |
}
|
| 21 |
|
| 22 |
#[allow(dead_code)]
|
| 23 |
#[derive(Debug, Deserialize, Clone)]
|
| 24 |
pub(crate) struct WhisperParams {
|
| 25 |
pub(crate) n_threads: Option<usize>,
|
| 26 |
-
pub(crate) max_tokens: u32
|
| 27 |
-
pub(crate) audio_ctx: u32
|
| 28 |
-
pub(crate) speed_up: bool
|
| 29 |
-
pub(crate) translate: bool
|
| 30 |
-
pub(crate)
|
| 31 |
-
pub(crate) print_special: bool
|
| 32 |
-
pub(crate) print_realtime: bool
|
| 33 |
-
pub(crate) print_progress: bool
|
| 34 |
-
pub(crate)
|
| 35 |
-
pub(crate)
|
| 36 |
-
pub(crate)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
// pub(crate) tinydiarize: bool,
|
| 38 |
pub(crate) language: Option<String>,
|
| 39 |
}
|
|
@@ -41,25 +46,58 @@ pub(crate) struct WhisperParams {
|
|
| 41 |
impl WhisperParams {
|
| 42 |
pub(crate) fn to_full_params<'a, 'b>(&'a self, tokens: &'b [c_int]) -> FullParams<'a, 'b> {
|
| 43 |
let mut param = FullParams::new(Default::default());
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
let
|
| 54 |
-
.
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
// param.set_tdrz_enable(self.tinydiarize);
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
|
|
|
| 63 |
param
|
| 64 |
}
|
| 65 |
}
|
|
|
|
| 3 |
use config::{Config, Environment, File};
|
| 4 |
use once_cell::sync::Lazy;
|
| 5 |
use serde::Deserialize;
|
| 6 |
+
use whisper_rs::{FullParams};
|
| 7 |
use tracing::debug;
|
| 8 |
|
| 9 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
|
|
|
| 12 |
#[derive(Debug, Deserialize, Clone)]
|
| 13 |
pub(crate) struct WhisperConfig {
|
| 14 |
pub(crate) params: WhisperParams,
|
| 15 |
+
pub(crate) step_ms: usize,
|
| 16 |
+
pub(crate) length_ms: usize,
|
| 17 |
+
pub(crate) keep_ms: usize,
|
| 18 |
pub(crate) model: String,
|
| 19 |
pub(crate) max_prompt_tokens: usize,
|
| 20 |
+
pub(crate) context_confidence_threshold: f32,
|
| 21 |
}
|
| 22 |
|
| 23 |
#[allow(dead_code)]
|
| 24 |
#[derive(Debug, Deserialize, Clone)]
|
| 25 |
pub(crate) struct WhisperParams {
|
| 26 |
pub(crate) n_threads: Option<usize>,
|
| 27 |
+
pub(crate) max_tokens: Option<u32>,
|
| 28 |
+
pub(crate) audio_ctx: Option<u32>,
|
| 29 |
+
pub(crate) speed_up: Option<bool>,
|
| 30 |
+
pub(crate) translate: Option<bool>,
|
| 31 |
+
pub(crate) no_context: Option<bool>,
|
| 32 |
+
pub(crate) print_special: Option<bool>,
|
| 33 |
+
pub(crate) print_realtime: Option<bool>,
|
| 34 |
+
pub(crate) print_progress: Option<bool>,
|
| 35 |
+
pub(crate) token_timestamps: Option<bool>,
|
| 36 |
+
pub(crate) no_timestamps: Option<bool>,
|
| 37 |
+
pub(crate) temperature_inc: Option<f32>,
|
| 38 |
+
pub(crate) entropy_threshold: Option<f32>,
|
| 39 |
+
pub(crate) single_segment: Option<bool>,
|
| 40 |
+
pub(crate) suppress_non_speech_tokens: Option<bool>,
|
| 41 |
+
pub(crate) n_max_text_ctx: Option<usize>,
|
| 42 |
// pub(crate) tinydiarize: bool,
|
| 43 |
pub(crate) language: Option<String>,
|
| 44 |
}
|
|
|
|
| 46 |
impl WhisperParams {
|
| 47 |
pub(crate) fn to_full_params<'a, 'b>(&'a self, tokens: &'b [c_int]) -> FullParams<'a, 'b> {
|
| 48 |
let mut param = FullParams::new(Default::default());
|
| 49 |
+
if let Some(print_progress) = self.print_progress.as_ref() {
|
| 50 |
+
param.set_print_progress(*print_progress);
|
| 51 |
+
}
|
| 52 |
+
if let Some(print_special) = self.print_special.as_ref() {
|
| 53 |
+
param.set_print_special(*print_special);
|
| 54 |
+
}
|
| 55 |
+
if let Some(print_realtime) = self.print_realtime.as_ref() {
|
| 56 |
+
param.set_print_realtime(*print_realtime);
|
| 57 |
+
}
|
| 58 |
+
if let Some(single_segment) = self.single_segment.as_ref() {
|
| 59 |
+
param.set_single_segment(*single_segment);
|
| 60 |
+
}
|
| 61 |
+
if let Some(no_timestamps) = self.no_timestamps.as_ref() {
|
| 62 |
+
param.set_print_timestamps(!no_timestamps);
|
| 63 |
+
}
|
| 64 |
+
if let Some(token_timestamps) = self.token_timestamps.as_ref() {
|
| 65 |
+
param.set_token_timestamps(*token_timestamps);
|
| 66 |
+
}
|
| 67 |
+
if let Some(translate) = self.translate.as_ref() {
|
| 68 |
+
param.set_translate(*translate);
|
| 69 |
+
}
|
| 70 |
+
if let Some(max_tokens) = self.max_tokens.as_ref() {
|
| 71 |
+
param.set_max_tokens(*max_tokens as i32);
|
| 72 |
+
}
|
| 73 |
+
param.set_language(self.language.as_deref());
|
| 74 |
+
if let Some(n_threads) = self.n_threads.as_ref() {
|
| 75 |
+
param.set_n_threads(*n_threads as i32);
|
| 76 |
+
}
|
| 77 |
+
if let Some(audio_ctx) = self.audio_ctx.as_ref() {
|
| 78 |
+
param.set_audio_ctx(*audio_ctx as i32);
|
| 79 |
+
}
|
| 80 |
+
if let Some(speed_up) = self.speed_up.as_ref() {
|
| 81 |
+
param.set_speed_up(*speed_up);
|
| 82 |
+
}
|
| 83 |
// param.set_tdrz_enable(self.tinydiarize);
|
| 84 |
+
if let Some(temperature_inc) = self.temperature_inc.as_ref() {
|
| 85 |
+
param.set_temperature_inc(*temperature_inc);
|
| 86 |
+
}
|
| 87 |
+
if let Some(suppress_non_speech_tokens) = self.suppress_non_speech_tokens.as_ref() {
|
| 88 |
+
param.set_suppress_non_speech_tokens(*suppress_non_speech_tokens);
|
| 89 |
+
}
|
| 90 |
+
if let Some(no_context) = self.no_context.as_ref() {
|
| 91 |
+
param.set_no_context(*no_context);
|
| 92 |
+
}
|
| 93 |
+
if let Some(entropy_threshold) = self.entropy_threshold.as_ref() {
|
| 94 |
+
param.set_entropy_thold(*entropy_threshold);
|
| 95 |
+
}
|
| 96 |
+
if let Some(n_max_text_ctx) = self.n_max_text_ctx.as_ref() {
|
| 97 |
+
param.set_n_max_text_ctx(*n_max_text_ctx as i32);
|
| 98 |
+
}
|
| 99 |
|
| 100 |
+
param.set_tokens(tokens);
|
| 101 |
param
|
| 102 |
}
|
| 103 |
}
|
src/main.rs
CHANGED
|
@@ -104,7 +104,7 @@ async fn stream_speaker(
|
|
| 104 |
ws.on_upgrade(|mut socket| async move {
|
| 105 |
let _origin_tx = lesson.voice_channel();
|
| 106 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 107 |
-
let whisper = WhisperHandler::new(SETTINGS.whisper.clone(), prompt)
|
| 108 |
.expect("failed to create whisper");
|
| 109 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
| 110 |
loop {
|
|
@@ -118,7 +118,7 @@ async fn stream_speaker(
|
|
| 118 |
msg = socket.next() => {
|
| 119 |
match msg.as_ref() {
|
| 120 |
Some(Ok(Message::Binary(bin))) => {
|
| 121 |
-
let _ = whisper.
|
| 122 |
// if let Err(e) = origin_tx.send(bin.to_vec()).await {
|
| 123 |
// tracing::warn!("failed to send voice: {}", e);
|
| 124 |
// break;
|
|
@@ -173,7 +173,7 @@ async fn stream_listener(
|
|
| 173 |
ws: WebSocket,
|
| 174 |
) -> impl IntoResponse {
|
| 175 |
let lesson_opt = ctx.lessons_manager.get_lesson(query.id).await;
|
| 176 |
-
|
| 177 |
|
| 178 |
ws.on_upgrade(|mut socket| async move {
|
| 179 |
let voice_id = match query.voice.parse() {
|
|
|
|
| 104 |
ws.on_upgrade(|mut socket| async move {
|
| 105 |
let _origin_tx = lesson.voice_channel();
|
| 106 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 107 |
+
let mut whisper = WhisperHandler::new(SETTINGS.whisper.clone(), prompt)
|
| 108 |
.expect("failed to create whisper");
|
| 109 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
| 110 |
loop {
|
|
|
|
| 118 |
msg = socket.next() => {
|
| 119 |
match msg.as_ref() {
|
| 120 |
Some(Ok(Message::Binary(bin))) => {
|
| 121 |
+
let _ = whisper.send_bytes(bin.to_vec()).await; // whisper test
|
| 122 |
// if let Err(e) = origin_tx.send(bin.to_vec()).await {
|
| 123 |
// tracing::warn!("failed to send voice: {}", e);
|
| 124 |
// break;
|
|
|
|
| 173 |
ws: WebSocket,
|
| 174 |
) -> impl IntoResponse {
|
| 175 |
let lesson_opt = ctx.lessons_manager.get_lesson(query.id).await;
|
| 176 |
+
debug!("listener param = {:?}", query);
|
| 177 |
|
| 178 |
ws.on_upgrade(|mut socket| async move {
|
| 179 |
let voice_id = match query.voice.parse() {
|
src/whisper.rs
CHANGED
|
@@ -1,24 +1,26 @@
|
|
| 1 |
use std::{
|
| 2 |
collections::VecDeque,
|
| 3 |
-
ffi::c_int,
|
| 4 |
fmt::{Debug, Display, Formatter},
|
| 5 |
thread::sleep,
|
| 6 |
time::Duration,
|
| 7 |
};
|
|
|
|
| 8 |
|
| 9 |
use once_cell::sync::Lazy;
|
| 10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
| 11 |
-
use
|
| 12 |
-
use
|
| 13 |
-
use
|
| 14 |
|
| 15 |
use crate::config::{Settings, SETTINGS};
|
| 16 |
use crate::{config::WhisperConfig, group::GroupedWithin};
|
| 17 |
|
|
|
|
|
|
|
| 18 |
static WHISPER_CONTEXT: Lazy<WhisperContext> = Lazy::new(|| {
|
| 19 |
let settings = Settings::new().expect("Failed to initialize settings.");
|
| 20 |
if tracing::enabled!(tracing::Level::DEBUG) {
|
| 21 |
-
let info = print_system_info();
|
| 22 |
debug!("system_info: n_threads = {} / {} | {}\n",
|
| 23 |
settings.whisper.params.n_threads.unwrap_or(0),
|
| 24 |
std::thread::available_parallelism().map(|c| c.get()).unwrap_or(0),
|
|
@@ -27,13 +29,6 @@ static WHISPER_CONTEXT: Lazy<WhisperContext> = Lazy::new(|| {
|
|
| 27 |
WhisperContext::new(&settings.whisper.model).expect("failed to create WhisperContext")
|
| 28 |
});
|
| 29 |
|
| 30 |
-
fn print_system_info() -> String {
|
| 31 |
-
unsafe {
|
| 32 |
-
let raw_info = whisper_rs_sys::whisper_print_system_info();
|
| 33 |
-
let info = std::ffi::CStr::from_ptr(raw_info);
|
| 34 |
-
info.to_str().unwrap_or("failed to get system info").to_string()
|
| 35 |
-
}
|
| 36 |
-
}
|
| 37 |
|
| 38 |
#[derive(Debug)]
|
| 39 |
pub(crate) enum Error {
|
|
@@ -70,16 +65,21 @@ impl std::error::Error for Error {
|
|
| 70 |
}
|
| 71 |
}
|
| 72 |
|
| 73 |
-
fn
|
| 74 |
-
|
| 75 |
.chunks_exact(2)
|
| 76 |
.map(|chunk| {
|
| 77 |
let mut buf = [0u8; 2];
|
| 78 |
buf.copy_from_slice(chunk);
|
| 79 |
i16::from_le_bytes(buf)
|
| 80 |
})
|
| 81 |
-
.collect::<Vec<i16>>()
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
}
|
| 84 |
|
| 85 |
#[derive(Clone, Debug)]
|
|
@@ -87,20 +87,21 @@ pub struct Segment {
|
|
| 87 |
pub start_timestamp: i64,
|
| 88 |
pub end_timestamp: i64,
|
| 89 |
pub text: String,
|
| 90 |
-
tokens: Vec<
|
| 91 |
}
|
| 92 |
|
| 93 |
pub struct WhisperHandler {
|
| 94 |
-
tx: mpsc::Sender<Vec<
|
| 95 |
-
transcription_tx: broadcast::Sender<Vec<
|
| 96 |
stop_handle: Option<oneshot::Sender<()>>,
|
| 97 |
}
|
| 98 |
|
| 99 |
impl WhisperHandler {
|
| 100 |
pub(crate) fn new(config: WhisperConfig, prompt: String) -> Result<Self, Error> {
|
|
|
|
| 101 |
let (stop_handle, mut stop_signal) = oneshot::channel();
|
| 102 |
-
let (pcm_tx, pcm_rx) = mpsc::channel::<Vec<
|
| 103 |
-
let (transcription_tx, _) = broadcast::channel::<Vec<
|
| 104 |
let shared_transcription_tx = transcription_tx.clone();
|
| 105 |
let state = WHISPER_CONTEXT
|
| 106 |
.create_state()
|
|
@@ -109,21 +110,46 @@ impl WhisperHandler {
|
|
| 109 |
.tokenize(prompt.as_str(), SETTINGS.whisper.max_prompt_tokens)
|
| 110 |
.map_err(|e| Error::whisper_error("failed to tokenize prompt", e))?;
|
| 111 |
tokio::task::spawn_blocking(move || {
|
|
|
|
|
|
|
| 112 |
let mut detector = Detector::new(state, &SETTINGS.whisper, preset_prompt_tokens);
|
| 113 |
let mut grouped = GroupedWithin::new(
|
| 114 |
-
detector.n_samples_step
|
| 115 |
Duration::from_millis(config.step_ms as u64),
|
| 116 |
pcm_rx,
|
| 117 |
u16::MAX as usize,
|
| 118 |
);
|
| 119 |
while let Err(oneshot::error::TryRecvError::Empty) = stop_signal.try_recv() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
let new_pcm_f32 = match grouped.next() {
|
| 121 |
Err(mpsc::error::TryRecvError::Disconnected) => break,
|
| 122 |
Err(mpsc::error::TryRecvError::Empty) => {
|
| 123 |
sleep(Duration::from_millis(10));
|
| 124 |
continue;
|
| 125 |
}
|
| 126 |
-
Ok(data) =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
};
|
| 128 |
|
| 129 |
detector.feed(new_pcm_f32);
|
|
@@ -135,26 +161,22 @@ impl WhisperHandler {
|
|
| 135 |
result
|
| 136 |
}
|
| 137 |
Err(err) => {
|
| 138 |
-
|
| 139 |
continue;
|
| 140 |
}
|
| 141 |
};
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
segment.text
|
| 149 |
-
);
|
| 150 |
-
}
|
| 151 |
-
|
| 152 |
-
if let Err(e) = shared_transcription_tx.send(segments) {
|
| 153 |
tracing::error!("failed to send transcription: {}", e);
|
| 154 |
break;
|
| 155 |
};
|
| 156 |
}
|
| 157 |
});
|
|
|
|
| 158 |
Ok(Self {
|
| 159 |
tx: pcm_tx,
|
| 160 |
transcription_tx,
|
|
@@ -162,27 +184,34 @@ impl WhisperHandler {
|
|
| 162 |
})
|
| 163 |
}
|
| 164 |
|
| 165 |
-
pub fn subscribe(&self) -> broadcast::Receiver<Vec<
|
| 166 |
self.transcription_tx.subscribe()
|
| 167 |
}
|
| 168 |
|
| 169 |
-
pub async fn
|
| 170 |
self.tx.send(data).await
|
| 171 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
}
|
| 173 |
|
| 174 |
#[allow(dead_code)]
|
| 175 |
struct Detector {
|
| 176 |
state: WhisperState<'static>,
|
| 177 |
config: &'static WhisperConfig,
|
|
|
|
|
|
|
|
|
|
| 178 |
preset_prompt_tokens: Vec<WhisperToken>,
|
| 179 |
n_samples_keep: usize,
|
| 180 |
n_samples_step: usize,
|
| 181 |
n_samples_len: usize,
|
| 182 |
-
prompt_tokens: Vec<
|
| 183 |
pcm_f32: VecDeque<f32>,
|
| 184 |
offset: usize,
|
| 185 |
-
stable_offset: usize,
|
| 186 |
}
|
| 187 |
|
| 188 |
impl Detector {
|
|
@@ -194,14 +223,16 @@ impl Detector {
|
|
| 194 |
Detector {
|
| 195 |
state,
|
| 196 |
config,
|
|
|
|
|
|
|
|
|
|
| 197 |
preset_prompt_tokens,
|
| 198 |
-
n_samples_keep:
|
| 199 |
-
n_samples_step:
|
| 200 |
-
n_samples_len:
|
| 201 |
prompt_tokens: Default::default(),
|
| 202 |
-
pcm_f32: VecDeque::
|
| 203 |
offset: 0,
|
| 204 |
-
stable_offset: 0,
|
| 205 |
}
|
| 206 |
}
|
| 207 |
|
|
@@ -210,20 +241,16 @@ impl Detector {
|
|
| 210 |
if self.pcm_f32.len() < self.n_samples_len {
|
| 211 |
return;
|
| 212 |
}
|
| 213 |
-
let len_to_drain = self
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
| 218 |
}
|
| 219 |
|
| 220 |
fn inference(&mut self) -> Result<Vec<Segment>, Error> {
|
| 221 |
-
let
|
| 222 |
-
self.preset_prompt_tokens.as_slice(),
|
| 223 |
-
self.prompt_tokens.as_slice(),
|
| 224 |
-
]
|
| 225 |
-
.concat();
|
| 226 |
-
let params = self.config.params.to_full_params(prompt_tokens.as_slice());
|
| 227 |
let start = std::time::Instant::now();
|
| 228 |
let _ = self
|
| 229 |
.state
|
|
@@ -231,35 +258,32 @@ impl Detector {
|
|
| 231 |
.map_err(|e| Error::whisper_error("failed to initialize WhisperState", e))?;
|
| 232 |
let end = std::time::Instant::now();
|
| 233 |
if end - start > Duration::from_millis(self.config.step_ms as u64) {
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
);
|
| 239 |
}
|
| 240 |
|
| 241 |
-
let timestamp_offset: i64 = (self.offset * 1000 / WHISPER_SAMPLE_RATE
|
| 242 |
-
let stable_offset: i64 = (self.stable_offset * 1000 / WHISPER_SAMPLE_RATE as usize) as i64;
|
| 243 |
let num_segments = self
|
| 244 |
.state
|
| 245 |
.full_n_segments()
|
| 246 |
.map_err(|e| Error::whisper_error("failed to get number of segments", e))?;
|
| 247 |
let mut segments: Vec<Segment> = Vec::with_capacity(num_segments as usize);
|
| 248 |
for i in 0..num_segments {
|
| 249 |
-
let end_timestamp: i64 = timestamp_offset
|
| 250 |
-
+ 10 * self
|
| 251 |
-
.state
|
| 252 |
-
.full_get_segment_t1(i)
|
| 253 |
-
.map_err(|e| Error::whisper_error("failed to get end timestamp", e))?;
|
| 254 |
-
if end_timestamp <= stable_offset {
|
| 255 |
-
continue;
|
| 256 |
-
}
|
| 257 |
-
|
| 258 |
let start_timestamp: i64 = timestamp_offset
|
| 259 |
+ 10 * self
|
| 260 |
.state
|
| 261 |
.full_get_segment_t0(i)
|
| 262 |
.map_err(|e| Error::whisper_error("failed to get start timestamp", e))?;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
let segment = self
|
| 264 |
.state
|
| 265 |
.full_get_segment_text(i)
|
|
@@ -270,11 +294,9 @@ impl Detector {
|
|
| 270 |
.map_err(|e| Error::whisper_error("failed to get segment tokens", e))?;
|
| 271 |
let mut segment_tokens = Vec::with_capacity(num_tokens as usize);
|
| 272 |
for j in 0..num_tokens {
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
.map_err(|e| Error::whisper_error("failed to get token", e))?,
|
| 277 |
-
);
|
| 278 |
}
|
| 279 |
|
| 280 |
segments.push(Segment {
|
|
@@ -285,52 +307,132 @@ impl Detector {
|
|
| 285 |
});
|
| 286 |
}
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
|
| 292 |
-
|
| 293 |
-
|
|
|
|
| 294 |
};
|
| 295 |
|
| 296 |
-
let
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
}
|
| 302 |
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
};
|
| 307 |
-
let drop_offset: usize =
|
| 308 |
-
last.end_timestamp as usize / 1000 * WHISPER_SAMPLE_RATE as usize - self.offset;
|
| 309 |
-
if drop_offset > self.pcm_f32.len() {
|
| 310 |
-
return; // Arithmetic overflow
|
| 311 |
}
|
| 312 |
-
|
| 313 |
-
self.offset += len_to_drain;
|
| 314 |
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
}
|
| 325 |
}
|
| 326 |
|
| 327 |
impl Drop for WhisperHandler {
|
| 328 |
fn drop(&mut self) {
|
| 329 |
let Some(stop_handle) = self.stop_handle.take() else {
|
| 330 |
-
return
|
| 331 |
};
|
| 332 |
if stop_handle.send(()).is_err() {
|
| 333 |
-
|
| 334 |
}
|
| 335 |
}
|
| 336 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
use std::{
|
| 2 |
collections::VecDeque,
|
|
|
|
| 3 |
fmt::{Debug, Display, Formatter},
|
| 4 |
thread::sleep,
|
| 5 |
time::Duration,
|
| 6 |
};
|
| 7 |
+
use fvad::SampleRate;
|
| 8 |
|
| 9 |
use once_cell::sync::Lazy;
|
| 10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
| 11 |
+
use tokio::time::Instant;
|
| 12 |
+
use tracing::{debug, trace, warn};
|
| 13 |
+
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperState, WhisperToken, WhisperTokenData};
|
| 14 |
|
| 15 |
use crate::config::{Settings, SETTINGS};
|
| 16 |
use crate::{config::WhisperConfig, group::GroupedWithin};
|
| 17 |
|
| 18 |
+
const WHISPER_SAMPLE_RATE: usize = whisper_rs_sys::WHISPER_SAMPLE_RATE as usize;
|
| 19 |
+
|
| 20 |
static WHISPER_CONTEXT: Lazy<WhisperContext> = Lazy::new(|| {
|
| 21 |
let settings = Settings::new().expect("Failed to initialize settings.");
|
| 22 |
if tracing::enabled!(tracing::Level::DEBUG) {
|
| 23 |
+
let info = whisper_rs::print_system_info();
|
| 24 |
debug!("system_info: n_threads = {} / {} | {}\n",
|
| 25 |
settings.whisper.params.n_threads.unwrap_or(0),
|
| 26 |
std::thread::available_parallelism().map(|c| c.get()).unwrap_or(0),
|
|
|
|
| 29 |
WhisperContext::new(&settings.whisper.model).expect("failed to create WhisperContext")
|
| 30 |
});
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
#[derive(Debug)]
|
| 34 |
pub(crate) enum Error {
|
|
|
|
| 65 |
}
|
| 66 |
}
|
| 67 |
|
| 68 |
+
/// Decodes raw little-endian 16-bit PCM bytes into `i16` samples.
///
/// Bytes are consumed in pairs. If `input` has an odd length, the final unpaired byte is
/// ignored, because one byte cannot form a sample.
fn u8_to_i16(input: &[u8]) -> Vec<i16> {
    input
        .chunks_exact(2)
        // `chunks_exact(2)` guarantees two bytes per chunk, so both indices are in bounds.
        .map(|pair| i16::from_le_bytes([pair[0], pair[1]]))
        .collect()
}
|
| 78 |
+
|
| 79 |
+
#[derive(Clone, Debug)]
|
| 80 |
+
pub enum Output {
|
| 81 |
+
Unstable(Segment),
|
| 82 |
+
Stable(Segment),
|
| 83 |
}
|
| 84 |
|
| 85 |
#[derive(Clone, Debug)]
|
|
|
|
| 87 |
pub start_timestamp: i64,
|
| 88 |
pub end_timestamp: i64,
|
| 89 |
pub text: String,
|
| 90 |
+
tokens: Vec<WhisperTokenData>,
|
| 91 |
}
|
| 92 |
|
| 93 |
pub struct WhisperHandler {
|
| 94 |
+
tx: mpsc::Sender<Vec<i16>>,
|
| 95 |
+
transcription_tx: broadcast::Sender<Vec<Output>>,
|
| 96 |
stop_handle: Option<oneshot::Sender<()>>,
|
| 97 |
}
|
| 98 |
|
| 99 |
impl WhisperHandler {
|
| 100 |
pub(crate) fn new(config: WhisperConfig, prompt: String) -> Result<Self, Error> {
|
| 101 |
+
let vad_slice_size = WHISPER_SAMPLE_RATE / 100 * 3;
|
| 102 |
let (stop_handle, mut stop_signal) = oneshot::channel();
|
| 103 |
+
let (pcm_tx, pcm_rx) = mpsc::channel::<Vec<i16>>(128);
|
| 104 |
+
let (transcription_tx, _) = broadcast::channel::<Vec<Output>>(128);
|
| 105 |
let shared_transcription_tx = transcription_tx.clone();
|
| 106 |
let state = WHISPER_CONTEXT
|
| 107 |
.create_state()
|
|
|
|
| 110 |
.tokenize(prompt.as_str(), SETTINGS.whisper.max_prompt_tokens)
|
| 111 |
.map_err(|e| Error::whisper_error("failed to tokenize prompt", e))?;
|
| 112 |
tokio::task::spawn_blocking(move || {
|
| 113 |
+
let mut vad = fvad::Fvad::new().expect("failed to create VAD")
|
| 114 |
+
.set_sample_rate(SampleRate::Rate16kHz);
|
| 115 |
let mut detector = Detector::new(state, &SETTINGS.whisper, preset_prompt_tokens);
|
| 116 |
let mut grouped = GroupedWithin::new(
|
| 117 |
+
detector.n_samples_step,
|
| 118 |
Duration::from_millis(config.step_ms as u64),
|
| 119 |
pcm_rx,
|
| 120 |
u16::MAX as usize,
|
| 121 |
);
|
| 122 |
while let Err(oneshot::error::TryRecvError::Empty) = stop_signal.try_recv() {
|
| 123 |
+
if detector.has_crossed_next_line() {
|
| 124 |
+
if let Some(segment) = detector.next_line() {
|
| 125 |
+
let segments = vec![Output::Stable(segment)];
|
| 126 |
+
if let Err(e) = shared_transcription_tx.send(segments) {
|
| 127 |
+
tracing::error!("failed to send transcription: {}", e);
|
| 128 |
+
break;
|
| 129 |
+
};
|
| 130 |
+
}
|
| 131 |
+
}
|
| 132 |
let new_pcm_f32 = match grouped.next() {
|
| 133 |
Err(mpsc::error::TryRecvError::Disconnected) => break,
|
| 134 |
Err(mpsc::error::TryRecvError::Empty) => {
|
| 135 |
sleep(Duration::from_millis(10));
|
| 136 |
continue;
|
| 137 |
}
|
| 138 |
+
Ok(data) => {
|
| 139 |
+
let active_voice = data
|
| 140 |
+
.chunks(vad_slice_size)
|
| 141 |
+
.filter(|frame| {
|
| 142 |
+
if frame.len() != vad_slice_size {
|
| 143 |
+
true
|
| 144 |
+
} else {
|
| 145 |
+
vad.is_voice_frame(frame).unwrap_or(true)
|
| 146 |
+
}
|
| 147 |
+
// true
|
| 148 |
+
})
|
| 149 |
+
.collect::<Vec<_>>()
|
| 150 |
+
.concat();
|
| 151 |
+
convert_integer_to_float_audio(&active_voice)
|
| 152 |
+
},
|
| 153 |
};
|
| 154 |
|
| 155 |
detector.feed(new_pcm_f32);
|
|
|
|
| 161 |
result
|
| 162 |
}
|
| 163 |
Err(err) => {
|
| 164 |
+
warn!("failed to inference: {}", err);
|
| 165 |
continue;
|
| 166 |
}
|
| 167 |
};
|
| 168 |
|
| 169 |
+
let outputs = segments
|
| 170 |
+
.iter()
|
| 171 |
+
.map(|segment| Output::Unstable(segment.clone()))
|
| 172 |
+
.collect::<Vec<_>>();
|
| 173 |
+
if let Err(e) = shared_transcription_tx.send(outputs) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
tracing::error!("failed to send transcription: {}", e);
|
| 175 |
break;
|
| 176 |
};
|
| 177 |
}
|
| 178 |
});
|
| 179 |
+
|
| 180 |
Ok(Self {
|
| 181 |
tx: pcm_tx,
|
| 182 |
transcription_tx,
|
|
|
|
| 184 |
})
|
| 185 |
}
|
| 186 |
|
| 187 |
+
/// Returns a new receiver for the transcription broadcast channel.
///
/// Each message is a batch of [`Output`] values.
pub fn subscribe(&self) -> broadcast::Receiver<Vec<Output>> {
    self.transcription_tx.subscribe()
}
|
| 190 |
|
| 191 |
+
pub async fn send_i16(&mut self, data: Vec<i16>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
| 192 |
self.tx.send(data).await
|
| 193 |
}
|
| 194 |
+
|
| 195 |
+
pub async fn send_bytes(&mut self, data: Vec<u8>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
| 196 |
+
let i16_data = u8_to_i16(&data);
|
| 197 |
+
self.send_i16(i16_data).await
|
| 198 |
+
}
|
| 199 |
}
|
| 200 |
|
| 201 |
#[allow(dead_code)]
|
| 202 |
struct Detector {
|
| 203 |
state: WhisperState<'static>,
|
| 204 |
config: &'static WhisperConfig,
|
| 205 |
+
start_time: Instant,
|
| 206 |
+
segment: Option<Segment>,
|
| 207 |
+
line_num: usize,
|
| 208 |
preset_prompt_tokens: Vec<WhisperToken>,
|
| 209 |
n_samples_keep: usize,
|
| 210 |
n_samples_step: usize,
|
| 211 |
n_samples_len: usize,
|
| 212 |
+
prompt_tokens: Vec<WhisperToken>,
|
| 213 |
pcm_f32: VecDeque<f32>,
|
| 214 |
offset: usize,
|
|
|
|
| 215 |
}
|
| 216 |
|
| 217 |
impl Detector {
|
|
|
|
| 223 |
Detector {
|
| 224 |
state,
|
| 225 |
config,
|
| 226 |
+
start_time: Instant::now(),
|
| 227 |
+
segment: None,
|
| 228 |
+
line_num: 0,
|
| 229 |
preset_prompt_tokens,
|
| 230 |
+
n_samples_keep: config.keep_ms * WHISPER_SAMPLE_RATE / 1000,
|
| 231 |
+
n_samples_step: config.step_ms * WHISPER_SAMPLE_RATE / 1000,
|
| 232 |
+
n_samples_len: config.length_ms * WHISPER_SAMPLE_RATE / 1000,
|
| 233 |
prompt_tokens: Default::default(),
|
| 234 |
+
pcm_f32: VecDeque::with_capacity(config.length_ms * WHISPER_SAMPLE_RATE / 1000),
|
| 235 |
offset: 0,
|
|
|
|
| 236 |
}
|
| 237 |
}
|
| 238 |
|
|
|
|
| 241 |
if self.pcm_f32.len() < self.n_samples_len {
|
| 242 |
return;
|
| 243 |
}
|
| 244 |
+
// let len_to_drain = self
|
| 245 |
+
// .pcm_f32
|
| 246 |
+
// .drain(0..(self.pcm_f32.len() - self.n_samples_len))
|
| 247 |
+
// .len();
|
| 248 |
+
// warn!("ASR too slow, drain {} samples", len_to_drain);
|
| 249 |
+
// self.offset += len_to_drain;
|
| 250 |
}
|
| 251 |
|
| 252 |
fn inference(&mut self) -> Result<Vec<Segment>, Error> {
|
| 253 |
+
let params = self.config.params.to_full_params(self.prompt_tokens.as_slice());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
let start = std::time::Instant::now();
|
| 255 |
let _ = self
|
| 256 |
.state
|
|
|
|
| 258 |
.map_err(|e| Error::whisper_error("failed to initialize WhisperState", e))?;
|
| 259 |
let end = std::time::Instant::now();
|
| 260 |
if end - start > Duration::from_millis(self.config.step_ms as u64) {
|
| 261 |
+
// warn!(
|
| 262 |
+
// "full([{}]) took {} ms too slow",
|
| 263 |
+
// self.pcm_f32.len(),
|
| 264 |
+
// (end - start).as_millis()
|
| 265 |
+
// );
|
| 266 |
}
|
| 267 |
|
| 268 |
+
let timestamp_offset: i64 = (self.offset * 1000 / WHISPER_SAMPLE_RATE) as i64;
|
|
|
|
| 269 |
let num_segments = self
|
| 270 |
.state
|
| 271 |
.full_n_segments()
|
| 272 |
.map_err(|e| Error::whisper_error("failed to get number of segments", e))?;
|
| 273 |
let mut segments: Vec<Segment> = Vec::with_capacity(num_segments as usize);
|
| 274 |
for i in 0..num_segments {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
let start_timestamp: i64 = timestamp_offset
|
| 276 |
+ 10 * self
|
| 277 |
.state
|
| 278 |
.full_get_segment_t0(i)
|
| 279 |
.map_err(|e| Error::whisper_error("failed to get start timestamp", e))?;
|
| 280 |
+
|
| 281 |
+
let end_timestamp: i64 = timestamp_offset
|
| 282 |
+
+ 10 * self
|
| 283 |
+
.state
|
| 284 |
+
.full_get_segment_t1(i)
|
| 285 |
+
.map_err(|e| Error::whisper_error("failed to get end timestamp", e))?;
|
| 286 |
+
|
| 287 |
let segment = self
|
| 288 |
.state
|
| 289 |
.full_get_segment_text(i)
|
|
|
|
| 294 |
.map_err(|e| Error::whisper_error("failed to get segment tokens", e))?;
|
| 295 |
let mut segment_tokens = Vec::with_capacity(num_tokens as usize);
|
| 296 |
for j in 0..num_tokens {
|
| 297 |
+
let token_data = self.state.full_get_token_data(i, j)
|
| 298 |
+
.map_err(|e| Error::whisper_error("failed to get token data", e))?;
|
| 299 |
+
segment_tokens.push(token_data);
|
|
|
|
|
|
|
| 300 |
}
|
| 301 |
|
| 302 |
segments.push(Segment {
|
|
|
|
| 307 |
});
|
| 308 |
}
|
| 309 |
|
| 310 |
+
self.segment = segments.first().cloned();
|
| 311 |
+
Ok(segments.to_vec())
|
| 312 |
+
}
|
| 313 |
|
| 314 |
+
fn remember_prompt(&mut self) {
|
| 315 |
+
let Some(segment) = self.segment.as_ref() else {
|
| 316 |
+
return
|
| 317 |
};
|
| 318 |
|
| 319 |
+
let tokens = segment
|
| 320 |
+
.tokens
|
| 321 |
+
.iter()
|
| 322 |
+
.map(|td| td.tid)
|
| 323 |
+
.collect::<Vec<WhisperToken>>();
|
|
|
|
| 324 |
|
| 325 |
+
self.prompt_tokens.extend(tokens);
|
| 326 |
+
if self.prompt_tokens.len() > self.config.max_prompt_tokens {
|
| 327 |
+
let _ = self.prompt_tokens.drain(0..(self.prompt_tokens.len() - self.config.max_prompt_tokens)).len();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
}
|
| 329 |
+
}
|
|
|
|
| 330 |
|
| 331 |
+
fn has_crossed_next_line(&self) -> bool {
|
| 332 |
+
let now = Instant::now();
|
| 333 |
+
let elapsed = now - self.start_time;
|
| 334 |
+
let line_number: usize = (elapsed.as_millis() / self.config.length_ms as u128) as usize;
|
| 335 |
+
line_number > self.line_num
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
/// Finalizes the current line and returns its pending segment, if any.
///
/// Trims the audio buffer, advances `offset` by the number of samples removed, increments
/// `line_num`, folds the pending segment's tokens into the rolling prompt and finally takes
/// the pending segment.
fn next_line(&mut self) -> Option<Segment> {
    let buffered = self.pcm_f32.len();
    // With more than `n_samples_keep` samples buffered only the tail is kept; otherwise the
    // whole buffer is discarded.
    // NOTE(review): discarding everything when the buffer is at or below the keep size looks
    // surprising; behaviour is preserved here, confirm it is intended.
    let dropped = if buffered > self.n_samples_keep {
        buffered - self.n_samples_keep
    } else {
        buffered
    };
    self.pcm_f32.drain(..dropped);
    self.offset += dropped;

    self.line_num += 1;
    self.remember_prompt();
    self.segment.take()
}
|
| 352 |
}
|
| 353 |
|
| 354 |
impl Drop for WhisperHandler {
|
| 355 |
fn drop(&mut self) {
|
| 356 |
let Some(stop_handle) = self.stop_handle.take() else {
|
| 357 |
+
return warn!("WhisperHandler::drop() called without stop_handle");
|
| 358 |
};
|
| 359 |
if stop_handle.send(()).is_err() {
|
| 360 |
+
warn!("WhisperHandler::drop() failed to send stop signal");
|
| 361 |
}
|
| 362 |
}
|
| 363 |
}
|
| 364 |
+
|
| 365 |
+
#[cfg(test)]
mod test {
    use super::*;
    use std::io::{stdout, Write};
    use hound;
    use tracing_test;

    /// Renders one output on the terminal: stable text gets its own line, unstable text
    /// overwrites the current line.
    // Kept for watching a run interactively; the test below only prints stable lines.
    #[allow(dead_code)]
    async fn print_output(output: Output) {
        match output {
            Output::Stable(stable) => {
                print!("\x1b[2K\r");
                print!("{}\n", stable.text);
            },
            Output::Unstable(unstable) => {
                // back to previous line of console
                print!("\x1b[2K\r");
                print!("{}", " ".repeat(100));
                print!("\x1b[2K\r");
                print!("{} ...", unstable.text);
            }
        }
        stdout().flush().unwrap();
    }

    /// Streams a sample WAV file through a `WhisperHandler` in 1600-sample chunks and prints
    /// every stable line that comes back.
    #[tokio::test]
    #[tracing_test::traced_test]
    async fn test_whisper_handler() {
        let mut whisper_handler = WhisperHandler::new(
            SETTINGS.whisper.clone(),
            "Harry Potter and the Philosopher's Stone".to_string(),
        ).expect("failed to create WhisperHandler");

        let wav = hound::WavReader::open("samples/ADHD_1A.wav")
            .expect("failed to open wav");
        let spec = wav.spec();
        println!("{:?}", spec);
        let samples = wav
            .into_samples::<i16>()
            .map(|s| s.unwrap())
            .collect::<Vec<i16>>();

        let mut rx = whisper_handler.subscribe();
        // The sender owns the handler so it can drop it when the audio is exhausted. Dropping
        // the handler stops the worker and closes the broadcast channel, which is what lets
        // the receiving loop below finish; borrowing the handler instead keeps the channel
        // open forever and the test never returns.
        let send_fut = async move {
            // tokio::time::sleep(Duration::from_secs(5)).await;
            for chunk in samples.chunks(1600) {
                whisper_handler
                    .send_i16(chunk.to_vec())
                    .await
                    .expect("failed to send sample");
                tokio::time::sleep(Duration::from_millis(100)).await;
            }
            // Give the worker one more window so the last line can still be finalized.
            tokio::time::sleep(Duration::from_millis(SETTINGS.whisper.length_ms as u64)).await;
            drop(whisper_handler);
        };

        let recv_fut = async move {
            while let Ok(outputs) = rx.recv().await {
                let Some(output) = outputs.first() else {
                    continue
                };

                match output {
                    Output::Stable(stable) => {
                        println!("{}", stable.text);
                    },
                    Output::Unstable(_) => {}
                }
            }
        };

        tokio::join!(send_fut, recv_fut);
    }
}
|