Dyen committed on
Commit
bfdf803
·
1 Parent(s): 0a0a57b

Fix binaries and nested git

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -1
  2. whisper.cpp +0 -1
  3. whisper.cpp/.devops/cublas.Dockerfile +28 -0
  4. whisper.cpp/.devops/main-cuda.Dockerfile +50 -0
  5. whisper.cpp/.devops/main-intel.Dockerfile +28 -0
  6. whisper.cpp/.devops/main-musa.Dockerfile +40 -0
  7. whisper.cpp/.devops/main.Dockerfile +20 -0
  8. whisper.cpp/.dockerignore +3 -0
  9. whisper.cpp/.github/workflows/bindings-go.yml +22 -0
  10. whisper.cpp/.github/workflows/bindings-ruby.yml +21 -0
  11. whisper.cpp/.github/workflows/build.yml +1580 -0
  12. whisper.cpp/.github/workflows/docker.yml +76 -0
  13. whisper.cpp/.github/workflows/examples-wasm.yml +97 -0
  14. whisper.cpp/.github/workflows/examples.yml +48 -0
  15. whisper.cpp/.gitignore +66 -0
  16. whisper.cpp/AUTHORS +510 -0
  17. whisper.cpp/CMakeLists.txt +258 -0
  18. whisper.cpp/LICENSE +21 -0
  19. whisper.cpp/Makefile +58 -0
  20. whisper.cpp/README.md +849 -0
  21. whisper.cpp/README_sycl.md +249 -0
  22. whisper.cpp/bindings/CMakeLists.txt +19 -0
  23. whisper.cpp/bindings/go/.gitignore +2 -0
  24. whisper.cpp/bindings/go/LICENSE +21 -0
  25. whisper.cpp/bindings/go/Makefile +72 -0
  26. whisper.cpp/bindings/go/README.md +106 -0
  27. whisper.cpp/bindings/go/doc.go +5 -0
  28. whisper.cpp/bindings/go/examples/go-model-download/context.go +31 -0
  29. whisper.cpp/bindings/go/examples/go-model-download/main.go +304 -0
  30. whisper.cpp/bindings/go/examples/go-whisper/color.go +22 -0
  31. whisper.cpp/bindings/go/examples/go-whisper/flags.go +147 -0
  32. whisper.cpp/bindings/go/examples/go-whisper/main.go +43 -0
  33. whisper.cpp/bindings/go/examples/go-whisper/process.go +132 -0
  34. whisper.cpp/bindings/go/go.mod +16 -0
  35. whisper.cpp/bindings/go/go.sum +16 -0
  36. whisper.cpp/bindings/go/params.go +244 -0
  37. whisper.cpp/bindings/go/pkg/whisper/consts.go +28 -0
  38. whisper.cpp/bindings/go/pkg/whisper/context.go +385 -0
  39. whisper.cpp/bindings/go/pkg/whisper/context_test.go +124 -0
  40. whisper.cpp/bindings/go/pkg/whisper/doc.go +4 -0
  41. whisper.cpp/bindings/go/pkg/whisper/interface.go +118 -0
  42. whisper.cpp/bindings/go/pkg/whisper/model.go +101 -0
  43. whisper.cpp/bindings/go/pkg/whisper/model_test.go +91 -0
  44. whisper.cpp/bindings/go/pkg/whisper/util_test.go +6 -0
  45. whisper.cpp/bindings/go/whisper.go +470 -0
  46. whisper.cpp/bindings/go/whisper_test.go +113 -0
  47. whisper.cpp/bindings/java/README.md +87 -0
  48. whisper.cpp/bindings/java/build.gradle +159 -0
  49. whisper.cpp/bindings/java/gradle.properties +6 -0
  50. whisper.cpp/bindings/java/gradle/wrapper/gradle-wrapper.jar +0 -0
.gitattributes CHANGED
@@ -1,2 +1,2 @@
1
- *.jpg filter=lfs diff=lfs merge=lfs -text
2
  *.png filter=lfs diff=lfs merge=lfs -text
 
 
 
1
  *.png filter=lfs diff=lfs merge=lfs -text
2
+ *.jpg filter=lfs diff=lfs merge=lfs -text
whisper.cpp DELETED
@@ -1 +0,0 @@
1
- Subproject commit 6114e692136bea917dc88a5eb2e532c3d133d963
 
 
whisper.cpp/.devops/cublas.Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG CUDA_VERSION=11.7.1
5
+
6
+ # Target the CUDA build image
7
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} as build
10
+
11
+ # Unless otherwise specified, we make a fat build.
12
+ ARG CUDA_DOCKER_ARCH=all
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential git cmake libsdl2-dev wget git
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ # Set nvcc architecture
22
+ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23
+ # Enable cuBLAS
24
+ ENV GGML_CUDA=1
25
+
26
+ RUN make base.en
27
+
28
+ ENTRYPOINT ["/app/main"]
whisper.cpp/.devops/main-cuda.Dockerfile ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=13.0.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+ # Target the CUDA runtime image
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+ WORKDIR /app
11
+
12
+ # Unless otherwise specified, we make a fat build.
13
+ ARG CUDA_DOCKER_ARCH=all
14
+ # Set nvcc architecture
15
+ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
16
+
17
+ RUN apt-get update && \
18
+ apt-get install -y build-essential libsdl2-dev wget cmake git \
19
+ && apt-get clean \
20
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
21
+
22
+ # Ref: https://stackoverflow.com/a/53464012
23
+ ENV CUDA_MAIN_VERSION=13.0
24
+ ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
25
+
26
+ COPY .. .
27
+ # Enable cuBLAS
28
+ RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES='75;80;86;90'"
29
+
30
+ RUN find /app/build -name "*.o" -delete && \
31
+ find /app/build -name "*.a" -delete && \
32
+ rm -rf /app/build/CMakeFiles && \
33
+ rm -rf /app/build/cmake_install.cmake && \
34
+ rm -rf /app/build/_deps
35
+
36
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
37
+ ENV CUDA_MAIN_VERSION=13.0
38
+ ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
39
+ WORKDIR /app
40
+
41
+ RUN apt-get update && \
42
+ apt-get install -y curl ffmpeg wget cmake git \
43
+ && apt-get clean \
44
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
45
+
46
+ COPY --from=build /app /app
47
+ RUN du -sh /app/*
48
+ RUN find /app -type f -size +100M
49
+ ENV PATH=/app/build/bin:$PATH
50
+ ENTRYPOINT [ "bash", "-c" ]
whisper.cpp/.devops/main-intel.Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
2
+
3
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
4
+ WORKDIR /app
5
+
6
+ RUN apt-get update && \
7
+ apt-get install -y build-essential libsdl2-dev wget cmake git \
8
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
9
+
10
+ COPY .. .
11
+ # Enable SYCL
12
+ ARG GGML_SYCL_F16=OFF
13
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
14
+ echo "GGML_SYCL_F16 is set" \
15
+ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
16
+ fi && \
17
+ make base.en CMAKE_ARGS="-DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16}"
18
+
19
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
20
+ WORKDIR /app
21
+
22
+ RUN apt-get update && \
23
+ apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
24
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
25
+
26
+ COPY --from=build /app /app
27
+ ENV PATH=/app/build/bin:$PATH
28
+ ENTRYPOINT [ "bash", "-c" ]
whisper.cpp/.devops/main-musa.Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc4.2.0
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
6
+ # Target the MUSA runtime image
7
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
8
+
9
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
10
+ WORKDIR /app
11
+
12
+ RUN apt-get update && \
13
+ apt-get install -y build-essential libsdl2-dev wget cmake git && \
14
+ apt-get clean && \
15
+ rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*
16
+
17
+ COPY .. .
18
+ # Enable muBLAS
19
+ RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
20
+
21
+ RUN find /app/build -name "*.o" -delete && \
22
+ find /app/build -name "*.a" -delete && \
23
+ rm -rf /app/build/CMakeFiles && \
24
+ rm -rf /app/build/cmake_install.cmake && \
25
+ rm -rf /app/build/_deps
26
+
27
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
28
+ WORKDIR /app
29
+
30
+ RUN apt-get update && \
31
+ apt-get install -y curl ffmpeg wget cmake git && \
32
+ apt-get clean && \
33
+ rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*
34
+
35
+ COPY --from=build /app/build/bin /app/build/bin
36
+ COPY --from=build /app/samples /app/samples
37
+ COPY --from=build /app/models /app/models
38
+
39
+ ENV PATH=/app/build/bin:$PATH
40
+ ENTRYPOINT [ "bash", "-c" ]
whisper.cpp/.devops/main.Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04 AS build
2
+ WORKDIR /app
3
+
4
+ RUN apt-get update && \
5
+ apt-get install -y build-essential wget cmake git \
6
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
7
+
8
+ COPY .. .
9
+ RUN make base.en
10
+
11
+ FROM ubuntu:22.04 AS runtime
12
+ WORKDIR /app
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
16
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
17
+
18
+ COPY --from=build /app /app
19
+ ENV PATH=/app/build/bin:$PATH
20
+ ENTRYPOINT [ "bash", "-c" ]
whisper.cpp/.dockerignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ build*/
2
+ .github/
3
+ .devops/
whisper.cpp/.github/workflows/bindings-go.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Bindings Tests (Go)
2
+ on:
3
+ push:
4
+ paths:
5
+ - bindings/go/**
6
+ - whisper.h
7
+ pull_request:
8
+ paths:
9
+ - bindings/go/**
10
+ - whisper.h
11
+
12
+ jobs:
13
+ ubuntu-22:
14
+ runs-on: ubuntu-22.04
15
+ steps:
16
+ - uses: actions/setup-go@v5
17
+ with:
18
+ go-version: '^1.23'
19
+ - uses: actions/checkout@v4
20
+ - run: |
21
+ cd bindings/go
22
+ make test
whisper.cpp/.github/workflows/bindings-ruby.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Bindings Tests (Ruby)
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+
10
+ jobs:
11
+ ubuntu-22:
12
+ runs-on: ubuntu-22.04
13
+ defaults:
14
+ run:
15
+ working-directory: bindings/ruby
16
+ steps:
17
+ - uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: '3.2'
20
+ - uses: actions/checkout@v4
21
+ - run: rake test
whisper.cpp/.github/workflows/build.yml ADDED
@@ -0,0 +1,1580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ tags:
8
+ - 'v*'
9
+ paths: ['.github/workflows/build.yml',
10
+ '**/CMakeLists.txt',
11
+ '**/Makefile',
12
+ '**/*.mk',
13
+ '**/*.cmake',
14
+ '**/*.in',
15
+ '**/*.h',
16
+ '**/*.hpp',
17
+ '**/*.c',
18
+ '**/*.cpp',
19
+ '**/*.cu',
20
+ '**/*.cuh',
21
+ '**/*.cl',
22
+ '**/*.swift',
23
+ '**/*.m',
24
+ '**/*.mm',
25
+ '**/*.metal',
26
+ '**/*.comp',
27
+ '**/*.java']
28
+
29
+ pull_request:
30
+ types: [opened, synchronize, reopened]
31
+ workflow_dispatch:
32
+ inputs:
33
+ create_release:
34
+ description: 'Create new release'
35
+ required: true
36
+ type: boolean
37
+ pre_release_tag:
38
+ description: 'Pre-release tag name'
39
+ required: false
40
+ type: string
41
+ run_type:
42
+ description: 'Workflow type to run'
43
+ required: true
44
+ type: choice
45
+ options:
46
+ - full-ci
47
+ - release-only
48
+
49
+ concurrency:
50
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
51
+ cancel-in-progress: true
52
+
53
+ permissions:
54
+ contents: write # for creating release
55
+
56
+ env:
57
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
58
+ ubuntu_image: "ubuntu:22.04"
59
+ VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
60
+
61
+ jobs:
62
+ determine-tag:
63
+ runs-on: ubuntu-latest
64
+ outputs:
65
+ tag_name: ${{ steps.tag.outputs.name }}
66
+ should_release: ${{ steps.tag.outputs.should_release }}
67
+
68
+ steps:
69
+ - name: Checkout with full history
70
+ uses: actions/checkout@v4
71
+ with:
72
+ fetch-depth: 0
73
+
74
+ - name: Determine tag name
75
+ id: tag
76
+ shell: bash
77
+ run: |
78
+ BUILD_NUMBER=$(git rev-list --count HEAD)
79
+ SHORT_HASH=$(git rev-parse --short=7 HEAD)
80
+ CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}"
81
+ SHOULD_RELEASE="false"
82
+
83
+ echo "Raw values:"
84
+ echo "BUILD_NUMBER: $BUILD_NUMBER"
85
+ echo "SHORT_HASH: $SHORT_HASH"
86
+ echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
87
+ echo "CUSTOM_TAG: $CUSTOM_TAG"
88
+
89
+ if [[ "${{ github.ref_type }}" == "tag" ]]; then
90
+ echo "Using pushed tag name"
91
+ TAG_NAME="${{ github.ref_name }}"
92
+ SHOULD_RELEASE="true"
93
+ elif [[ -n "$CUSTOM_TAG" ]]; then
94
+ echo "Using custom tag"
95
+ TAG_NAME="${CUSTOM_TAG}"
96
+ SHOULD_RELEASE="true"
97
+ elif [[ "${{ github.event.inputs.create_release }}" == "true" ]]; then
98
+ echo "Manual release requested"
99
+ SHOULD_RELEASE="true"
100
+ TAG_NAME="b${BUILD_NUMBER}"
101
+ elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
102
+ echo "Using master branch format"
103
+ TAG_NAME="b${BUILD_NUMBER}"
104
+ SHOULD_RELEASE="false"
105
+ else
106
+ echo "Using non-master branch format"
107
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
108
+ TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
109
+ SHOULD_RELEASE="false"
110
+ fi
111
+
112
+ echo "Final tag name: $TAG_NAME"
113
+ echo "Should release: $SHOULD_RELEASE"
114
+ echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
115
+ echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT
116
+
117
+
118
+ ubuntu-22:
119
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
120
+ github.event.inputs.run_type == 'full-ci' }}
121
+ runs-on: ubuntu-22.04
122
+
123
+ strategy:
124
+ fail-fast: false
125
+ matrix:
126
+ arch: [linux/amd64, linux/ppc64le]
127
+
128
+ steps:
129
+ - name: Clone
130
+ uses: actions/checkout@v4
131
+
132
+ - name: Set up QEMU
133
+ uses: docker/setup-qemu-action@v3
134
+
135
+ - name: Build ${{ matrix.arch }}
136
+ run: |
137
+ docker run --platform ${{ matrix.arch }} --rm \
138
+ -v ${{ github.workspace }}:/workspace \
139
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
140
+ set -e
141
+ export DEBIAN_FRONTEND=noninteractive
142
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
143
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
144
+
145
+ apt update
146
+ apt install -y build-essential libsdl2-dev cmake git
147
+ cmake -B build
148
+ cmake --build build --config Release -j $(nproc)'
149
+
150
+ ubuntu-22-arm64:
151
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
152
+ github.event.inputs.run_type == 'full-ci' }}
153
+ runs-on: ubuntu-22.04
154
+
155
+ strategy:
156
+ fail-fast: false
157
+ matrix:
158
+ arch: [linux/arm64]
159
+
160
+ steps:
161
+ - name: Clone
162
+ uses: actions/checkout@v4
163
+
164
+ - name: Set up QEMU
165
+ uses: docker/setup-qemu-action@v3
166
+
167
+ - name: Build ${{ matrix.arch }}
168
+ run: |
169
+ docker run --platform ${{ matrix.arch }} --rm \
170
+ -v ${{ github.workspace }}:/workspace \
171
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
172
+ set -e
173
+ export DEBIAN_FRONTEND=noninteractive
174
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
175
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
176
+
177
+ apt-get update
178
+ apt-get install -y ca-certificates
179
+ sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list
180
+
181
+ apt update
182
+ apt install -y build-essential libsdl2-dev cmake git
183
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
184
+ cmake --build build --config Release -j $(nproc)'
185
+
186
+ ubuntu-22-arm-v7:
187
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
188
+ github.event.inputs.run_type == 'full-ci' }}
189
+ runs-on: ubuntu-22.04
190
+
191
+ strategy:
192
+ fail-fast: false
193
+ matrix:
194
+ arch: [linux/arm/v7]
195
+
196
+ steps:
197
+ - name: Clone
198
+ uses: actions/checkout@v4
199
+
200
+ - name: Set up QEMU
201
+ uses: docker/setup-qemu-action@v3
202
+
203
+ - name: Build ${{ matrix.arch }}
204
+ run: |
205
+ docker run --platform ${{ matrix.arch }} --rm \
206
+ -v ${{ github.workspace }}:/workspace \
207
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
208
+ set -e
209
+ export DEBIAN_FRONTEND=noninteractive
210
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
211
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
212
+
213
+ apt-get update
214
+ apt-get install -y ca-certificates
215
+ sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list
216
+
217
+ apt update
218
+ apt install -y build-essential libsdl2-dev cmake git
219
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
220
+ cmake --build build --config Release -j $(nproc)'
221
+
222
+ macOS-latest:
223
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
224
+ github.event.inputs.run_type == 'full-ci' }}
225
+ runs-on: macOS-latest
226
+
227
+ strategy:
228
+ matrix:
229
+ destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
230
+
231
+ steps:
232
+ - name: Clone
233
+ id: checkout
234
+ uses: actions/checkout@v4
235
+
236
+ - name: ccache
237
+ uses: hendrikmuhs/ccache-action@v1.2.16
238
+ with:
239
+ key: macOS-latest-swift
240
+ evict-old-files: 1d
241
+
242
+ - name: Dependencies
243
+ run: |
244
+ brew update
245
+ cmake --version
246
+ brew install sdl2
247
+
248
+ - name: Build
249
+ run: |
250
+ sysctl -a
251
+ cmake -B build -G Xcode \
252
+ -DGGML_METAL_USE_BF16=ON \
253
+ -DGGML_METAL_EMBED_LIBRARY=ON \
254
+ -DWHISPER_BUILD_EXAMPLES=OFF \
255
+ -DWHISPER_BUILD_TESTS=OFF \
256
+ -DWHISPER_BUILD_SERVER=OFF \
257
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
258
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
259
+
260
+
261
+ # freeBSD-latest:
262
+ # runs-on: macos-13
263
+ #
264
+ # steps:
265
+ # - name: Clone
266
+ # uses: actions/checkout@v4
267
+ #
268
+ # - name: Build
269
+ # uses: cross-platform-actions/action@v0.27.0
270
+ # with:
271
+ # operating_system: freebsd
272
+ # version: '14.2'
273
+ # run: |
274
+ # sudo pkg update
275
+ # sudo pkg install -y gmake sdl2 cmake git
276
+ # cmake -B build
277
+ # cmake --build build --config Release
278
+
279
+ ubuntu-22-gcc:
280
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
281
+ github.event.inputs.run_type == 'full-ci' }}
282
+ runs-on: ubuntu-22.04
283
+
284
+ strategy:
285
+ fail-fast: false
286
+ matrix:
287
+ build: [Debug, Release]
288
+ arch: [linux/amd64, linux/ppc64le]
289
+
290
+ steps:
291
+ - name: Clone
292
+ uses: actions/checkout@v4
293
+
294
+ - name: Set up QEMU
295
+ uses: docker/setup-qemu-action@v3
296
+
297
+ - name: Build ${{ matrix.arch }}
298
+ run: |
299
+ docker run --platform ${{ matrix.arch }} --rm \
300
+ -v ${{ github.workspace }}:/workspace \
301
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
302
+ set -e
303
+ export DEBIAN_FRONTEND=noninteractive
304
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
305
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
306
+
307
+ apt update
308
+ apt install -y build-essential cmake libsdl2-dev git
309
+ cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
310
+ make
311
+ ctest -L gh --output-on-failure'
312
+
313
+ ubuntu-22-gcc-arm64:
314
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
315
+ github.event.inputs.run_type == 'full-ci' }}
316
+ runs-on: ubuntu-22.04
317
+
318
+ strategy:
319
+ fail-fast: false
320
+ matrix:
321
+ build: [Debug, Release]
322
+ arch: [linux/arm64]
323
+
324
+ steps:
325
+ - name: Clone
326
+ uses: actions/checkout@v4
327
+
328
+ - name: Set up QEMU
329
+ uses: docker/setup-qemu-action@v3
330
+
331
+ - name: Build ${{ matrix.arch }}
332
+ run: |
333
+ docker run --platform ${{ matrix.arch }} --rm \
334
+ -v ${{ github.workspace }}:/workspace \
335
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
336
+ set -e
337
+ export DEBIAN_FRONTEND=noninteractive
338
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
339
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
340
+
341
+ apt-get update
342
+ apt-get install -y ca-certificates
343
+ sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list
344
+
345
+ apt update
346
+ apt install -y build-essential cmake libsdl2-dev git
347
+ cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
348
+ make
349
+ ctest -L gh --output-on-failure'
350
+
351
+ ubuntu-22-gcc-arm-v7:
352
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
353
+ github.event.inputs.run_type == 'full-ci' }}
354
+ runs-on: ubuntu-22.04
355
+
356
+ strategy:
357
+ fail-fast: false
358
+ matrix:
359
+ build: [Debug, Release]
360
+ arch: [linux/arm/v7]
361
+
362
+ steps:
363
+ - name: Clone
364
+ uses: actions/checkout@v4
365
+
366
+ - name: Set up QEMU
367
+ uses: docker/setup-qemu-action@v3
368
+
369
+ - name: Build ${{ matrix.arch }}
370
+ run: |
371
+ docker run --platform ${{ matrix.arch }} --rm \
372
+ -v ${{ github.workspace }}:/workspace \
373
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
374
+ set -e
375
+ export DEBIAN_FRONTEND=noninteractive
376
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
377
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
378
+
379
+ apt-get update
380
+ apt-get install -y ca-certificates
381
+ sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list
382
+
383
+ apt update
384
+ apt install -y build-essential cmake libsdl2-dev git
385
+ cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
386
+ make
387
+ ctest -L gh --output-on-failure'
388
+
389
+ ubuntu-22-clang:
390
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
391
+ github.event.inputs.run_type == 'full-ci' }}
392
+ runs-on: ubuntu-22.04
393
+
394
+ strategy:
395
+ fail-fast: false
396
+ matrix:
397
+ build: [Debug, Release]
398
+ #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
399
+ # TODO: arm/v7 disabled due to clang bug
400
+ # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
401
+ arch: [linux/amd64, linux/arm64, linux/ppc64le]
402
+
403
+ steps:
404
+ - name: Clone
405
+ uses: actions/checkout@v4
406
+
407
+ - name: Set up QEMU
408
+ uses: docker/setup-qemu-action@v3
409
+
410
+ - name: Build ${{ matrix.arch }}
411
+ run: |
412
+ docker run --platform ${{ matrix.arch }} --rm \
413
+ -v ${{ github.workspace }}:/workspace \
414
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
415
+ set -e
416
+ export DEBIAN_FRONTEND=noninteractive
417
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
418
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
419
+
420
+ apt-get update
421
+ apt-get install -y ca-certificates
422
+ sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list
423
+
424
+ apt update
425
+ apt install -y clang build-essential cmake libsdl2-dev git
426
+ cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
427
+ make
428
+ ctest -L gh --output-on-failure'
429
+
430
+ ubuntu-22-gcc-sanitized:
431
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
432
+ github.event.inputs.run_type == 'full-ci' }}
433
+ runs-on: ubuntu-22.04
434
+
435
+ strategy:
436
+ fail-fast: false
437
+ matrix:
438
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
439
+ arch: [linux/amd64]
440
+
441
+ steps:
442
+ - name: Clone
443
+ uses: actions/checkout@v4
444
+
445
+ - name: Set up QEMU
446
+ uses: docker/setup-qemu-action@v3
447
+
448
+ - name: Build ${{ matrix.arch }}
449
+ run: |
450
+ docker run --platform ${{ matrix.arch }} --rm \
451
+ -v ${{ github.workspace }}:/workspace \
452
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
453
+ set -e
454
+ export DEBIAN_FRONTEND=noninteractive
455
+ sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
456
+ sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
457
+
458
+ apt update
459
+ apt install -y build-essential cmake git
460
+ cmake . -DCMAKE_BUILD_TYPE=Debug \
461
+ -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON \
462
+ -DGGML_OPENMP=OFF
463
+ make
464
+ ctest -L gh --output-on-failure'
465
+
466
+ ubuntu-22-cmake-sycl:
467
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
468
+ github.event.inputs.run_type == 'full-ci' }}
469
+ runs-on: ubuntu-22.04
470
+
471
+ strategy:
472
+ fail-fast: false
473
+ matrix:
474
+ dwhisper_sycl: [ON]
475
+ dcmake_c_compiler: [icx]
476
+ dcmake_cxx_compiler: [icpx]
477
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
478
+
479
+ continue-on-error: true
480
+
481
+ steps:
482
+ - name: Clone
483
+ uses: actions/checkout@v4
484
+
485
+ - name: add oneAPI to apt
486
+ shell: bash
487
+ run: |
488
+ cd /tmp
489
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
490
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
491
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
492
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
493
+
494
+ - name: install oneAPI dpcpp compiler
495
+ shell: bash
496
+ run: |
497
+ sudo apt update
498
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp git
499
+
500
+ - name: install oneAPI MKL library
501
+ shell: bash
502
+ run: |
503
+ sudo apt install intel-oneapi-mkl-devel git
504
+
505
+ - name: Clone
506
+ id: checkout
507
+ uses: actions/checkout@v4
508
+
509
+ - name: Build
510
+ id: cmake_build
511
+ run: |
512
+ source /opt/intel/oneapi/setvars.sh
513
+ mkdir build
514
+ cd build
515
+ cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
516
+ cmake --build . --config Release -j $(nproc)
517
+
518
+ ubuntu-22-cmake-sycl-fp16:
519
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
520
+ github.event.inputs.run_type == 'full-ci' }}
521
+ runs-on: ubuntu-22.04
522
+
523
+ strategy:
524
+ fail-fast: false
525
+ matrix:
526
+ dwhisper_sycl: [ON]
527
+ dcmake_c_compiler: [icx]
528
+ dcmake_cxx_compiler: [icpx]
529
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
530
+
531
+ continue-on-error: true
532
+
533
+ steps:
534
+ - name: Clone
535
+ uses: actions/checkout@v4
536
+
537
+ - name: add oneAPI to apt
538
+ shell: bash
539
+ run: |
540
+ cd /tmp
541
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
542
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
543
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
544
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
545
+
546
+ - name: install oneAPI dpcpp compiler
547
+ shell: bash
548
+ run: |
549
+ sudo apt update
550
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp git
551
+
552
+ - name: install oneAPI MKL library
553
+ shell: bash
554
+ run: |
555
+ sudo apt install intel-oneapi-mkl-devel
556
+
557
+ - name: Clone
558
+ id: checkout
559
+ uses: actions/checkout@v4
560
+
561
+ - name: Build
562
+ id: cmake_build
563
+ run: |
564
+ source /opt/intel/oneapi/setvars.sh
565
+ mkdir build
566
+ cd build
567
+ cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
568
+ cmake --build . --config Release -j $(nproc)
569
+
570
+ windows-msys2:
571
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
572
+ github.event.inputs.run_type == 'full-ci' }}
573
+ runs-on: windows-latest
574
+
575
+ strategy:
576
+ fail-fast: false
577
+ matrix:
578
+ include:
579
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
580
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
581
+
582
+ steps:
583
+ - name: Clone
584
+ uses: actions/checkout@v4
585
+
586
+ - name: Setup ${{ matrix.sys }}
587
+ uses: msys2/setup-msys2@v2
588
+ with:
589
+ update: true
590
+ msystem: ${{matrix.sys}}
591
+ install: >-
592
+ base-devel
593
+ git
594
+ mingw-w64-${{matrix.env}}-toolchain
595
+ mingw-w64-${{matrix.env}}-cmake
596
+ mingw-w64-${{matrix.env}}-SDL2
597
+ mingw-w64-${{matrix.env}}-openblas
598
+
599
+ - name: Build using CMake
600
+ shell: msys2 {0}
601
+ run: |
602
+ cmake -B build -DWHISPER_SDL2=ON
603
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
604
+
605
+ - name: Clean after building using CMake
606
+ shell: msys2 {0}
607
+ run: |
608
+ rm -rf build
609
+
610
+ - name: Build using CMake w/ OpenBLAS
611
+ shell: msys2 {0}
612
+ run: |
613
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
614
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
615
+
616
+ windows:
617
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
618
+ github.event.inputs.run_type == 'full-ci' }}
619
+ runs-on: windows-latest
620
+ needs: determine-tag
621
+
622
+ strategy:
623
+ matrix:
624
+ build: [Release]
625
+ arch: [Win32, x64]
626
+ sdl2: [ON]
627
+ include:
628
+ - arch: Win32
629
+ s2arc: x86
630
+ jnaPath: win32-x86
631
+ - arch: x64
632
+ s2arc: x64
633
+ jnaPath: win32-x86-64
634
+ - sdl2: ON
635
+ s2ver: 2.28.5
636
+
637
+ steps:
638
+ - name: Clone
639
+ uses: actions/checkout@v4
640
+
641
+ - name: Add msbuild to PATH
642
+ uses: microsoft/setup-msbuild@v2
643
+
644
+ - name: Fetch SDL2 and set SDL2_DIR
645
+ if: matrix.sdl2 == 'ON'
646
+ run: |
647
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
648
+ 7z x sdl2.zip
649
+ echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
650
+
651
+ - name: Configure
652
+ run: >
653
+ cmake -S . -B ./build -A ${{ matrix.arch }}
654
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }}
655
+ -DBUILD_SHARED_LIBS=ON
656
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
657
+
658
+ - name: Build
659
+ run: |
660
+ cd ./build
661
+ msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
662
+
663
+ - name: Copy SDL2.dll
664
+ if: matrix.sdl2 == 'ON'
665
+ run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
666
+
667
+ - name: Upload SDL2.dll
668
+ if: matrix.sdl2 == 'ON'
669
+ uses: actions/upload-artifact@v4
670
+ with:
671
+ name: ${{ matrix.s2arc }}_SDL2.dll
672
+ path: build/bin/${{ matrix.build }}/SDL2.dll
673
+
674
+ - name: Upload whisper dll
675
+ uses: actions/upload-artifact@v4
676
+ with:
677
+ name: whisper_${{ matrix.arch }}.dll
678
+ path: build/bin/${{ matrix.build }}/whisper.dll
679
+
680
+ - name: Upload ggml dll
681
+ uses: actions/upload-artifact@v4
682
+ with:
683
+ name: ggml_${{ matrix.arch }}.dll
684
+ path: build/bin/${{ matrix.build }}/ggml.dll
685
+
686
+ - name: Upload ggml base dll
687
+ uses: actions/upload-artifact@v4
688
+ with:
689
+ name: ggml_base_${{ matrix.arch }}.dll
690
+ path: build/bin/${{ matrix.build }}/ggml-base.dll
691
+
692
+ - name: Upload ggml cpu dll
693
+ uses: actions/upload-artifact@v4
694
+ with:
695
+ name: ggml_cpu_${{ matrix.arch }}.dll
696
+ path: build/bin/${{ matrix.build }}/ggml-cpu.dll
697
+
698
+ - name: Pack bin artifacts
699
+ shell: pwsh
700
+ run: |
701
+ Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip"
702
+
703
+ - name: Upload binaries
704
+ if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }}
705
+ uses: actions/upload-artifact@v4
706
+ with:
707
+ name: whisper-bin-${{ matrix.arch }}.zip
708
+ path: whisper-bin-${{ matrix.arch }}.zip
709
+
710
+ windows-blas:
711
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
712
+ github.event.inputs.run_type == 'full-ci' }}
713
+ runs-on: windows-latest
714
+
715
+ strategy:
716
+ matrix:
717
+ build: [Release]
718
+ arch: [Win32, x64]
719
+ blas: [ON]
720
+ sdl2: [ON]
721
+ blasver: [0.3.29]
722
+ include:
723
+ - arch: Win32
724
+ s2arc: x86
725
+ blasfile: x86
726
+ - arch: x64
727
+ s2arc: x64
728
+ blasfile: x64_64
729
+ - sdl2: ON
730
+ s2ver: 2.28.5
731
+
732
+ steps:
733
+ - name: Clone
734
+ uses: actions/checkout@v4
735
+
736
+ - name: Export GitHub Actions cache environment variables
737
+ uses: actions/github-script@v7
738
+ with:
739
+ script: |
740
+ core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
741
+ core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
742
+
743
+ - name: Add msbuild to PATH
744
+ uses: microsoft/setup-msbuild@v2
745
+
746
+ - name: Install OpenBLAS and pkgconfiglite
747
+ if: matrix.blas == 'ON'
748
+ run: |
749
+ Invoke-WebRequest "https://github.com/OpenMathLib/OpenBLAS/releases/download/v${{matrix.blasver}}/OpenBLAS-${{matrix.blasver}}_${{matrix.blasfile}}.zip" -OutFile "OpenBLAS-${{matrix.blasver}}.zip"
750
+ Expand-Archive "OpenBLAS-${{matrix.blasver}}.zip" -DestinationPath "OpenBLAS-${{matrix.blasver}}"
751
+ choco install pkgconfiglite
752
+
753
+ - name: Fetch SDL2 and set SDL2_DIR
754
+ if: matrix.sdl2 == 'ON'
755
+ run: |
756
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
757
+ 7z x sdl2.zip
758
+ echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
759
+
760
+ - name: Configure
761
+ run: >
762
+ cmake -S . -B ./build -A ${{ matrix.arch }}
763
+ -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
764
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }}
765
+ -DGGML_BLAS=${{ matrix.blas }}
766
+ -DGGML_BLAS_VENDOR=OpenBLAS
767
+ -DBLAS_LIBRARIES="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/lib/libopenblas.lib"
768
+ -DBLAS_INCLUDE_DIRS="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/include"
769
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
770
+
771
+ - name: Build
772
+ run: |
773
+ cd ./build
774
+ msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
775
+
776
+ - name: Copy openblas.dll
777
+ if: matrix.blas == 'ON'
778
+ run: copy "$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/bin/libopenblas.dll" build/bin/${{ matrix.build }}
779
+
780
+ - name: Copy SDL2.dll
781
+ if: matrix.sdl2 == 'ON'
782
+ run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
783
+
784
+ - name: Pack bin artifacts
785
+ shell: pwsh
786
+ run: |
787
+ Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip"
788
+
789
+ - name: Upload binaries
790
+ if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }}
791
+ uses: actions/upload-artifact@v4
792
+ with:
793
+ name: whisper-blas-bin-${{ matrix.arch }}.zip
794
+ path: whisper-blas-bin-${{ matrix.arch }}.zip
795
+
796
+ windows-cublas:
797
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
798
+ github.event.inputs.run_type == 'full-ci' }}
799
+ runs-on: windows-2022
800
+ needs: determine-tag
801
+ strategy:
802
+ fail-fast: false
803
+ matrix:
804
+ build: [Release]
805
+ arch: [x64]
806
+ cublas: [ON]
807
+ sdl2: [ON]
808
+ cuda-toolkit: [12.4.0, 11.8.0]
809
+ include:
810
+ - arch: x64
811
+ sdl2: ON
812
+ sdl2_ver: 2.28.5
813
+ steps:
814
+ - name: Clone repository
815
+ uses: actions/checkout@v4
816
+
817
+ - name: Install Ninja
818
+ id: install_ninja
819
+ run: |
820
+ choco install ninja
821
+
822
+ - name: Install ccache
823
+ uses: hendrikmuhs/ccache-action@v1.2.16
824
+ with:
825
+ key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }}
826
+ variant: sccache
827
+ evict-old-files: 5d
828
+
829
+ - name: Install Cuda Toolkit 11.8.0
830
+ if: ${{ matrix.cuda-toolkit == '11.8.0' }}
831
+ run: |
832
+ $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
833
+ $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
834
+ $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
835
+
836
+ # Components versions
837
+ $CUDART_VER = "11.8.89"
838
+ $NVCC_VER = "11.8.89"
839
+ $NVRTC_VER = "11.8.89"
840
+ $CUBLAS_VER = "11.8.1.74"
841
+ $NVTX_VER = "11.8.86"
842
+ $VS_VER = "11.8.86"
843
+ $NVPROF_VER = "11.8.87"
844
+ $CCCL_VER = "11.8.89"
845
+
846
+ # Create the directory where the CUDA Toolkit will be installed
847
+ mkdir -p $CUDA_TOOLKIT_DIR
848
+
849
+ # Install unzip to extract the downloaded files
850
+ choco install unzip -y
851
+
852
+ # Download all the required components
853
+ curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
854
+ curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
855
+ curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
856
+ curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
857
+ curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
858
+ curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
859
+ curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
860
+ curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
861
+
862
+ # Extract all the downloaded files to the CUDA Toolkit directory
863
+ unzip '*.zip' -d $CUDA_TOOLKIT_DIR
864
+
865
+ # Copy all the extracted files to the main CUDA Toolkit directory
866
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
867
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
868
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
869
+ xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
870
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
871
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
872
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
873
+ xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
874
+
875
+ # Visual Studio integration
876
+ xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y
877
+
878
+ # Set environment variables
879
+ echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
880
+ echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
881
+ echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
882
+ echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
883
+
884
+ - name: Install Cuda Toolkit 12.4.0
885
+ if: ${{ matrix.cuda-toolkit == '12.4.0' }}
886
+ run: |
887
+ $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
888
+ $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
889
+ $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
890
+
891
+ # Components versions
892
+ $CUDART_VER = "12.4.127"
893
+ $NVCC_VER = "12.4.131"
894
+ $NVRTC_VER = "12.4.127"
895
+ $CUBLAS_VER = "12.4.5.8"
896
+ $NVTX_VER = "12.4.127"
897
+ $PROFILER_VER = "12.4.127"
898
+ $VS_VER = "12.4.127"
899
+ $NVPROF_VER = "12.4.128"
900
+ $CCCL_VER = "12.4.127"
901
+
902
+ # Create the directory where the CUDA Toolkit will be installed
903
+ mkdir -p $CUDA_TOOLKIT_DIR
904
+
905
+ # Install unzip to extract the downloaded files
906
+ choco install unzip -y
907
+
908
+ # Download all the required components
909
+ curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
910
+ curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
911
+ curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
912
+ curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
913
+ curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
914
+ curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip"
915
+ curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
916
+ curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
917
+ curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
918
+
919
+ # Extract all the downloaded files to the CUDA Toolkit directory
920
+ unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR
921
+
922
+ # Copy all the extracted files to the main CUDA Toolkit directory
923
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
924
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
925
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
926
+ xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
927
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
928
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
929
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
930
+ xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
931
+ xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
932
+
933
+ # Visual Studio integration
934
+ xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y
935
+
936
+ # Set environment variables
937
+ echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
938
+ echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
939
+ echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
940
+ echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
941
+
942
+ - name: Add msbuild to PATH
943
+ uses: microsoft/setup-msbuild@v2
944
+
945
+ - name: Install 7-Zip
946
+ run: choco install 7zip -y
947
+
948
+ - name: Fetch SDL2 and set SDL2_DIR
949
+ if: matrix.sdl2 == 'ON'
950
+ run: |
951
+ Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip
952
+ 7z x sdl2.zip
953
+ echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
954
+ echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
955
+
956
+ - name: Install cmake
957
+ run: choco install cmake
958
+
959
+ - name: Build Project
960
+ shell: cmd
961
+ run: |
962
+ call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
963
+ cmake --version
964
+ where cmake
965
+ if "${{ matrix.cuda-toolkit }}" == "11.8.0" (
966
+ set CUDA_FLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR
967
+ ) else (
968
+ set CUDA_FLAGS=
969
+ )
970
+ cmake -S . -B build -G "Ninja Multi-Config" ^
971
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
972
+ -DGGML_CUDA=${{ matrix.cublas }} ^
973
+ -DWHISPER_SDL2=${{ matrix.sdl2 }} ^
974
+ -DSDL2_DIR="%SDL2_DIR%" ^
975
+ -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ^
976
+ -DCMAKE_CUDA_FLAGS="%CUDA_FLAGS%"
977
+ set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
978
+ cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS%
979
+
980
+ - name: Check sccache status after build
981
+ run: |
982
+ sccache --show-stats
983
+
984
+ - name: Copy CUDA DLLs
985
+ run: |
986
+ Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" |
987
+ Copy-Item -Destination "build/bin/${{ matrix.build }}"
988
+
989
+ - name: Copy SDL2.dll
990
+ if: matrix.sdl2 == 'ON'
991
+ run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }}
992
+
993
+ - name: Pack bin artifacts
994
+ shell: pwsh
995
+ run: |
996
+ Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip"
997
+
998
+ - name: Upload binaries
999
+ if: ${{ needs.determine-tag.outputs.should_release }}
1000
+ uses: actions/upload-artifact@v4
1001
+ with:
1002
+ name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip
1003
+ path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip
1004
+
1005
+ emscripten:
1006
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
1007
+ github.event.inputs.run_type == 'full-ci' }}
1008
+ runs-on: ubuntu-22.04
1009
+
1010
+ strategy:
1011
+ matrix:
1012
+ build: [Release]
1013
+
1014
+ steps:
1015
+ - name: Clone
1016
+ uses: actions/checkout@v4
1017
+
1018
+ - name: Setup emsdk
1019
+ uses: mymindstorm/setup-emsdk@v14
1020
+
1021
+ - name: Verify
1022
+ run: emcc -v
1023
+
1024
+ - name: Build
1025
+ run: |
1026
+ emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1027
+ make
1028
+
1029
+ ios-xcode-build:
1030
+ runs-on: macos-latest
1031
+ needs: determine-tag
1032
+
1033
+ strategy:
1034
+ matrix:
1035
+ build: [Release]
1036
+
1037
+ steps:
1038
+ - name: Checkout code
1039
+ uses: actions/checkout@v4
1040
+
1041
+ - name: Configure
1042
+ run: |
1043
+ cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
1044
+ mkdir models/ggml-base.en-encoder.mlmodelc
1045
+
1046
+ - name: Build
1047
+ id: cmake_build
1048
+ run: |
1049
+ sysctl -a
1050
+ mkdir build
1051
+ cd build
1052
+ cmake -G Xcode .. \
1053
+ -DGGML_METAL_USE_BF16=ON \
1054
+ -DGGML_METAL_EMBED_LIBRARY=ON \
1055
+ -DWHISPER_BUILD_EXAMPLES=OFF \
1056
+ -DWHISPER_BUILD_TESTS=OFF \
1057
+ -DWHISPER_BUILD_SERVER=OFF \
1058
+ -DCMAKE_SYSTEM_NAME=iOS \
1059
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
1060
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
1061
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
1062
+
1063
+ - name: xcodebuild for swift package
1064
+ id: xcodebuild
1065
+ run: |
1066
+ ./build-xcframework.sh
1067
+
1068
+ - name: Build objc example
1069
+ run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
1070
+
1071
+ - name: Build swiftui example
1072
+ run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
1073
+
1074
+ - name: Pack artifacts
1075
+ id: pack_artifacts
1076
+ run: |
1077
+ zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework
1078
+
1079
+ - name: Upload artifacts
1080
+ if: ${{ needs.determine-tag.outputs.should_release }}
1081
+ uses: actions/upload-artifact@v4
1082
+ with:
1083
+ path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
1084
+ name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
1085
+
1086
+ android:
1087
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
1088
+ github.event.inputs.run_type == 'full-ci' }}
1089
+ runs-on: ubuntu-22.04
1090
+
1091
+ steps:
1092
+ - name: Clone
1093
+ uses: actions/checkout@v4
1094
+ with:
1095
+ path: whisper
1096
+
1097
+ - name: Install Java
1098
+ uses: actions/setup-java@v4
1099
+ with:
1100
+ distribution: zulu
1101
+ java-version: 21
1102
+
1103
+ - name: Setup Android SDK
1104
+ uses: android-actions/setup-android@v3
1105
+
1106
+ - name: Build
1107
+ run: |
1108
+ cd whisper/examples/whisper.android
1109
+ ./gradlew assembleRelease --no-daemon
1110
+
1111
+ - name: Build with external ggml
1112
+ run: |
1113
+ export PATH_TO_GGML=$PWD/ggml
1114
+ cd whisper/examples/whisper.android
1115
+ ./gradlew assembleRelease --no-daemon
1116
+
1117
+ android_java:
1118
+ runs-on: ubuntu-22.04
1119
+
1120
+ steps:
1121
+ - name: Clone
1122
+ uses: actions/checkout@v4
1123
+
1124
+ - name: set up JDK 11
1125
+ uses: actions/setup-java@v4
1126
+ with:
1127
+ java-version: '11'
1128
+ distribution: 'temurin'
1129
+ cache: gradle
1130
+
1131
+ - name: Setup Android SDK
1132
+ uses: android-actions/setup-android@v3
1133
+ with:
1134
+ cmdline-tools-version: 9.0
1135
+
1136
+ - name: Build
1137
+ run: |
1138
+ cd examples/whisper.android.java
1139
+ chmod +x ./gradlew
1140
+ ./gradlew assembleRelease
1141
+
1142
+ bindings-java:
1143
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
1144
+ github.event.inputs.run_type == 'full-ci' }}
1145
+ needs: ['windows']
1146
+ runs-on: windows-latest
1147
+ steps:
1148
+ - uses: actions/checkout@v4
1149
+
1150
+ - name: Install Java
1151
+ uses: actions/setup-java@v4
1152
+ with:
1153
+ distribution: zulu
1154
+ java-version: 20
1155
+
1156
+ - name: Download Whisper Windows lib
1157
+ uses: actions/download-artifact@v4
1158
+ with:
1159
+ name: whisper_x64.dll
1160
+
1161
+ - name: Download GGML Windows lib
1162
+ uses: actions/download-artifact@v4
1163
+ with:
1164
+ name: ggml_x64.dll
1165
+
1166
+ - name: Download GGML Base Windows lib
1167
+ uses: actions/download-artifact@v4
1168
+ with:
1169
+ name: ggml_base_x64.dll
1170
+
1171
+ - name: Download GGML CPU Windows lib
1172
+ uses: actions/download-artifact@v4
1173
+ with:
1174
+ name: ggml_cpu_x64.dll
1175
+
1176
+ - name: Download SDL2.dll
1177
+ uses: actions/download-artifact@v4
1178
+ with:
1179
+ name: x64_SDL2.dll
1180
+
1181
+ - name: List downloaded files
1182
+ shell: pwsh
1183
+ run: |
1184
+ Get-ChildItem -Path "." -Recurse -Filter "*.dll"
1185
+
1186
+ - name: Move DLL to correct location
1187
+ shell: pwsh
1188
+ run: |
1189
+ New-Item -Path "build\bin\Release" -ItemType Directory -Force
1190
+
1191
+ Copy-Item -Path "whisper.dll" -Destination "build\bin\Release\whisper.dll" -Force
1192
+ Write-Host "Copied whisper.dll to build\bin\Release\whisper.dll directory"
1193
+
1194
+ Copy-Item -Path "ggml.dll" -Destination "build\bin\Release\ggml.dll" -Force
1195
+ Write-Host "Copied ggml.dll to build\bin\Release\ggml.dll directory"
1196
+
1197
+ Copy-Item -Path "ggml-base.dll" -Destination "build\bin\Release\ggml-base.dll" -Force
1198
+ Write-Host "Copied ggml-base.dll to build\bin\Release\ggml-base.dll directory"
1199
+
1200
+ Copy-Item -Path "ggml-cpu.dll" -Destination "build\bin\Release\ggml-cpu.dll" -Force
1201
+ Write-Host "Copied ggml-cpu.dll to build\bin\Release\ggml-cpu.dll directory"
1202
+
1203
+ Copy-Item -Path "SDL2.dll" -Destination "build\bin\Release\SDL2.dll" -Force
1204
+ Write-Host "Copied SDL2.dll to build\bin\Release\SDL2.dll directory"
1205
+
1206
+ - name: List build release files
1207
+ shell: pwsh
1208
+ run: |
1209
+ Get-ChildItem -Path "build\Release" -Recurse -Filter "*.dll"
1210
+
1211
+ - name: Build
1212
+ run: |
1213
+ models\download-ggml-model.cmd tiny.en models/
1214
+ cd bindings/java
1215
+ chmod +x ./gradlew
1216
+ ./gradlew build --info
1217
+
1218
+ - name: Pack jar artifacts
1219
+ shell: pwsh
1220
+ run: |
1221
+ Compress-Archive -Path "bindings/java/build/libs/whispercpp-*.jar" -DestinationPath "whispercpp.jar.zip"
1222
+
1223
+ - name: Upload jar
1224
+ uses: actions/upload-artifact@v4
1225
+ with:
1226
+ name: whispercpp.jar.zip
1227
+ path: whispercpp.jar.zip
1228
+
1229
+ # - name: Publish package
1230
+ # if: ${{ github.ref == 'refs/heads/master' }}
1231
+ # uses: gradle/gradle-build-action@v2.4.2
1232
+ # with:
1233
+ # arguments: publish
1234
+ # build-root-directory: bindings/java
1235
+ # env:
1236
+ # MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
1237
+ # MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
1238
+ # PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
1239
+ # PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
1240
+
1241
+ quantize:
1242
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
1243
+ github.event.inputs.run_type == 'full-ci' }}
1244
+ runs-on: ubuntu-22.04
1245
+
1246
+ steps:
1247
+ - name: Clone
1248
+ uses: actions/checkout@v4
1249
+
1250
+ - name: Test quantize
1251
+ run: |
1252
+ ./models/download-ggml-model.sh tiny.en
1253
+ cmake -B build
1254
+ cmake --build build --config Release
1255
+ ./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
1256
+
1257
+ release:
1258
+ if: ${{ github.event.inputs.create_release == 'true' || github.event.inputs.pre_release_tag != '' || startsWith(github.ref, 'refs/tags/v') }}
1259
+
1260
+ runs-on: ubuntu-latest
1261
+
1262
+ needs:
1263
+ - determine-tag
1264
+ - ios-xcode-build
1265
+ - windows
1266
+ - windows-blas
1267
+ - windows-cublas
1268
+
1269
+ steps:
1270
+ - name: Clone
1271
+ id: checkout
1272
+ uses: actions/checkout@v4
1273
+ with:
1274
+ fetch-depth: 0
1275
+
1276
+ - name: ccache
1277
+ uses: hendrikmuhs/ccache-action@v1.2.16
1278
+ with:
1279
+ key: release
1280
+ evict-old-files: 1d
1281
+
1282
+ # Downloads all the artifacts from the previous jobs
1283
+ - name: Download artifacts
1284
+ id: download-artifact
1285
+ uses: actions/download-artifact@v4
1286
+ with:
1287
+ path: ./artifact
1288
+
1289
+ - name: Move artifacts
1290
+ id: move_artifacts
1291
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
1292
+
1293
+ - name: Create release
1294
+ id: create_release
1295
+ uses: ggml-org/action-create-release@v1
1296
+ env:
1297
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1298
+ with:
1299
+ tag_name: ${{ needs.determine-tag.outputs.tag_name }}
1300
+ prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
1301
+ draft: true
1302
+
1303
+ - name: Upload release
1304
+ id: upload_release
1305
+ uses: actions/github-script@v3
1306
+ with:
1307
+ github-token: ${{secrets.GITHUB_TOKEN}}
1308
+ script: |
1309
+ const path = require('path');
1310
+ const fs = require('fs');
1311
+ const release_id = '${{ steps.create_release.outputs.id }}';
1312
+ for (let file of await fs.readdirSync('./artifact/release')) {
1313
+ if (path.extname(file) === '.zip') {
1314
+ console.log('uploadReleaseAsset', file);
1315
+ await github.repos.uploadReleaseAsset({
1316
+ owner: context.repo.owner,
1317
+ repo: context.repo.repo,
1318
+ release_id: release_id,
1319
+ name: file,
1320
+ data: await fs.readFileSync(`./artifact/release/${file}`)
1321
+ });
1322
+ }
1323
+ }
1324
+
1325
+ coreml-base-en:
1326
+ if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
1327
+ github.event.inputs.create_release == 'true' ||
1328
+ github.event.inputs.pre_release_tag != '' ||
1329
+ startsWith(github.ref, 'refs/tags/v') }}
1330
+ runs-on: macos-latest
1331
+ needs: determine-tag
1332
+
1333
+ steps:
1334
+ - name: Checkout code
1335
+ uses: actions/checkout@v4
1336
+
1337
+ - name: Set environment variables
1338
+ id: set_vars
1339
+ run: |
1340
+ echo "MODEL_NAME=base.en" >> $GITHUB_ENV
1341
+ echo "GEN_MODEL_NAME=whisper-${{ needs.determine-tag.outputs.tag_name }}-ggml-base.en-encoder.mlmodelc" >> $GITHUB_ENV
1342
+
1343
+ - name: Download model
1344
+ run: |
1345
+ ./models/download-ggml-model.sh ${{ env.MODEL_NAME }}
1346
+
1347
+ - name: Generate CoreML model
1348
+ run: |
1349
+ python3.11 -m venv venv
1350
+ source venv/bin/activate
1351
+ pip install ane_transformers openai-whisper coremltools
1352
+ ./models/generate-coreml-model.sh ${{ env.MODEL_NAME }}
1353
+
1354
+ vad:
1355
+ if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
1356
+ github.event.inputs.run_type == 'full-ci' }}
1357
+ runs-on: ubuntu-latest
1358
+
1359
+ steps:
1360
+ - name: Checkout
1361
+ uses: actions/checkout@v4
1362
+
1363
+ - name: Build
1364
+ shell: bash
1365
+ run: |
1366
+ cmake -B build
1367
+ cmake --build build --config Release
1368
+
1369
+ - name: Test
1370
+ shell: bash
1371
+ run: |
1372
+ ctest -R ^test-vad$ --test-dir build --output-on-failure -VV
1373
+
1374
+ # TODO: simplify the following workflows using a matrix
1375
+ ggml-ci-x64-cpu-low-perf:
1376
+ runs-on: ubuntu-22.04
1377
+
1378
+ steps:
1379
+ - name: Clone
1380
+ id: checkout
1381
+ uses: actions/checkout@v4
1382
+
1383
+ - name: ccache
1384
+ uses: ggml-org/ccache-action@v1.2.16
1385
+ with:
1386
+ key: ggml-ci-x64-cpu-low-perf
1387
+ evict-old-files: 1d
1388
+
1389
+ - name: Dependencies
1390
+ id: depends
1391
+ run: |
1392
+ sudo apt-get update
1393
+ sudo apt-get install build-essential libcurl4-openssl-dev
1394
+
1395
+ - name: Test
1396
+ id: ggml-ci
1397
+ run: |
1398
+ LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
1399
+
1400
+ ggml-ci-arm64-cpu-low-perf:
1401
+ runs-on: ubuntu-22.04-arm
1402
+
1403
+ steps:
1404
+ - name: Clone
1405
+ id: checkout
1406
+ uses: actions/checkout@v4
1407
+
1408
+ - name: ccache
1409
+ uses: ggml-org/ccache-action@v1.2.16
1410
+ with:
1411
+ key: ggml-ci-arm64-cpu-low-perf
1412
+ evict-old-files: 1d
1413
+
1414
+ - name: Dependencies
1415
+ id: depends
1416
+ run: |
1417
+ sudo apt-get update
1418
+ sudo apt-get install build-essential libcurl4-openssl-dev
1419
+
1420
+ - name: Test
1421
+ id: ggml-ci
1422
+ run: |
1423
+ LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
1424
+
1425
+ ggml-ci-x64-cpu-high-perf:
1426
+ runs-on: ubuntu-22.04
1427
+
1428
+ steps:
1429
+ - name: Clone
1430
+ id: checkout
1431
+ uses: actions/checkout@v4
1432
+
1433
+ - name: ccache
1434
+ uses: ggml-org/ccache-action@v1.2.16
1435
+ with:
1436
+ key: ggml-ci-x64-cpu-high-perf
1437
+ evict-old-files: 1d
1438
+
1439
+ - name: Dependencies
1440
+ id: depends
1441
+ run: |
1442
+ sudo apt-get update
1443
+ sudo apt-get install build-essential libcurl4-openssl-dev
1444
+
1445
+ - name: Test
1446
+ id: ggml-ci
1447
+ run: |
1448
+ LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
1449
+
1450
+ ggml-ci-arm64-cpu-high-perf:
1451
+ runs-on: ubuntu-22.04-arm
1452
+
1453
+ steps:
1454
+ - name: Clone
1455
+ id: checkout
1456
+ uses: actions/checkout@v4
1457
+
1458
+ - name: ccache
1459
+ uses: ggml-org/ccache-action@v1.2.16
1460
+ with:
1461
+ key: ggml-ci-arm64-cpu-high-perf
1462
+ evict-old-files: 1d
1463
+
1464
+ - name: Dependencies
1465
+ id: depends
1466
+ run: |
1467
+ sudo apt-get update
1468
+ sudo apt-get install build-essential libcurl4-openssl-dev
1469
+
1470
+ - name: Test
1471
+ id: ggml-ci
1472
+ run: |
1473
+ LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
1474
+
1475
+ ggml-ci-arm64-cpu-high-perf-sve:
1476
+ runs-on: ubuntu-22.04-arm
1477
+
1478
+ steps:
1479
+ - name: Clone
1480
+ id: checkout
1481
+ uses: actions/checkout@v4
1482
+
1483
+ - name: ccache
1484
+ uses: ggml-org/ccache-action@v1.2.16
1485
+ with:
1486
+ key: ggml-ci-arm64-cpu-high-perf-sve
1487
+ evict-old-files: 1d
1488
+
1489
+ - name: Dependencies
1490
+ id: depends
1491
+ run: |
1492
+ sudo apt-get update
1493
+ sudo apt-get install build-essential libcurl4-openssl-dev
1494
+
1495
+ - name: Test
1496
+ id: ggml-ci
1497
+ run: |
1498
+ LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
1499
+
1500
+ ggml-ci-x64-nvidia-cuda:
1501
+ runs-on: [self-hosted, Linux, X64, NVIDIA]
1502
+
1503
+ steps:
1504
+ - name: Clone
1505
+ id: checkout
1506
+ uses: actions/checkout@v4
1507
+
1508
+ - name: Test
1509
+ id: ggml-ci
1510
+ run: |
1511
+ nvidia-smi
1512
+ GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
1513
+
1514
+ ggml-ci-x64-nvidia-vulkan-cm:
1515
+ runs-on: [self-hosted, Linux, X64, NVIDIA]
1516
+
1517
+ steps:
1518
+ - name: Clone
1519
+ id: checkout
1520
+ uses: actions/checkout@v4
1521
+
1522
+ - name: Test
1523
+ id: ggml-ci
1524
+ run: |
1525
+ vulkaninfo --summary
1526
+ GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
1527
+
1528
+ ggml-ci-x64-nvidia-vulkan-cm2:
1529
+ runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]
1530
+
1531
+ steps:
1532
+ - name: Clone
1533
+ id: checkout
1534
+ uses: actions/checkout@v4
1535
+
1536
+ - name: Test
1537
+ id: ggml-ci
1538
+ run: |
1539
+ vulkaninfo --summary
1540
+ GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
1541
+
1542
+ ggml-ci-x64-cpu-amx:
1543
+ runs-on: [self-hosted, Linux, X64, CPU, AMX]
1544
+
1545
+ steps:
1546
+ - name: Clone
1547
+ id: checkout
1548
+ uses: actions/checkout@v4
1549
+
1550
+ - name: Test
1551
+ id: ggml-ci
1552
+ run: |
1553
+ bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
1554
+
1555
+ ggml-ci-mac-metal:
1556
+ runs-on: [self-hosted, macOS, ARM64]
1557
+
1558
+ steps:
1559
+ - name: Clone
1560
+ id: checkout
1561
+ uses: actions/checkout@v4
1562
+
1563
+ - name: Test
1564
+ id: ggml-ci
1565
+ run: |
1566
+ GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
1567
+
1568
+ ggml-ci-mac-vulkan:
1569
+ runs-on: [self-hosted, macOS, ARM64]
1570
+
1571
+ steps:
1572
+ - name: Clone
1573
+ id: checkout
1574
+ uses: actions/checkout@v4
1575
+
1576
+ - name: Test
1577
+ id: ggml-ci
1578
+ run: |
1579
+ vulkaninfo --summary
1580
+ GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
whisper.cpp/.github/workflows/docker.yml ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish Docker image
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches:
7
+ - master
8
+
9
+ jobs:
10
+ push_to_registry:
11
+ name: Push Docker image to Docker Hub
12
+ if: github.event.pull_request.draft == false
13
+
14
+ runs-on: ubuntu-22.04
15
+ env:
16
+ COMMIT_SHA: ${{ github.sha }}
17
+ strategy:
18
+ fail-fast: false
19
+ matrix:
20
+ config:
21
+ - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
22
+ - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
23
+ - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" }
24
+ - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
25
+
26
+ steps:
27
+ - name: Check out the repo
28
+ uses: actions/checkout@v3
29
+
30
+ - name: Set up QEMU
31
+ uses: docker/setup-qemu-action@v3
32
+ with:
33
+ image: tonistiigi/binfmt:qemu-v7.0.0-28
34
+
35
+ - name: Set up Docker Buildx
36
+ uses: docker/setup-buildx-action@v3
37
+
38
+ - name: Log in to Docker Hub
39
+ uses: docker/login-action@v3
40
+ with:
41
+ registry: ghcr.io
42
+ username: ${{ github.repository_owner }}
43
+ password: ${{ secrets.GITHUB_TOKEN }}
44
+
45
+ - name: Free up disk space
46
+ run: |
47
+ sudo apt-get remove -y '^dotnet-.*' '^llvm-.*' '^mysql-.*' '^postgresql-.*'
48
+ sudo apt-get autoremove -y
49
+ sudo apt-get autoclean
50
+
51
+ sudo rm -rf /usr/share/dotnet
52
+ sudo rm -rf /usr/local/lib/android
53
+ sudo rm -rf /opt/ghc
54
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
55
+
56
+ docker system prune -af
57
+
58
+ df -h
59
+
60
+ - name: Generate tags
61
+ id: tags
62
+ run: |
63
+ TAGS="ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
64
+ if [ "${{ github.event_name }}" == "push" ]; then
65
+ TAGS="$TAGS,ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
66
+ fi
67
+ echo "tags=$TAGS" >> $GITHUB_OUTPUT
68
+
69
+ - name: Build and push Docker image (tagged)
70
+ uses: docker/build-push-action@v5
71
+ with:
72
+ context: .
73
+ push: ${{ github.event_name == 'push' }}
74
+ platforms: ${{ matrix.config.platform }}
75
+ tags: ${{ steps.tags.outputs.tags }}
76
+ file: ${{ matrix.config.dockerfile }}
whisper.cpp/.github/workflows/examples-wasm.yml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Examples WASM
2
+ on:
3
+ push:
4
+ branches: ["master"]
5
+
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+ pages: write
11
+ id-token: write
12
+
13
+ concurrency:
14
+ group: "pages"
15
+ cancel-in-progress: false
16
+
17
+ jobs:
18
+ deploy-wasm-github-pages:
19
+ environment:
20
+ name: github-pages
21
+ url: ${{ steps.deployment.outputs.page_url }}
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - name: Checkout
25
+ uses: actions/checkout@v4
26
+
27
+ - name: Setup Pages
28
+ uses: actions/configure-pages@v4
29
+
30
+ - name: Setup emsdk
31
+ uses: mymindstorm/setup-emsdk@v14
32
+
33
+ - name: Build WASM Examples
34
+ # Enable for real build later in whisper.cpp
35
+ run: |
36
+ mkdir -p build-em && cd build-em
37
+ emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
38
+ make -j
39
+
40
+ - name: Create staging directory
41
+ run: mkdir -p staging
42
+
43
+ - name: Create .nojekyll file in staging directory
44
+ run: touch staging/.nojekyll
45
+
46
+ - name: Copy application files
47
+ run: |
48
+ build_dir=build-em/bin
49
+
50
+ ls ${build_dir}
51
+
52
+ # command.wasm
53
+ target_dir=staging/command.wasm
54
+ mkdir -p ${target_dir}
55
+ cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
56
+ cp ${build_dir}/libcommand.js ${target_dir}
57
+
58
+ # bench.wasm
59
+ target_dir=staging/bench.wasm
60
+ mkdir -p ${target_dir}
61
+ cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
62
+ cp ${build_dir}/libbench.js ${target_dir}
63
+
64
+ # stream.wasm
65
+ target_dir=staging/stream.wasm
66
+ mkdir -p ${target_dir}
67
+ cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
68
+ cp ${build_dir}/libstream.js ${target_dir}
69
+
70
+ # wchess.wasm
71
+ target_dir=staging/wchess.wasm
72
+ mkdir -p ${target_dir}
73
+ cp -r ${build_dir}/wchess.wasm/{index.html,css,img,js} ${target_dir}
74
+ cp ${build_dir}/wchess.wasm.js ${target_dir}
75
+
76
+ # whisper.wasm (this will be the main example page)
77
+ target_dir=staging
78
+ mkdir -p ${target_dir}
79
+ cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
80
+ cp ${build_dir}/libmain.js ${target_dir}
81
+
82
+ # Copy Cross-Origin Isolation service worker
83
+ cp -v examples/coi-serviceworker.js staging/
84
+
85
+ - name: List files in staging directory (for debugging)
86
+ run: |
87
+ echo "Files in staging directory:"
88
+ find staging -type f | sort
89
+
90
+ - name: Upload artifact
91
+ uses: actions/upload-pages-artifact@v3
92
+ with:
93
+ path: ./staging
94
+
95
+ - name: Deploy to GitHub Pages
96
+ id: deployment
97
+ uses: actions/deploy-pages@v4
whisper.cpp/.github/workflows/examples.yml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Examples Tests
2
+ on:
3
+ push:
4
+ paths:
5
+ - examples/addon.node/**
6
+ - whisper.h
7
+ pull_request:
8
+ paths:
9
+ - examples/addon.node/**
10
+ - whisper.h
11
+
12
+ jobs:
13
+ addon_node-ubuntu-22:
14
+ runs-on: ubuntu-22.04
15
+ strategy:
16
+ matrix:
17
+ node-version: [ 16.x, 18.x ]
18
+ steps:
19
+ - name: Clone
20
+ uses: actions/checkout@v1
21
+
22
+ - name: Dependencies
23
+ run: |
24
+ sudo apt-get update
25
+ sudo apt-get install build-essential git
26
+ sudo apt-get install cmake
27
+ sudo apt-get install libsdl2-dev
28
+
29
+ - name: Use Node.js ${{ matrix.node-version }}
30
+ uses: actions/setup-node@v1
31
+ with:
32
+ node-version: ${{ matrix.node-version }}
33
+ cache: 'npm'
34
+
35
+ - name: Install package.json dependencies
36
+ working-directory: ./examples/addon.node
37
+ run: npm install
38
+
39
+ - name: Compile addon.node
40
+ run: npx cmake-js compile -T addon.node -B Release
41
+
42
+ - name: Download test model
43
+ run: |
44
+ bash ./models/download-ggml-model.sh base.en
45
+ - name: Test
46
+ run: |
47
+ cd examples/addon.node
48
+ npm run test
whisper.cpp/.gitignore ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.o
2
+ *.a
3
+ *.d
4
+ .cache/
5
+ .coreml/
6
+ .test/
7
+ .venv/
8
+ .vs/
9
+ .vscode/
10
+ .DS_Store
11
+ .vimspector.json
12
+ /CMakeSettings.json
13
+ /talk-llama.dSYM/
14
+
15
+ build/
16
+ build-*/
17
+ build_*/
18
+ tmp/
19
+
20
+ # SPM
21
+ .build/
22
+ .swiftpm
23
+ *.metallib
24
+
25
+ ggml-metal-embed.metal
26
+ ggml-metal-embed.metal.tmp
27
+
28
+ /main
29
+ /stream
30
+ /command
31
+ /talk
32
+ /talk-llama
33
+ /bench
34
+ /quantize
35
+ /server
36
+ /lsp
37
+
38
+ arm_neon.h
39
+ sync.sh
40
+ libwhisper.a
41
+ libwhisper.so
42
+ compile_commands.json
43
+
44
+ examples/arm_neon.h
45
+ examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
46
+ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
47
+ examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
48
+
49
+ extra/bench-gg.txt
50
+
51
+ models/*.mlmodel
52
+ models/*.mlmodelc
53
+ models/*.mlpackage
54
+ models/*-encoder-openvino.xml
55
+ models/*-encoder-openvino-cache/
56
+ bindings/java/.gradle/
57
+ bindings/java/.idea/
58
+ .idea/
59
+
60
+ benchmark_results.csv
61
+ cmake-build-debug/
62
+ .cxx/
63
+ .gradle/
64
+ local.properties
65
+ .log
66
+ .exe
whisper.cpp/AUTHORS ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # date: Tue Feb 4 13:03:35 EET 2025
2
+ # this file is auto-generated by scripts/gen-authors.sh
3
+
4
+ 0/0 <zero@imaskeleton.me>
5
+ 0cc4m <picard12@live.de>
6
+ 0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
7
+ 65a <10104049+65a@users.noreply.github.com>
8
+ AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
9
+ AT <manyoso@users.noreply.github.com>
10
+ Aarni Koskela <akx@iki.fi>
11
+ Aaron Pham <29749331+aarnphm@users.noreply.github.com>
12
+ Aaron Taylor <aaron@exphat.com>
13
+ Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
14
+ Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
15
+ Adam Jones <domdomegg+git@gmail.com>
16
+ Adrien Gallouët <adrien@gallouet.fr>
17
+ Adrien Gallouët <angt@huggingface.co>
18
+ AfryMask <AfryMask@163.com>
19
+ Ahmad Bilal <ahmad.bilal@empglabs.com>
20
+ Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
21
+ AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
22
+ AidanBeltonS <aidan.belton@codeplay.com>
23
+ Akarshan Biswas <akarshan.biswas@gmail.com>
24
+ Akarshan Biswas <akarshanbiswas@fedoraproject.org>
25
+ Akash Mahajan <akash7190@gmail.com>
26
+ Akash Mahajan <akashmjn@stanford.edu>
27
+ Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
28
+ Alan <unknown>
29
+ Albert Jin <albert.jin@gmail.com>
30
+ Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
31
+ Alberto Cabrera Pérez <alberto.cabrera@intel.com>
32
+ Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
33
+ Alex Azarov <alex@azarov.by>
34
+ Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
35
+ Alex Evgrashin <aevgrashin@yandex.ru>
36
+ Alex O'Connell <35843486+acon96@users.noreply.github.com>
37
+ Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
38
+ Alexandru Mariuti <alex@mariuti.com>
39
+ Alexey Kharlamov <alexey@kharlamov.biz>
40
+ Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
41
+ Ali Alameh <ali.alameh@isae.edu.lb>
42
+ Alter <0x7c48@gmail.com>
43
+ Ananta Bastola <anantarajbastola@gmail.com>
44
+ Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
45
+ Andreas Lubbe <git@lubbe.org>
46
+ Andreu Huguet <andreuhuguet@gmail.com>
47
+ Andrew Huynh <a5thuynh@gmail.com>
48
+ Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
49
+ Andrew S <andrews54757@gmail.com>
50
+ Andy Maloney <asmaloney@gmail.com>
51
+ Anton Kostin <masguit42@users.noreply.github.com>
52
+ Artyom Mezin <psycho.fading@gmail.com>
53
+ Asad Memon <asad.lionpk@gmail.com>
54
+ Ashraful Islam <ashraful.meche@gmail.com>
55
+ AsukaMinato <asukaminato@nyan.eu.org>
56
+ AustinMroz <austinmroz@utexas.edu>
57
+ Avik Sengupta <avik@sengupta.net>
58
+ Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
59
+ Baffin Lee <baffinlee@gmail.com>
60
+ Ben Ashbaugh <ben.ashbaugh@intel.com>
61
+ Ben Nortier <bjnortier@gmail.com>
62
+ Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
63
+ Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
64
+ Binozo <70137898+Binozo@users.noreply.github.com>
65
+ Bo-Yi Wu <appleboy.tw@gmail.com>
66
+ Boris Bliznioukov <blib@mail.com>
67
+ Borislav Stanimirov <b.stanimirov@abv.bg>
68
+ Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
69
+ Brian Murray <brian@bmurray.ca>
70
+ CRD716 <crd716@gmail.com>
71
+ Canis Lupus <Canis-UK@users.noreply.github.com>
72
+ Carlos Zoido <mrgalleta@gmail.com>
73
+ Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
74
+ CarterLi999 <664681047@qq.com>
75
+ ChangSeok Oh <shivamidow@users.noreply.github.com>
76
+ Changyeon Kim <cyzero.kim@samsung.com>
77
+ Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
78
+ Charles Xu <63788048+chaxu01@users.noreply.github.com>
79
+ Charles Xu <charles.xu@arm.com>
80
+ Chen Xi <xi2.chen@intel.com>
81
+ Chen Xi <xixichen08@foxmail.com>
82
+ Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
83
+ Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
84
+ Chidi Williams <williamschidi1@gmail.com>
85
+ Chris Elrod <elrodc@gmail.com>
86
+ Christian <12550267+iceychris@users.noreply.github.com>
87
+ Christian Kastner <ckk@kvr.at>
88
+ Clifford Heath <clifford.heath@gmail.com>
89
+ Clint Herron <hanclinto@gmail.com>
90
+ Colin <github@whoisc.cc>
91
+ Conrad Kramer <conrad@conradkramer.com>
92
+ Corey Earwood <iamcgn+github@gmail.com>
93
+ CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
94
+ DAN™ <dranger003@gmail.com>
95
+ DGdev91 <DGdev91@users.noreply.github.com>
96
+ Damian Czaja <trojan295@protonmail.com>
97
+ Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
98
+ Dan Johansson <dan.johansson@arm.com>
99
+ Daniel Bevenius <daniel.bevenius@gmail.com>
100
+ Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
101
+ Daniel Ziegenberg <daniel@ziegenberg.at>
102
+ Daniele <57776841+daniandtheweb@users.noreply.github.com>
103
+ Dave <dave-fl@users.noreply.github.com>
104
+ Dave Airlie <airlied@gmail.com>
105
+ Dave Airlie <airlied@redhat.com>
106
+ Daven Sanassy <daven@vochlea.co.uk>
107
+ David <dnhkng@gmail.com>
108
+ David Thorpe <djt@mutablelogic.com>
109
+ DavidKorczynski <david@adalogics.com>
110
+ Davidson Francis <davidsondfgl@gmail.com>
111
+ Dener Stassun <denerstassun@gmail.com>
112
+ Dibakar Gope <dibakar.gope@arm.com>
113
+ Didzis Gosko <didzis@users.noreply.github.com>
114
+ Diego Devesa <slarengh@gmail.com>
115
+ Digipom <admin@digipom.com>
116
+ Dimo <dimo@ieee.org>
117
+ Djip007 <3705339+Djip007@users.noreply.github.com>
118
+ Djip007 <djip.perois@free.fr>
119
+ Dody Suria Wijaya <dodysw@gmail.com>
120
+ Dou Xinpeng <15529241576@163.com>
121
+ Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
122
+ Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
123
+ Duncan McConnell <ddmcconnell4@gmail.com>
124
+ Egor Egorov <me@egorfine.com>
125
+ Elkana Bardugo <ttv200@gmail.com>
126
+ Emmanuel Schmidbauer <eschmidbauer@gmail.com>
127
+ Engininja2 <139037756+Engininja2@users.noreply.github.com>
128
+ Eric Curtin <ericcurtin17@gmail.com>
129
+ Eric Swanson <eswanson@alloscomp.com>
130
+ Eric Tendian <erictendian@gmail.com>
131
+ Eric Zhang <34133756+EZForever@users.noreply.github.com>
132
+ Erik Scholz <Green-Sky@users.noreply.github.com>
133
+ Evan Jones <evan.q.jones@gmail.com>
134
+ Evan Martin <evan.martin@gmail.com>
135
+ Eve <139727413+netrunnereve@users.noreply.github.com>
136
+ Evgeny Kuznetsov <evgeny@kuznetsov.md>
137
+ F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
138
+ Faisal Zaghloul <quic_fzaghlou@quicinc.com>
139
+ Fangjun Kuang <csukuangfj@gmail.com>
140
+ Felix <stenbackfelix@gmail.com>
141
+ Finn Voorhees <finnvoorhees@gmail.com>
142
+ FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
143
+ FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
144
+ Frankie Robertson <frankier@users.noreply.github.com>
145
+ Gang Chen <goncha@gmail.com>
146
+ Gavin Cai <gavin1818@hotmail.com>
147
+ George Hindle <george@georgehindle.com>
148
+ Georgi Gerganov <ggerganov@gmail.com>
149
+ Gilad S <7817232+giladgd@users.noreply.github.com>
150
+ Gilad S <giladgd@users.noreply.github.com>
151
+ Gilad S. <7817232+giladgd@users.noreply.github.com>
152
+ GitAritron <103900385+GitAritron@users.noreply.github.com>
153
+ GiviMAD <GiviMAD@users.noreply.github.com>
154
+ Gleicon Moraes <gleicon@gmail.com>
155
+ Gregor Jasny <gjasny@googlemail.com>
156
+ Guillaume Wenzek <gwenzek@users.noreply.github.com>
157
+ HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
158
+ Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
159
+ Hang <bebound@gmail.com>
160
+ Haus1 <haus.xda@gmail.com>
161
+ Herman Semenov <GermanAizek@yandex.ru>
162
+ HimariO <dsfhe49854@gmail.com>
163
+ Hong Bo PENG <penghb@cn.ibm.com>
164
+ Hrishikesh Barman <geekodour@users.noreply.github.com>
165
+ Hugo <hugo@whynothugo.nl>
166
+ Ian Bicking <ian@ianbicking.org>
167
+ Ian Bull <irbull@eclipsesource.com>
168
+ Ihar Hrachyshka <ihrachys@redhat.com>
169
+ Ikko Ashimine <eltociear@gmail.com>
170
+ Ikko Eltociear Ashimine <eltociear@gmail.com>
171
+ InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
172
+ Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
173
+ Ivan <nekotekina@gmail.com>
174
+ Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
175
+ Ivan Gorin <ivangorin21@gmail.com>
176
+ Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
177
+ JJ <103335846+computerscienceiscool@users.noreply.github.com>
178
+ Jack Mousseau <jmousseau@users.noreply.github.com>
179
+ JacobLinCool <jacoblincool@gmail.com>
180
+ Jakub Ráček <blizzcz@gmail.com>
181
+ Jared Van Bortel <jared@nomic.ai>
182
+ Jay Binks <jaybinks@gmail.com>
183
+ Jayant <jayantyadav202@gmail.com>
184
+ Jeff Bolz <jbolz@nvidia.com>
185
+ Jeroen Mostert <jeroen.mostert@cm.com>
186
+ Jhen-Jie Hong <developer@jhen.me>
187
+ Jhen-Jie Hong <iainst0409@gmail.com>
188
+ JidongZhang-THU <1119708529@qq.com>
189
+ Jo Liss <joliss42@gmail.com>
190
+ Joe Todd <joe.todd@codeplay.com>
191
+ Johan <jr.raffin@gmail.com>
192
+ Johannes Gäßler <johannesg@5d6.de>
193
+ John Balis <phobossystems@gmail.com>
194
+ JohnnyB <jboero@users.noreply.github.com>
195
+ Jonathan Soo <jcsoo@agora.com>
196
+ Jonno <1160532+razodactyl@users.noreply.github.com>
197
+ Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
198
+ Jose <34888496+Jerry-Master@users.noreply.github.com>
199
+ Josh Bleecher Snyder <josharian@gmail.com>
200
+ Josscii <jossciiweiyi@gmail.com>
201
+ Judd <foldl@users.noreply.github.com>
202
+ Jumper775 <78500318+jumpers775@users.noreply.github.com>
203
+ Jun Hee Yoo <contact.jhyoo@gmail.com>
204
+ Junil Kim <logyourself@gmail.com>
205
+ Justina Cho <justcho5@gmail.com>
206
+ Justine Tunney <jtunney@gmail.com>
207
+ Justine Tunney <jtunney@mozilla.com>
208
+ KITAITI Makoto <KitaitiMakoto@gmail.com>
209
+ KP Kaiser <kirk@zothcorp.com>
210
+ Kamilake <exjang0@gmail.com>
211
+ Karol Kontny <82021046+kkontny@users.noreply.github.com>
212
+ Karthick <j.karthic2004@gmail.com>
213
+ Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
214
+ Kasumi <90275229+kasumi-1@users.noreply.github.com>
215
+ Kawrakow <48489457+ikawrakow@users.noreply.github.com>
216
+ Kendrick Taylor <kendrick@circuitsix.com>
217
+ Kevin Brothaler <admin@digipom.com>
218
+ Kevin Gibbons <bakkot@gmail.com>
219
+ Konosuke Sakai <konosuke@konosuke.work>
220
+ Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
221
+ Kreijstal <rainb@tfwno.gf>
222
+ Kylin <56434533+KyL0N@users.noreply.github.com>
223
+ LBlue <153975653+lbluep@users.noreply.github.com>
224
+ Larry Battle <larry.battle.tech@gmail.com>
225
+ Laytan Laats <laytanlaats@hotmail.com>
226
+ Leo Moll <leo.moll@yeasoft.com>
227
+ Lexevolution <31176843+Lexevolution@users.noreply.github.com>
228
+ LittleLoli <26589867+WhichWho@users.noreply.github.com>
229
+ Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
230
+ Luis Herrera <herrera-luis@users.noreply.github.com>
231
+ Lukas Rist <glaslos@gmail.com>
232
+ M. A. Ali <73258591+MightyStud@users.noreply.github.com>
233
+ M. Eren Akbiyik <erenakbiyik@gmail.com>
234
+ Ma Mingfei <mingfei.ma@intel.com>
235
+ Maciek <maciek.mab122@gmail.com>
236
+ Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
237
+ Marcin Mielniczuk <marmistrz.dev@zoho.eu>
238
+ Mark Karpelès <MagicalTux@users.noreply.github.com>
239
+ Mark Zhuang <zhuangqiubin@gmail.com>
240
+ Markus Tavenrath <mtavenrath@users.noreply.github.com>
241
+ Martin Delille <martin@delille.org>
242
+ Martin Warnaar <martinwarnaar@gmail.com>
243
+ Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
244
+ Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
245
+ Mathieu Baudier <mbaudier@argeo.org>
246
+ Mathijs de Bruin <mathijs@mathijsfietst.nl>
247
+ Matija Pevec <mightymatth@users.noreply.github.com>
248
+ Matt Stephenson <mstephenson6@users.noreply.github.com>
249
+ Max Krasnyansky <max.krasnyansky@gmail.com>
250
+ Max Krasnyansky <quic_maxk@quicinc.com>
251
+ Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
252
+ Meng, Hengyu <hengyu.meng@intel.com>
253
+ Mengqing Cao <cmq0113@163.com>
254
+ Michael Podvitskiy <podvitskiymichael@gmail.com>
255
+ Michael Rienstra <mrienstra@gmail.com>
256
+ Mikhail Grigorev <sleuthhound@gmail.com>
257
+ Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
258
+ Mohit Agarwal <mohit@sdf.org>
259
+ Molly Sophia <mollysophia379@gmail.com>
260
+ Murilo Santana <mvrilo@gmail.com>
261
+ NETZkultur GmbH <mulholland@netzkultur.de>
262
+ Natsu <chino@hotococoa.moe>
263
+ Neil Chudleigh <nchudleigh@users.noreply.github.com>
264
+ Neo Zhang <14088817+arthw@users.noreply.github.com>
265
+ Neo Zhang Jianyu <jianyu.zhang@intel.com>
266
+ Neuman Vong <neuman.vong@gmail.com>
267
+ Nicholai Tukanov <nicholaitukanov@gmail.com>
268
+ Nicholas Albion <nalbion@yahoo.com>
269
+ Nico Bosshard <nico@bosshome.ch>
270
+ Nicolò Scipione <nicolo.scipione@codeplay.com>
271
+ Niels Mayer <Niels.Mayer@gmail.com>
272
+ Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
273
+ Nikolaj Olsson <nikse.dk@gmail.com>
274
+ Okabintaro <103938900+Okabintaro@users.noreply.github.com>
275
+ Oleg Sidorov <me@whitebox.io>
276
+ Oleg Sidorov <oleg@sidorov.nl>
277
+ Olivier Chafik <ochafik@users.noreply.github.com>
278
+ Ondrej Kokes <ondrej.kokes@gmail.com>
279
+ Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
280
+ PAB <pierreantoine.bannier@gmail.com>
281
+ Paul Tsochantaris <ptsochantaris@icloud.com>
282
+ Pedro Probst <pprobst@insiberia.net>
283
+ Peng <hzp1024@qq.com>
284
+ Peter <peter277@users.noreply.github.com>
285
+ Philipp Zabel <philipp.zabel@gmail.com>
286
+ Philippe Normand <phil@base-art.net>
287
+ Philippe Normand <philn@igalia.com>
288
+ Plamen Minev <pacominev@gmail.com>
289
+ Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
290
+ Przemysław Pawełczyk <przemoc@gmail.com>
291
+ Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
292
+ R0CKSTAR <xiaodong.ye@mthreads.com>
293
+ R0CKSTAR <yeahdongcn@gmail.com>
294
+ Radoslav Gerganov <rgerganov@gmail.com>
295
+ Radosław Gryta <radek.gryta@gmail.com>
296
+ Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
297
+ Raiya Araki <83504221+rai62@users.noreply.github.com>
298
+ Reinforce-II <fate@eastal.com>
299
+ Reinis Muiznieks <muiznieks.reinis@gmail.com>
300
+ RelatedTitle <r3latedtitle@gmail.com>
301
+ Rémy Oudompheng <oudomphe@phare.normalesup.org>
302
+ RhinoDevel <RhinoDevel@users.noreply.github.com>
303
+ Rich Jones <miserlou@gmail.com>
304
+ Robert Ormandi <52251610+ormandi@users.noreply.github.com>
305
+ Robin <robin.xw@hotmail.com>
306
+ Roddur Dasgupta <roddurd@gmail.com>
307
+ Roland Rabien <figbug@gmail.com>
308
+ Romain Biessy <romain.biessy@codeplay.com>
309
+ Ronsor <ronsor@ronsor.pw>
310
+ Rotem Dan <rotemdan@gmail.com>
311
+ Ryan Hitchman <hitchmanr@gmail.com>
312
+ Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
313
+ RyanChang <ftes90015@gmail.com>
314
+ SRHMorris <69468379+SRHMorris@users.noreply.github.com>
315
+ SXX <sxx1136965276@gmail.com>
316
+ Sacha Arbonel <sacha.arbonel@hotmail.fr>
317
+ Salman Faroz <stsfaroz@gmail.com>
318
+ Salvatore Mesoraca <s.mesoraca16@gmail.com>
319
+ Sam <49637763+Onlyartist9@users.noreply.github.com>
320
+ Sam Pullara <spullara@gmail.com>
321
+ Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
322
+ Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
323
+ Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
324
+ Sergio López <slp@redhat.com>
325
+ Sergio López <slp@sinrega.org>
326
+ Shanshan Shen <467638484@qq.com>
327
+ Shijie <821898965@qq.com>
328
+ Shupei Fan <dymarkfan@outlook.com>
329
+ Siddharth Ramakrishnan <srr2141@columbia.edu>
330
+ Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
331
+ Simon Moisselin <simon.moisstoll@gmail.com>
332
+ Sindre Sorhus <sindresorhus@gmail.com>
333
+ Slava Primenko <primenko.s@gmail.com>
334
+ Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
335
+ Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
336
+ Stefan Sydow <s.sydow@heinlein-video.de>
337
+ Stefan Sydow <stefan@sydow.email>
338
+ Syahmi Azhar <prsyahmi@gmail.com>
339
+ Syed Jafri <syedjafri97@gmail.com>
340
+ Sơn Phan Trung <phantrungson17@gmail.com>
341
+ Taisei Mima <bhbstar.me@gmail.com>
342
+ Takeshi Inoue <inoue.takeshi@gmail.com>
343
+ Tamotsu Takahashi <ttakah+github@gmail.com>
344
+ Taras Glek <taras@thegp.com>
345
+ Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
346
+ Thamster <Thamster@users.noreply.github.com>
347
+ Thijs Raymakers <thijs@raymakers.nl>
348
+ Thomas Fitzsimmons <fitzsim@fitzsim.org>
349
+ Tiago Fassoni <tiagofassoni@users.noreply.github.com>
350
+ Tienshiao Ma <tienshiao@tienshiao.org>
351
+ Tim Miller <drasticactions@users.noreply.github.com>
352
+ Timothy Cronin <40186632+4imothy@users.noreply.github.com>
353
+ Tobrun <tobrun.van.nuland@gmail.com>
354
+ Todd <taf2@users.noreply.github.com>
355
+ Toliver <teejae@gmail.com>
356
+ Tong Li <31761981+litongjava@users.noreply.github.com>
357
+ Tony Wasserka <4840017+neobrain@users.noreply.github.com>
358
+ Topping1 <78745143+Topping1@users.noreply.github.com>
359
+ Travis Cline <travis.cline@gmail.com>
360
+ UEXTM.com <84163508+uextm@users.noreply.github.com>
361
+ UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
362
+ Vadim Peretokin <vperetokin@hey.com>
363
+ Valentin Gosu <1454649+valenting@users.noreply.github.com>
364
+ Vin Misra <vinith@alum.mit.edu>
365
+ Vulcan <93451215+trholding@users.noreply.github.com>
366
+ WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
367
+ William Tambellini <william.tambellini@gmail.com>
368
+ William Tambellini <wtambellini@sdl.com>
369
+ Wilson Silva <wilson.dsigns@gmail.com>
370
+ Xiang (Kevin) Li <kevinli020508@gmail.com>
371
+ Xiao-Yong Jin <jinxiaoyong@gmail.com>
372
+ XiaotaoChen <chenxiaotao1234@gmail.com>
373
+ Xingchen Song(宋星辰) <xingchensong1996@163.com>
374
+ Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
375
+ Xuan Son Nguyen <thichthat@gmail.com>
376
+ Yajing Tang <phillis@google.com>
377
+ Yang Shen <aplshenyang@gmail.com>
378
+ Yunès <jean.baptiste.yunes@free.fr>
379
+ Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
380
+ Yusuf Redžić <48274562+redzic@users.noreply.github.com>
381
+ ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
382
+ Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
383
+ Zhiyuan Li <lizhiyuan@uniartisan.com>
384
+ Zhiyuan Li <uniartisan2017@gmail.com>
385
+ Zigfrid Zvezdin <ziggerZZ@gmail.com>
386
+ Zollner <24618122+Zolliner@users.noreply.github.com>
387
+ a3sh <38979186+A3shTnT@users.noreply.github.com>
388
+ ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
389
+ agray3 <agray3@users.noreply.github.com>
390
+ ai-at-home <149282006+ai-at-home@users.noreply.github.com>
391
+ aldorof <aldorof@users.noreply.github.com>
392
+ alonfaraj <alonfaraj@gmail.com>
393
+ amd-dwang <dong.wang@amd.com>
394
+ amritahs-ibm <amritahs@linux.vnet.ibm.com>
395
+ andypayne <apayne@gmail.com>
396
+ ardfork <134447697+ardfork@users.noreply.github.com>
397
+ arizhih <40765267+arizhih@users.noreply.github.com>
398
+ automaticcat <daogiatuank54@gmail.com>
399
+ bandoti <141645996+bandoti@users.noreply.github.com>
400
+ be-next <jerome.ramette@gmail.com>
401
+ bert hubert <bert@hubertnet.nl>
402
+ billyct <billy_allen@126.com>
403
+ bmwl <brian.marshall@tolko.com>
404
+ bobqianic <129547291+bobqianic@users.noreply.github.com>
405
+ bocytko <bocytko+github@gmail.com>
406
+ boolemancer <48014766+boolemancer@users.noreply.github.com>
407
+ boolemancer <boolemancer@gmail.com>
408
+ bradmit <151883577+bradmit@users.noreply.github.com>
409
+ brunofaustino <b.fa.amorim@gmail.com>
410
+ bssrdf <merlintiger@hotmail.com>
411
+ byte-6174 <88070277+byte-6174@users.noreply.github.com>
412
+ cdosoftei <ciprian.dosoftei@gmail.com>
413
+ clach04 <Chris.Clark@actian.com>
414
+ compilade <113953597+compilade@users.noreply.github.com>
415
+ compilade <git@compilade.net>
416
+ conradg <conradjgodfrey@gmail.com>
417
+ crummyh <elijah@crums.us>
418
+ ddpasa <112642920+ddpasa@users.noreply.github.com>
419
+ denersc <denerstassun@gmail.com>
420
+ dscripka <dscripka@users.noreply.github.com>
421
+ duthils <duthils@duthils.net>
422
+ ecneladis <ecneladis@users.noreply.github.com>
423
+ faker <nspyia2002@gmail.com>
424
+ fitzsim <fitzsim@fitzsim.org>
425
+ fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
426
+ fraxy-v <65565042+fraxy-v@users.noreply.github.com>
427
+ genevera (she/her) <genevera@users.noreply.github.com>
428
+ geniusnut <geniusnut@gmail.com>
429
+ gilbertgong <gilbert.gong@gmail.com>
430
+ gn64 <yukikaze.jp@gmail.com>
431
+ goldwaving <77494627+goldwaving@users.noreply.github.com>
432
+ greeshmay <greeshmay@gmail.com>
433
+ haopeng <657407891@qq.com>
434
+ hipudding <huafengchun@gmail.com>
435
+ hsinhoyeh <yhh92u@gmail.com>
436
+ hydai <z54981220@gmail.com>
437
+ iamthad <thadeus.j.fleming@gmail.com>
438
+ issixx <46835150+issixx@users.noreply.github.com>
439
+ james wolf <contractorwolf@hotmail.com>
440
+ jdomke <28772296+jdomke@users.noreply.github.com>
441
+ jettoblack <jettoblack@gmail.com>
442
+ jiez <373447296@qq.com>
443
+ joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
444
+ jorismertz <35079666+jorismertz@users.noreply.github.com>
445
+ junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
446
+ junkfood <69683722+JunkFood02@users.noreply.github.com>
447
+ jwijffels <jwijffels@bnosac.be>
448
+ k.h.lai <adrian.k.h.lai@outlook.com>
449
+ kamranjon <kamranjon@gmail.com>
450
+ katsu560 <katsu560oo-@docomo.ne.jp>
451
+ kennethge <57784063+kenneth-ge@users.noreply.github.com>
452
+ keyehzy <msamuel@aluno.puc-rio.br>
453
+ kunnis <kunnis@users.noreply.github.com>
454
+ l3utterfly <gc.pthzfoldr@gmail.com>
455
+ leejet <leejet714@gmail.com>
456
+ leo-pony <nengjunma@outlook.com>
457
+ lhez <quic_lih@quicinc.com>
458
+ litong <31761981+litongjava@users.noreply.github.com>
459
+ liuwei-git <14815172+liuwei-git@users.noreply.github.com>
460
+ lnyan <lkwq007@gmail.com>
461
+ luoyu-intel <yu.luo@intel.com>
462
+ m.bell <m.bell@techsmith.com>
463
+ mahorozte <41834471+mahorozte@users.noreply.github.com>
464
+ mashizora <30516315+mashizora@users.noreply.github.com>
465
+ matt23654 <matthew.webber@protonmail.com>
466
+ matteo <matteogeniaccio@yahoo.it>
467
+ mgrachten <maarten@grachten.eu>
468
+ mkiol <mkiol@users.noreply.github.com>
469
+ mky_coder <47767389+mkycoder@users.noreply.github.com>
470
+ novag <7754358+novag@users.noreply.github.com>
471
+ pajowu <pajowu@pajowu.de>
472
+ pengxin99 <pengxin.yuan@intel.com>
473
+ petterreinholdtsen <pere-github@hungry.com>
474
+ polarmoon <90010972+polarmoon@users.noreply.github.com>
475
+ rlapray <lapray.romain@gmail.com>
476
+ sandrohanea <40202887+sandrohanea@users.noreply.github.com>
477
+ semiformal-net <84111142+semiformal-net@users.noreply.github.com>
478
+ shibukazu <61775791+shibukazu@users.noreply.github.com>
479
+ shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
480
+ slaren <slarengh@gmail.com>
481
+ slashlib <slashlib@users.noreply.github.com>
482
+ snadampal <87143774+snadampal@users.noreply.github.com>
483
+ someone13574 <81528246+someone13574@users.noreply.github.com>
484
+ st-gr <38470677+st-gr@users.noreply.github.com>
485
+ stduhpf <stephduh@live.fr>
486
+ stormofice <58337328+stormofice@users.noreply.github.com>
487
+ texmex76 <40733439+texmex76@users.noreply.github.com>
488
+ thefinaldegree <thefinaldegree@gmail.com>
489
+ thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
490
+ toboil-features <160222185+toboil-features@users.noreply.github.com>
491
+ trixirt <trix@redhat.com>
492
+ ulatekh <ulatekh@yahoo.com>
493
+ undef <undefdev@gmail.com>
494
+ uvos <devnull@uvos.xyz>
495
+ uvos <philipp@uvos.xyz>
496
+ valVk <valVk@users.noreply.github.com>
497
+ venkr <venkateshrameshkumar+1@gmail.com>
498
+ vicalloy <zbirder@gmail.com>
499
+ wangshuai09 <391746016@qq.com>
500
+ woachk <24752637+woachk@users.noreply.github.com>
501
+ xctan <axunlei@gmail.com>
502
+ xdrudis <xavierdrudis@yahoo.es>
503
+ yuri@FreeBSD <yuri@FreeBSD>
504
+ zhangjixiong <code.zjx@gmail.com>
505
+ zhentaoyu <zhentao.yu@intel.com>
506
+ zhouwg <6889919+zhouwg@users.noreply.github.com>
507
+ zhouwg <zhouwg2000@gmail.com>
508
+ 谢乃闻 <sienaiwun@users.noreply.github.com>
509
+ 布客飞龙 <562826179@qq.com>
510
+ Артём Земляк <azemlyak@smart-consulting.ru>
whisper.cpp/CMakeLists.txt ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
2
+ project("whisper.cpp" C CXX)
3
+ project("whisper.cpp" VERSION 1.8.2)
4
+ include(CheckIncludeFileCXX)
5
+
6
+ set(SOVERSION 1)
7
+
8
+ #set(CMAKE_WARN_DEPRECATED YES)
9
+ set(CMAKE_WARN_UNUSED_CLI YES)
10
+
11
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
12
+
13
+ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
14
+ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
15
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
16
+ endif()
17
+
18
+ # Add path to modules
19
+ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
20
+
21
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
22
+
23
+ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
24
+ set(WHISPER_STANDALONE ON)
25
+
26
+ include(git-vars)
27
+
28
+ # configure project version
29
+ configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
30
+ else()
31
+ set(WHISPER_STANDALONE OFF)
32
+ endif()
33
+
34
+ if (EMSCRIPTEN)
35
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
36
+
37
+ set(CMAKE_CXX_STANDARD 17)
38
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
39
+
40
+ option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
41
+
42
+ # TODO: without these, we get the following error:
43
+ # wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
44
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
45
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
46
+
47
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s TOTAL_STACK=5242880")
48
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s TOTAL_STACK=5242880")
49
+
50
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
51
+ else()
52
+ if (MINGW)
53
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
54
+ else()
55
+ set(BUILD_SHARED_LIBS_DEFAULT ON)
56
+ endif()
57
+ endif()
58
+
59
+ option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
60
+
61
+ #
62
+ # option list
63
+ #
64
+
65
+ # debug
66
+ option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
67
+ option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
68
+
69
+ # build
70
+ option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
71
+ option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
72
+
73
+ # sanitizers
74
+ option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
75
+ option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
76
+ option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
77
+
78
+ # extra artifacts
79
+ option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
80
+ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
81
+ option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
82
+
83
+ # 3rd party libs
84
+ option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
85
+ option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
86
+
87
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
88
+ option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
89
+ endif()
90
+
91
+ option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
92
+ option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
93
+ option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
94
+
95
+ # Required for relocatable CMake package
96
+ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
97
+
98
+ # override ggml options
99
+ set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
100
+ set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
101
+ set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
102
+ set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
103
+ set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
104
+
105
+ # transition helpers
106
+ function (whisper_option_depr TYPE OLD NEW)
107
+ if (${OLD})
108
+ message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
109
+ set(${NEW} ON)
110
+ endif()
111
+ endfunction()
112
+
113
+ whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
114
+ whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
115
+ whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
116
+ whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
117
+ whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
118
+ whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
119
+ whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
120
+ whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
121
+ whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
122
+ whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
123
+ whisper_option_depr(WARNING WHISPER_CCACHE GGML_CCACHE)
124
+
125
+ if (GGML_CUDA AND NOT MSVC)
126
+ #GGML_CUDA enabled, add the necessary compile options -Wno-deprecated-gpu-targets
127
+ add_compile_options(-Wno-deprecated-gpu-targets)
128
+ endif()
129
+
130
+ #
131
+ # build the library
132
+ #
133
+
134
+ if (NOT TARGET ggml)
135
+ if (WHISPER_USE_SYSTEM_GGML)
136
+ find_package(ggml REQUIRED)
137
+ if (NOT ggml_FOUND)
138
+ message(FATAL_ERROR "System-installed GGML library not found.")
139
+ endif()
140
+ add_library(ggml ALIAS ggml::ggml)
141
+ else()
142
+ add_subdirectory(ggml)
143
+ if(WIN32)
144
+ # The following adds a _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR macro and is a workaround for
145
+ # the Windows C++ standard library which does not support constexpr mutexes.
146
+ # From the release notes: https://github.com/microsoft/STL/wiki/Changelog
147
+ # Disable constexpr mutex constructor on Windows
148
+ # Fixed mutex's constructor to be constexpr. #3824 #4000 #4339
149
+ # Note: Programs that aren't following the documented restrictions on binary compatibility may encounter
150
+ # null dereferences in mutex machinery. You must follow this rule:
151
+ # When you mix binaries built by different supported versions of the toolset, the Redistributable version
152
+ # must be at least as new as the latest toolset used by any app component.
153
+ # You can define _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR as an escape hatch.
154
+ #
155
+ # Specifically to whisper.cpp this would cause a crash when using the Java bindings.
156
+ # resulting in a Invalid memory access error.
157
+ target_compile_definitions(ggml-base PRIVATE _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
158
+ endif()
159
+ endif()
160
+ # ... otherwise assume ggml is added by a parent CMakeLists.txt
161
+ endif()
162
+ add_subdirectory(src)
163
+
164
+ #
165
+ # install
166
+ #
167
+
168
+ include(GNUInstallDirs)
169
+ include(CMakePackageConfigHelpers)
170
+
171
+ set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
172
+ set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
173
+ set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
174
+
175
+ set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
176
+ set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
177
+ set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
178
+
179
+ get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
180
+
181
+ set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
182
+ install(TARGETS whisper LIBRARY PUBLIC_HEADER)
183
+
184
+ target_compile_definitions(whisper PRIVATE
185
+ WHISPER_VERSION="${PROJECT_VERSION}"
186
+ )
187
+
188
+ configure_package_config_file(
189
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
190
+ ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
191
+ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
192
+ PATH_VARS
193
+ WHISPER_INCLUDE_INSTALL_DIR
194
+ WHISPER_LIB_INSTALL_DIR
195
+ WHISPER_BIN_INSTALL_DIR )
196
+
197
+ write_basic_package_version_file(
198
+ ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
199
+ VERSION ${WHISPER_INSTALL_VERSION}
200
+ COMPATIBILITY SameMajorVersion)
201
+
202
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
203
+ ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
204
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
205
+
206
+ configure_file(cmake/whisper.pc.in
207
+ "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
208
+ @ONLY)
209
+
210
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
211
+ DESTINATION lib/pkgconfig)
212
+
213
+ #
214
+ # programs, examples and tests
215
+ #
216
+
217
+ if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
218
+ include(CTest)
219
+ add_subdirectory(tests)
220
+ endif ()
221
+
222
+ if (WHISPER_BUILD_EXAMPLES)
223
+ add_subdirectory(examples)
224
+ endif()
225
+
226
+ if (MSVC)
227
+ set(MSVC_WARNING_FLAGS
228
+ /wd4101 # Unreferenced local variable
229
+ /wd4005 # Macro redefinition
230
+ /wd4065 # switch statement contains 'default' but no 'case' labels
231
+ /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
232
+ /wd4244 # Conversion from one type to another type, possible loss of data
233
+ /wd4805 # Unsafe mix of types (e.g. 'bool' and 'int') in an operation
234
+ /wd4305 # Truncation from 'type1' to 'type2' (often double to float)
235
+ /wd4996 # Function or variable may be unsafe/deprecated
236
+ )
237
+ function(disable_msvc_warnings target_name)
238
+ if(TARGET ${target_name})
239
+ target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS})
240
+ endif()
241
+ endfunction()
242
+
243
+ if (WHISPER_BUILD_EXAMPLES)
244
+ disable_msvc_warnings(whisper)
245
+ disable_msvc_warnings(common)
246
+ disable_msvc_warnings(common-sdl)
247
+ disable_msvc_warnings(lsp)
248
+ disable_msvc_warnings(wchess-core)
249
+ disable_msvc_warnings(whisper-command)
250
+ disable_msvc_warnings(whisper-cli)
251
+ disable_msvc_warnings(whisper-server)
252
+ disable_msvc_warnings(whisper-stream)
253
+ disable_msvc_warnings(whisper-talk-llama)
254
+ disable_msvc_warnings(whisper-bench)
255
+ disable_msvc_warnings(quantize)
256
+ disable_msvc_warnings(vad-speech-segments)
257
+ endif()
258
+ endif()
whisper.cpp/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023-2024 The ggml authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
whisper.cpp/Makefile ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Audio samples
3
+ #
4
+
5
+ .PHONY: build
6
+ build:
7
+ cmake -B build $(CMAKE_ARGS)
8
+ cmake --build build --config Release
9
+
10
+ # download a few audio samples into folder "./samples":
11
+ .PHONY: samples
12
+ samples:
13
+ @echo "Downloading samples..."
14
+ @mkdir -p samples
15
+ @wget --quiet --show-progress -O samples/gb0.ogg https://upload.wikimedia.org/wikipedia/commons/2/22/George_W._Bush%27s_weekly_radio_address_%28November_1%2C_2008%29.oga
16
+ @wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
17
+ @wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
18
+ @wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
19
+ @wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
20
+ @wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
21
+
22
+ #
23
+ # Models
24
+ #
25
+
26
+ # if not already downloaded, the following targets download the specified model and
27
+ # runs it on all samples in the folder "./samples":
28
+
29
+ .PHONY: tiny.en
30
+ .PHONY: tiny
31
+ .PHONY: base.en
32
+ .PHONY: base
33
+ .PHONY: small.en
34
+ .PHONY: small
35
+ .PHONY: medium.en
36
+ .PHONY: medium
37
+ .PHONY: large-v1
38
+ .PHONY: large-v2
39
+ .PHONY: large-v3
40
+ .PHONY: large-v3-turbo
41
+
42
+ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
43
+ bash ./models/download-ggml-model.sh $@
44
+ cmake -B build $(CMAKE_ARGS)
45
+ cmake --build build --config Release
46
+ @echo ""
47
+ @echo "==============================================="
48
+ @echo "Running $@ on all samples in ./samples ..."
49
+ @echo "==============================================="
50
+ @echo ""
51
+ @for f in samples/*.{flac,mp3,ogg,wav}; do \
52
+ echo "----------------------------------------------" ; \
53
+ echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
54
+ echo "----------------------------------------------" ; \
55
+ echo "" ; \
56
+ ./build/bin/whisper-cli -m models/ggml-$@.bin -f $$f ; \
57
+ echo "" ; \
58
+ done
whisper.cpp/README.md ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # whisper.cpp
2
+
3
+ ![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
4
+
5
+ [![Actions Status](https://github.com/ggml-org/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggml-org/whisper.cpp/actions)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
7
+ [![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
8
+ [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
9
+
10
+ Stable: [v1.8.1](https://github.com/ggml-org/whisper.cpp/releases/tag/v1.8.1) / [Roadmap](https://github.com/orgs/ggml-org/projects/4/)
11
+
12
+ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
13
+
14
+ - Plain C/C++ implementation without dependencies
15
+ - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
16
+ - AVX intrinsics support for x86 architectures
17
+ - [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
18
+ - Mixed F16 / F32 precision
19
+ - [Integer quantization support](#quantization)
20
+ - Zero memory allocations at runtime
21
+ - [Vulkan support](#vulkan-gpu-support)
22
+ - Support for CPU-only inference
23
+ - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
24
+ - [OpenVINO Support](#openvino-support)
25
+ - [Ascend NPU Support](#ascend-npu-support)
26
+ - [Moore Threads GPU Support](#moore-threads-gpu-support)
27
+ - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
28
+ - [Voice Activity Detection (VAD)](#voice-activity-detection-vad)
29
+
30
+ Supported platforms:
31
+
32
+ - [x] Mac OS (Intel and Arm)
33
+ - [x] [iOS](examples/whisper.objc)
34
+ - [x] [Android](examples/whisper.android)
35
+ - [x] [Java](bindings/java/README.md)
36
+ - [x] Linux / [FreeBSD](https://github.com/ggml-org/whisper.cpp/issues/56#issuecomment-1350920264)
37
+ - [x] [WebAssembly](examples/whisper.wasm)
38
+ - [x] Windows ([MSVC](https://github.com/ggml-org/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggml-org/whisper.cpp/issues/168))
39
+ - [x] [Raspberry Pi](https://github.com/ggml-org/whisper.cpp/discussions/166)
40
+ - [x] [Docker](https://github.com/ggml-org/whisper.cpp/pkgs/container/whisper.cpp)
41
+
42
+ The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
43
+ The rest of the code is part of the [`ggml`](https://github.com/ggml-org/ggml) machine learning library.
44
+
45
+ Having such a lightweight implementation of the model makes it easy to integrate it into different platforms and applications.
46
+ As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
47
+
48
+ https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
49
+
50
+ You can also easily make your own offline voice assistant application: [command](examples/command)
51
+
52
+ https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
53
+
54
+ On Apple Silicon, the inference runs fully on the GPU via Metal:
55
+
56
+ https://github.com/ggml-org/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
57
+
58
+ ## Quick start
59
+
60
+ First clone the repository:
61
+
62
+ ```bash
63
+ git clone https://github.com/ggml-org/whisper.cpp.git
64
+ ```
65
+
66
+ Navigate into the directory:
67
+
68
+ ```
69
+ cd whisper.cpp
70
+ ```
71
+
72
+ Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
73
+
74
+ ```bash
75
+ sh ./models/download-ggml-model.sh base.en
76
+ ```
77
+
78
+ Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
79
+
80
+ ```bash
81
+ # build the project
82
+ cmake -B build
83
+ cmake --build build -j --config Release
84
+
85
+ # transcribe an audio file
86
+ ./build/bin/whisper-cli -f samples/jfk.wav
87
+ ```
88
+
89
+ ---
90
+
91
+ For a quick demo, simply run `make base.en`.
92
+
93
+ The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
94
+
95
+ For detailed usage instructions, run: `./build/bin/whisper-cli -h`
96
+
97
+ Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
98
+ For example, you can use `ffmpeg` like this:
99
+
100
+ ```bash
101
+ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
102
+ ```
103
+
104
+ ## More audio samples
105
+
106
+ If you want some extra audio samples to play with, simply run:
107
+
108
+ ```
109
+ make -j samples
110
+ ```
111
+
112
+ This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
113
+
114
+ You can download and run the other models as follows:
115
+
116
+ ```
117
+ make -j tiny.en
118
+ make -j tiny
119
+ make -j base.en
120
+ make -j base
121
+ make -j small.en
122
+ make -j small
123
+ make -j medium.en
124
+ make -j medium
125
+ make -j large-v1
126
+ make -j large-v2
127
+ make -j large-v3
128
+ make -j large-v3-turbo
129
+ ```
130
+
131
+ ## Memory usage
132
+
133
+ | Model | Disk | Mem |
134
+ | ------ | ------- | ------- |
135
+ | tiny | 75 MiB | ~273 MB |
136
+ | base | 142 MiB | ~388 MB |
137
+ | small | 466 MiB | ~852 MB |
138
+ | medium | 1.5 GiB | ~2.1 GB |
139
+ | large | 2.9 GiB | ~3.9 GB |
140
+
141
+ ## POWER VSX Intrinsics
142
+
143
+ `whisper.cpp` supports POWER architectures and includes code which
144
+ significantly speeds operation on Linux running on POWER9/10, making it
145
+ capable of faster-than-realtime transcription on underclocked Raptor
146
+ Talos II. Ensure you have a BLAS package installed, and replace the
147
+ standard cmake setup with:
148
+
149
+ ```bash
150
+ # build with GGML_BLAS defined
151
+ cmake -B build -DGGML_BLAS=1
152
+ cmake --build build -j --config Release
153
+ ./build/bin/whisper-cli [ .. etc .. ]
154
+ ```
155
+
156
+ ## Quantization
157
+
158
+ `whisper.cpp` supports integer quantization of the Whisper `ggml` models.
159
+ Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
160
+
161
+ Here are the steps for creating and using a quantized model:
162
+
163
+ ```bash
164
+ # quantize a model with Q5_0 method
165
+ cmake -B build
166
+ cmake --build build -j --config Release
167
+ ./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
168
+
169
+ # run the examples as usual, specifying the quantized model file
170
+ ./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
171
+ ```
172
+
173
+ ## Core ML support
174
+
175
+ On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
176
+ speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
177
+
178
+ - Install Python dependencies needed for the creation of the Core ML model:
179
+
180
+ ```bash
181
+ pip install ane_transformers
182
+ pip install openai-whisper
183
+ pip install coremltools
184
+ ```
185
+
186
+ - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
187
+ - Python 3.11 is recommended.
188
+ - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
189
+ - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
190
+ - To create an environment, use: `conda create -n py311-whisper python=3.11 -y`
191
+ - To activate the environment, use: `conda activate py311-whisper`
192
+
193
+ - Generate a Core ML model. For example, to generate a `base.en` model, use:
194
+
195
+ ```bash
196
+ ./models/generate-coreml-model.sh base.en
197
+ ```
198
+
199
+ This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
200
+
201
+ - Build `whisper.cpp` with Core ML support:
202
+
203
+ ```bash
204
+ # using CMake
205
+ cmake -B build -DWHISPER_COREML=1
206
+ cmake --build build -j --config Release
207
+ ```
208
+
209
+ - Run the examples as usual. For example:
210
+
211
+ ```text
212
+ $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
213
+
214
+ ...
215
+
216
+ whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
217
+ whisper_init_state: first run on a device may take a while ...
218
+ whisper_init_state: Core ML model loaded
219
+
220
+ system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
221
+
222
+ ...
223
+ ```
224
+
225
+ The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
226
+ Next runs are faster.
227
+
228
+ For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggml-org/whisper.cpp/pull/566).
229
+
230
+ ## OpenVINO support
231
+
232
+ On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
233
+ on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
234
+
235
+ This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
236
+
237
+ - First, setup python virtual env. and install python dependencies. Python 3.10 is recommended.
238
+
239
+ Windows:
240
+
241
+ ```powershell
242
+ cd models
243
+ python -m venv openvino_conv_env
244
+ openvino_conv_env\Scripts\activate
245
+ python -m pip install --upgrade pip
246
+ pip install -r requirements-openvino.txt
247
+ ```
248
+
249
+ Linux and macOS:
250
+
251
+ ```bash
252
+ cd models
253
+ python3 -m venv openvino_conv_env
254
+ source openvino_conv_env/bin/activate
255
+ python -m pip install --upgrade pip
256
+ pip install -r requirements-openvino.txt
257
+ ```
258
+
259
+ - Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
260
+
261
+ ```
262
+ python convert-whisper-to-openvino.py --model base.en
263
+ ```
264
+
265
+ This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
266
+ is the default location that the OpenVINO extension will search at runtime.
267
+
268
+ - Build `whisper.cpp` with OpenVINO support:
269
+
270
+ Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2024.6.0](https://github.com/openvinotoolkit/openvino/releases/tag/2024.6.0). Ready to use Binaries of the required libraries can be found in the [OpenVino Archives](https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/)
271
+
272
+ After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example:
273
+
274
+ Linux:
275
+
276
+ ```bash
277
+ source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
278
+ ```
279
+
280
+ Windows (cmd):
281
+
282
+ ```powershell
283
+ C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
284
+ ```
285
+
286
+ And then build the project using cmake:
287
+
288
+ ```bash
289
+ cmake -B build -DWHISPER_OPENVINO=1
290
+ cmake --build build -j --config Release
291
+ ```
292
+
293
+ - Run the examples as usual. For example:
294
+
295
+ ```text
296
+ $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
297
+
298
+ ...
299
+
300
+ whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
301
+ whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
302
+ whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
303
+ whisper_ctx_init_openvino_encoder: OpenVINO model loaded
304
+
305
+ system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
306
+
307
+ ...
308
+ ```
309
+
310
+ The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
311
+ cached for the next run.
312
+
313
+ For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037).
314
+
315
+ ## NVIDIA GPU support
316
+
317
+ With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
318
+ First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
319
+
320
+ Now build `whisper.cpp` with CUDA support:
321
+
322
+ ```
323
+ cmake -B build -DGGML_CUDA=1
324
+ cmake --build build -j --config Release
325
+ ```
326
+
327
+ or for newer NVIDIA GPU's (RTX 5000 series):
328
+ ```
329
+ cmake -B build -DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86"
330
+ cmake --build build -j --config Release
331
+ ```
332
+
333
+ ## Vulkan GPU support
334
+ Cross-vendor solution which allows you to accelerate workload on your GPU.
335
+ First, make sure your graphics card driver provides support for Vulkan API.
336
+
337
+ Now build `whisper.cpp` with Vulkan support:
338
+ ```
339
+ cmake -B build -DGGML_VULKAN=1
340
+ cmake --build build -j --config Release
341
+ ```
342
+
343
+ ## BLAS CPU support via OpenBLAS
344
+
345
+ Encoder processing can be accelerated on the CPU via OpenBLAS.
346
+ First, make sure you have installed `openblas`: https://www.openblas.net/
347
+
348
+ Now build `whisper.cpp` with OpenBLAS support:
349
+
350
+ ```
351
+ cmake -B build -DGGML_BLAS=1
352
+ cmake --build build -j --config Release
353
+ ```
354
+
355
+ ## Ascend NPU support
356
+
357
+ Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
358
+
359
+ First, check if your Ascend NPU device is supported:
360
+
361
+ **Verified devices**
362
+ | Ascend NPU | Status |
363
+ |:-----------------------------:|:-------:|
364
+ | Atlas 300T A2 | Support |
365
+ | Atlas 300I Duo | Support |
366
+
367
+ Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended.
368
+
369
+ Now build `whisper.cpp` with CANN support:
370
+
371
+ ```
372
+ cmake -B build -DGGML_CANN=1
373
+ cmake --build build -j --config Release
374
+ ```
375
+
376
+ Run the inference examples as usual, for example:
377
+
378
+ ```
379
+ ./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
380
+ ```
381
+
382
+ *Notes:*
383
+
384
+ - If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
385
+ - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
386
+
387
+ ## Moore Threads GPU support
388
+
389
+ With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
390
+ First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0
391
+
392
+ Now build `whisper.cpp` with MUSA support:
393
+
394
+ ```
395
+ cmake -B build -DGGML_MUSA=1
396
+ cmake --build build -j --config Release
397
+ ```
398
+
399
+ or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows:
400
+
401
+ ```
402
+ cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
403
+ cmake --build build -j --config Release
404
+ ```
405
+
406
+ ## FFmpeg support (Linux only)
407
+
408
+ If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
409
+
410
+ First, you need to install required libraries:
411
+
412
+ ```bash
413
+ # Debian/Ubuntu
414
+ sudo apt install libavcodec-dev libavformat-dev libavutil-dev
415
+
416
+ # RHEL/Fedora
417
+ sudo dnf install libavcodec-free-devel libavformat-free-devel libavutil-free-devel
418
+ ```
419
+
420
+ Then you can build the project as follows:
421
+
422
+ ```bash
423
+ cmake -B build -D WHISPER_FFMPEG=yes
424
+ cmake --build build
425
+ ```
426
+
427
+ Run the following example to confirm it's working:
428
+
429
+ ```bash
430
+ # Convert an audio file to Opus format
431
+ ffmpeg -i samples/jfk.wav jfk.opus
432
+
433
+ # Transcribe the audio file
434
+ ./build/bin/whisper-cli --model models/ggml-base.en.bin --file jfk.opus
435
+ ```
436
+
437
+ ## Docker
438
+
439
+ ### Prerequisites
440
+
441
+ - Docker must be installed and running on your system.
442
+ - Create a folder to store big models & intermediate files (ex. /whisper/models)
443
+
444
+ ### Images
445
+
446
+ We have two Docker images available for this project:
447
+
448
+ 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
449
+ 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
450
+ 3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
451
+
452
+ ### Usage
453
+
454
+ ```shell
455
+ # download model and persist it in a local folder
456
+ docker run -it --rm \
457
+ -v path/to/models:/models \
458
+ whisper.cpp:main "./models/download-ggml-model.sh base /models"
459
+ # transcribe an audio file
460
+ docker run -it --rm \
461
+ -v path/to/models:/models \
462
+ -v path/to/audios:/audios \
463
+ whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
464
+ # transcribe an audio file in samples folder
465
+ docker run -it --rm \
466
+ -v path/to/models:/models \
467
+ whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
468
+ ```
469
+
470
+ ## Installing with Conan
471
+
472
+ You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
473
+
474
+ ```
475
+ conan install --requires="whisper-cpp/[*]" --build=missing
476
+ ```
477
+
478
+ For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
479
+
480
+ ## Limitations
481
+
482
+ - Inference only
483
+
484
+ ## Real-time audio input example
485
+
486
+ This is a naive example of performing real-time inference on audio from your microphone.
487
+ The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
488
+ More info is available in [issue #10](https://github.com/ggml-org/whisper.cpp/issues/10).
489
+ You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly.
490
+
491
+ ```bash
492
+ cmake -B build -DWHISPER_SDL2=ON
493
+ cmake --build build -j --config Release
494
+ ./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
495
+ ```
496
+
497
+ https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
498
+
499
+ ## Confidence color-coding
500
+
501
+ Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
502
+ to highlight words with high or low confidence:
503
+
504
+ ```bash
505
+ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
506
+ ```
507
+
508
+ <img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
509
+
510
+ ## Controlling the length of the generated text segments (experimental)
511
+
512
+ For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
513
+
514
+ ```text
515
+ $ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
516
+
517
+ whisper_model_load: loading model from './models/ggml-base.en.bin'
518
+ ...
519
+ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
520
+
521
+ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
522
+
523
+ [00:00:00.000 --> 00:00:00.850] And so my
524
+ [00:00:00.850 --> 00:00:01.590] fellow
525
+ [00:00:01.590 --> 00:00:04.140] Americans, ask
526
+ [00:00:04.140 --> 00:00:05.660] not what your
527
+ [00:00:05.660 --> 00:00:06.840] country can do
528
+ [00:00:06.840 --> 00:00:08.430] for you, ask
529
+ [00:00:08.430 --> 00:00:09.440] what you can do
530
+ [00:00:09.440 --> 00:00:10.020] for your
531
+ [00:00:10.020 --> 00:00:11.000] country.
532
+ ```
533
+
534
+ ## Word-level timestamp (experimental)
535
+
536
+ The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
537
+
538
+ ```text
539
+ $ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
540
+
541
+ whisper_model_load: loading model from './models/ggml-base.en.bin'
542
+ ...
543
+ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
544
+
545
+ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
546
+
547
+ [00:00:00.000 --> 00:00:00.320]
548
+ [00:00:00.320 --> 00:00:00.370] And
549
+ [00:00:00.370 --> 00:00:00.690] so
550
+ [00:00:00.690 --> 00:00:00.850] my
551
+ [00:00:00.850 --> 00:00:01.590] fellow
552
+ [00:00:01.590 --> 00:00:02.850] Americans
553
+ [00:00:02.850 --> 00:00:03.300] ,
554
+ [00:00:03.300 --> 00:00:04.140] ask
555
+ [00:00:04.140 --> 00:00:04.990] not
556
+ [00:00:04.990 --> 00:00:05.410] what
557
+ [00:00:05.410 --> 00:00:05.660] your
558
+ [00:00:05.660 --> 00:00:06.260] country
559
+ [00:00:06.260 --> 00:00:06.600] can
560
+ [00:00:06.600 --> 00:00:06.840] do
561
+ [00:00:06.840 --> 00:00:07.010] for
562
+ [00:00:07.010 --> 00:00:08.170] you
563
+ [00:00:08.170 --> 00:00:08.190] ,
564
+ [00:00:08.190 --> 00:00:08.430] ask
565
+ [00:00:08.430 --> 00:00:08.910] what
566
+ [00:00:08.910 --> 00:00:09.040] you
567
+ [00:00:09.040 --> 00:00:09.320] can
568
+ [00:00:09.320 --> 00:00:09.440] do
569
+ [00:00:09.440 --> 00:00:09.760] for
570
+ [00:00:09.760 --> 00:00:10.020] your
571
+ [00:00:10.020 --> 00:00:10.510] country
572
+ [00:00:10.510 --> 00:00:11.000] .
573
+ ```
574
+
575
+ ## Speaker segmentation via tinydiarize (experimental)
576
+
577
+ More information about this approach is available here: https://github.com/ggml-org/whisper.cpp/pull/1058
578
+
579
+ Sample usage:
580
+
581
+ ```py
582
+ # download a tinydiarize compatible model
583
+ ./models/download-ggml-model.sh small.en-tdrz
584
+
585
+ # run as usual, adding the "-tdrz" command-line argument
586
+ ./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
587
+ ...
588
+ main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
589
+ ...
590
+ [00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
591
+ [00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
592
+ [00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
593
+ [00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
594
+ [00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
595
+ [00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
596
+ [00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
597
+ [00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
598
+ [00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
599
+ ```
600
+
601
+ ## Karaoke-style movie generation (experimental)
602
+
603
+ The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
604
+ currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script.
605
+ This requires to have `ffmpeg` installed.
606
+
607
+ Here are a few _"typical"_ examples:
608
+
609
+ ```bash
610
+ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
611
+ source ./samples/jfk.wav.wts
612
+ ffplay ./samples/jfk.wav.mp4
613
+ ```
614
+
615
+ https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
616
+
617
+ ---
618
+
619
+ ```bash
620
+ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
621
+ source ./samples/mm0.wav.wts
622
+ ffplay ./samples/mm0.wav.mp4
623
+ ```
624
+
625
+ https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
626
+
627
+ ---
628
+
629
+ ```bash
630
+ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
631
+ source ./samples/gb0.wav.wts
632
+ ffplay ./samples/gb0.wav.mp4
633
+ ```
634
+
635
+ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
636
+
637
+ ---
638
+
639
+ ## Video comparison of different models
640
+
641
+ Use the [scripts/bench-wts.sh](https://github.com/ggml-org/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
642
+
643
+ ```bash
644
+ ./scripts/bench-wts.sh samples/jfk.wav
645
+ ffplay ./samples/jfk.wav.all.mp4
646
+ ```
647
+
648
+ https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
649
+
650
+ ---
651
+
652
+ ## Benchmarks
653
+
654
+ In order to have an objective comparison of the performance of the inference across different system configurations,
655
+ use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
656
+ took to execute it. The results are summarized in the following Github issue:
657
+
658
+ [Benchmark results](https://github.com/ggml-org/whisper.cpp/issues/89)
659
+
660
+ Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
661
+
662
+ You can run it with the following command, by default it will run against any standard model in the models folder.
663
+
664
+ ```bash
665
+ python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
666
+ ```
667
+
668
+ It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
669
+
670
+ It outputs a csv file with the results of the benchmarking.
671
+
672
+ ## `ggml` format
673
+
674
+ The original models are converted to a custom binary format. This allows to pack everything needed into a single file:
675
+
676
+ - model parameters
677
+ - mel filters
678
+ - vocabulary
679
+ - weights
680
+
681
+ You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
682
+ or manually from here:
683
+
684
+ - https://huggingface.co/ggerganov/whisper.cpp
685
+
686
+ For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
687
+
688
+ ## [Bindings](https://github.com/ggml-org/whisper.cpp/discussions/categories/bindings)
689
+
690
+ - [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggml-org/whisper.cpp/discussions/310)
691
+ - [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggml-org/whisper.cpp/discussions/309)
692
+ - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
693
+ - [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggml-org/whisper.cpp/discussions/312)
694
+ - [x] Java:
695
+ - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
696
+ - [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggml-org/whisper.cpp/discussions/507)
697
+ - [x] Objective-C / Swift: [ggml-org/whisper.spm](https://github.com/ggml-org/whisper.spm) | [#313](https://github.com/ggml-org/whisper.cpp/discussions/313)
698
+ - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
699
+ - [x] .NET: | [#422](https://github.com/ggml-org/whisper.cpp/discussions/422)
700
+ - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
701
+ - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
702
+ - [x] Python: | [#9](https://github.com/ggml-org/whisper.cpp/issues/9)
703
+ - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
704
+ - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
705
+ - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
706
+ - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
707
+ - [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
708
+ - [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
709
+
710
+ ## XCFramework
711
+ The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS,
712
+ and macOS. It can be used in Swift projects without the need to compile the
713
+ library from source. For example, the v1.7.5 version of the XCFramework can be
714
+ used as follows:
715
+
716
+ ```swift
717
+ // swift-tools-version: 5.10
718
+ // The swift-tools-version declares the minimum version of Swift required to build this package.
719
+
720
+ import PackageDescription
721
+
722
+ let package = Package(
723
+ name: "Whisper",
724
+ targets: [
725
+ .executableTarget(
726
+ name: "Whisper",
727
+ dependencies: [
728
+ "WhisperFramework"
729
+ ]),
730
+ .binaryTarget(
731
+ name: "WhisperFramework",
732
+ url: "https://github.com/ggml-org/whisper.cpp/releases/download/v1.7.5/whisper-v1.7.5-xcframework.zip",
733
+ checksum: "c7faeb328620d6012e130f3d705c51a6ea6c995605f2df50f6e1ad68c59c6c4a"
734
+ )
735
+ ]
736
+ )
737
+ ```
738
+
739
+ ## Voice Activity Detection (VAD)
740
+ Support for Voice Activity Detection (VAD) can be enabled using the `--vad`
741
+ argument to `whisper-cli`. In addition to this option a VAD model is also
742
+ required.
743
+
744
+ The way this works is that first the audio samples are passed through
745
+ the VAD model which will detect speech segments. Using this information,
746
+ only the speech segments that are detected are extracted from the original audio
747
+ input and passed to whisper for processing. This reduces the amount of audio
748
+ data that needs to be processed by whisper and can significantly speed up the
749
+ transcription process.
750
+
751
+ The following VAD models are currently supported:
752
+
753
+ ### Silero-VAD
754
+ [Silero-vad](https://github.com/snakers4/silero-vad) is a lightweight VAD model
755
+ written in Python that is fast and accurate.
756
+
757
+ Models can be downloaded by running the following command on Linux or MacOS:
758
+ ```console
759
+ $ ./models/download-vad-model.sh silero-v6.2.0
760
+ Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ...
761
+ ggml-silero-v6.2.0.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s
762
+ Done! Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin'
763
+ You can now use it like this:
764
+
765
+ $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin
766
+
767
+ ```
768
+ And the following command on Windows:
769
+ ```console
770
+ > .\models\download-vad-model.cmd silero-v6.2.0
771
+ Downloading vad model silero-v6.2.0...
772
+ Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin
773
+ You can now use it like this:
774
+
775
+ C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav
776
+
777
+ ```
778
+
779
+ To see a list of all available models, run the above commands without any
780
+ arguments.
781
+
782
+ This model can also be converted manually to ggml using the following command:
783
+ ```console
784
+ $ python3 -m venv venv && source venv/bin/activate
785
+ $ (venv) pip install silero-vad
786
+ $ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin
787
+ Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin
788
+ ```
789
+ And it can then be used with whisper as follows:
790
+ ```console
791
+ $ ./build/bin/whisper-cli \
792
+ --file ./samples/jfk.wav \
793
+ --model ./models/ggml-base.en.bin \
794
+ --vad \
795
+ --vad-model ./models/silero-v6.2.0-ggml.bin
796
+ ```
797
+
798
+ ### VAD Options
799
+
800
+ * --vad-threshold: Threshold probability for speech detection. A probability
801
+ for a speech segment/frame above this threshold will be considered as speech.
802
+
803
+ * --vad-min-speech-duration-ms: Minimum speech duration in milliseconds. Speech
804
+ segments shorter than this value will be discarded to filter out brief noise or
805
+ false positives.
806
+
807
+ * --vad-min-silence-duration-ms: Minimum silence duration in milliseconds. Silence
808
+ periods must be at least this long to end a speech segment. Shorter silence
809
+ periods will be ignored and included as part of the speech.
810
+
811
+ * --vad-max-speech-duration-s: Maximum speech duration in seconds. Speech segments
812
+ longer than this will be automatically split into multiple segments at silence
813
+ points exceeding 98ms to prevent excessively long segments.
814
+
815
+ * --vad-speech-pad-ms: Speech padding in milliseconds. Adds this amount of padding
816
+ before and after each detected speech segment to avoid cutting off speech edges.
817
+
818
+ * --vad-samples-overlap: Amount of audio to extend from each speech segment into
819
+ the next one, in seconds (e.g., 0.10 = 100ms overlap). This ensures speech isn't
820
+ cut off abruptly between segments when they're concatenated together.
821
+
822
+ ## Examples
823
+
824
+ There are various examples of using the library for different projects in the [examples](examples) folder.
825
+ Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
826
+
827
+ | Example | Web | Description |
828
+ | --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
829
+ | [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
830
+ | [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
831
+ | [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
832
+ | [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
833
+ | [whisper-server](examples/server) | | HTTP transcription server with OAI-like API |
834
+ | [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
835
+ | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
836
+ | [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
837
+ | [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
838
+ | [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
839
+ | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
840
+ | [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggml-org/whisper.cpp/issues/185) |
841
+ | [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
842
+ | [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
843
+
844
+ ## [Discussions](https://github.com/ggml-org/whisper.cpp/discussions)
845
+
846
+ If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
847
+ You can use the [Show and tell](https://github.com/ggml-org/whisper.cpp/discussions/categories/show-and-tell) category
848
+ to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
849
+ [Frequently asked questions (#126)](https://github.com/ggml-org/whisper.cpp/discussions/126) discussion.
whisper.cpp/README_sycl.md ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # whisper.cpp for SYCL
2
+
3
+ [Background](#background)
4
+
5
+ [OS](#os)
6
+
7
+ [Intel GPU](#intel-gpu)
8
+
9
+ [Linux](#linux)
10
+
11
+ [Environment Variable](#environment-variable)
12
+
13
+ [Known Issue](#known-issue)
14
+
15
+ [Todo](#todo)
16
+
17
+ ## Background
18
+
19
+ SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
20
+
21
+ oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
22
+
23
+ Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs.
24
+
25
+ To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool, [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)), to migrate the code to SYCL.
26
+
27
+ The whisper.cpp for SYCL is used to support Intel GPUs.
28
+
29
+ For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
30
+
31
+ ## OS
32
+
33
+ |OS|Status|Verified|
34
+ |-|-|-|
35
+ |Linux|Support|Ubuntu 22.04|
36
+ |Windows|Ongoing| |
37
+
38
+
39
+ ## Intel GPU
40
+
41
+ |Intel GPU| Status | Verified Model|
42
+ |-|-|-|
43
+ |Intel Data Center Max Series| Support| Max 1550|
44
+ |Intel Data Center Flex Series| Support| Flex 170|
45
+ |Intel Arc Series| Support| Arc 770|
46
+ |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
47
+ |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
48
+
49
+
50
+ ## Linux
51
+
52
+ ### Setup Environment
53
+
54
+ 1. Install Intel GPU driver.
55
+
56
+ a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
57
+
58
+ Note: for iGPU, please install the client GPU driver.
59
+
60
+ b. Add user to group: video, render.
61
+
62
+ ```
63
+ sudo usermod -aG render username
64
+ sudo usermod -aG video username
65
+ ```
66
+
67
+ Note: re-login to enable it.
68
+
69
+ c. Check
70
+
71
+ ```
72
+ sudo apt install clinfo
73
+ sudo clinfo -l
74
+ ```
75
+
76
+ Output (example):
77
+
78
+ ```
79
+ Platform #0: Intel(R) OpenCL Graphics
80
+ `-- Device #0: Intel(R) Arc(TM) A770 Graphics
81
+
82
+
83
+ Platform #0: Intel(R) OpenCL HD Graphics
84
+ `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
85
+ ```
86
+
87
+ 2. Install Intel® oneAPI Base toolkit.
88
+
89
+
90
+ a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
91
+
92
+ We recommend installing to the default folder: **/opt/intel/oneapi**.
93
+
94
+ The following guide uses the default folder as an example. If you use a different folder, please adjust the paths in the guide accordingly.
95
+
96
+ b. Check
97
+
98
+ ```
99
+ source /opt/intel/oneapi/setvars.sh
100
+
101
+ sycl-ls
102
+ ```
103
+
104
+ There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
105
+
106
+ Output (example):
107
+ ```
108
+ [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
109
+ [opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
110
+ [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
111
+ [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
112
+
113
+ ```
114
+
115
+ 2. Build locally:
116
+
117
+ ```
118
+ mkdir -p build
119
+ cd build
120
+ source /opt/intel/oneapi/setvars.sh
121
+
122
+ #for FP16
123
+ #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
124
+
125
+ #for FP32
126
+ cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
127
+
128
+ #build example/main only
129
+ #cmake --build . --config Release --target main
130
+
131
+ #build all binary
132
+ cmake --build . --config Release -v
133
+
134
+ ```
135
+
136
+ or
137
+
138
+ ```
139
+ ./examples/sycl/build.sh
140
+ ```
141
+
142
+ Note:
143
+
144
+ - By default, it will build all binary files, which takes more time. To reduce build time, we recommend building **example/main** only.
145
+
146
+ ### Run
147
+
148
+ 1. Put model file to folder **models**
149
+
150
+ 2. Enable oneAPI running environment
151
+
152
+ ```
153
+ source /opt/intel/oneapi/setvars.sh
154
+ ```
155
+
156
+ 3. List device ID
157
+
158
+ Run without parameter:
159
+
160
+ ```
161
+ ./build/bin/ls-sycl-device
162
+
163
+ or
164
+
165
+ ./build/bin/main
166
+ ```
167
+
168
+ Check the ID in startup log, like:
169
+
170
+ ```
171
+ found 4 SYCL devices:
172
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
173
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
174
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
175
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
176
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
177
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
178
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
179
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
180
+
181
+ ```
182
+
183
+ |Attribute|Note|
184
+ |-|-|
185
+ |compute capability 1.3|Level-zero running time, recommended |
186
+ |compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
187
+
188
+ 4. Set device ID and execute whisper.cpp
189
+
190
+ Set device ID = 0 by **GGML_SYCL_DEVICE=0**
191
+
192
+ ```
193
+ GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
194
+ ```
195
+ or run by script:
196
+
197
+ ```
198
+ ./examples/sycl/run_whisper.sh
199
+ ```
200
+
201
+
202
+
203
+ 5. Check the device ID in output
204
+
205
+ Like:
206
+ ```
207
+ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
208
+ ```
209
+
210
+
211
+ ## Environment Variable
212
+
213
+ #### Build
214
+
215
+ |Name|Value|Function|
216
+ |-|-|-|
217
+ |WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
218
+ |WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path.For FP32, do not set it.|
219
+ |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
220
+ |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
221
+
222
+ #### Running
223
+
224
+
225
+ |Name|Value|Function|
226
+ |-|-|-|
227
+ |GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
228
+ |GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
229
+
230
+ ## Known Issue
231
+
232
+ - Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
233
+
234
+ Miss to enable oneAPI running environment.
235
+
236
+ Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
237
+
238
+
239
+ - Hang during startup
240
+
241
+ llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block.
242
+
243
+ Solution: add **--no-mmap**.
244
+
245
+ ## Todo
246
+
247
+ - Support to build in Windows.
248
+
249
+ - Support multiple cards.
whisper.cpp/bindings/CMakeLists.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ if (EMSCRIPTEN)
2
+ add_subdirectory(javascript)
3
+
4
+ add_custom_command(
5
+ OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
6
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
7
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
8
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
9
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
10
+ COMMAND npm publish
11
+ COMMAND touch publish.log
12
+ COMMENT "Publishing npm module v${PROJECT_VERSION}"
13
+ VERBATIM
14
+ )
15
+
16
+ add_custom_target(publish-npm
17
+ DEPENDS javascript/publish.log
18
+ )
19
+ endif()
whisper.cpp/bindings/go/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ build
2
+ models
whisper.cpp/bindings/go/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 David Thorpe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
whisper.cpp/bindings/go/Makefile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ifndef UNAME_S
2
+ UNAME_S := $(shell uname -s)
3
+ endif
4
+
5
+ ifndef UNAME_P
6
+ UNAME_P := $(shell uname -p)
7
+ endif
8
+
9
+ ifndef UNAME_M
10
+ UNAME_M := $(shell uname -m)
11
+ endif
12
+
13
+ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
14
+ BUILD_DIR := build_go
15
+ MODELS_DIR := models
16
+ EXAMPLES_DIR := $(wildcard examples/*)
17
+ INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
18
+ LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src):$(abspath ../../${BUILD_DIR}/ggml/src)
19
+
20
+ ifeq ($(GGML_CUDA),1)
21
+ LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
22
+ BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
23
+ endif
24
+
25
+ ifeq ($(UNAME_S),Darwin)
26
+ LIBRARY_PATH := $(LIBRARY_PATH):$(abspath ../../${BUILD_DIR}/ggml/src/ggml-blas):$(abspath ../../${BUILD_DIR}/ggml/src/ggml-metal)
27
+ EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit -lggml-metal -lggml-blas
28
+ endif
29
+
30
+ all: clean whisper examples
31
+
32
+ whisper: mkdir
33
+ cmake -S ../.. -B ../../${BUILD_DIR} \
34
+ -DCMAKE_BUILD_TYPE=Release \
35
+ -DBUILD_SHARED_LIBS=OFF
36
+ cmake --build ../../${BUILD_DIR} --target whisper
37
+
38
+ test: model-small whisper modtidy
39
+ ifeq ($(UNAME_S),Darwin)
40
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
41
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
42
+ else
43
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
44
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
45
+ endif
46
+
47
+ examples: $(EXAMPLES_DIR)
48
+
49
+ model-small: mkdir examples/go-model-download
50
+ @${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
51
+
52
+ $(EXAMPLES_DIR): mkdir whisper modtidy
53
+ @echo Build example $(notdir $@)
54
+ ifeq ($(UNAME_S),Darwin)
55
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
56
+ else
57
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
58
+ endif
59
+
60
+ mkdir:
61
+ @echo Mkdir ${BUILD_DIR}
62
+ @install -d ${BUILD_DIR}
63
+ @echo Mkdir ${MODELS_DIR}
64
+ @install -d ${MODELS_DIR}
65
+
66
+ modtidy:
67
+ @go mod tidy
68
+
69
+ clean:
70
+ @echo Clean
71
+ @rm -fr $(BUILD_DIR)
72
+ @go clean
whisper.cpp/bindings/go/README.md ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Go bindings for Whisper
2
+
3
+ This package provides Go bindings for whisper.cpp. They have been tested on:
4
+
5
+ * Darwin (OS X) 12.6 on x64_64
6
+ * Debian Linux on arm64
7
+ * Fedora Linux on x86_64
8
+
9
+ The "low level" bindings are in the `bindings/go` directory and there is a more
10
+ Go-style package in the `bindings/go/pkg/whisper` directory. The most simple usage
11
+ is as follows:
12
+
13
+ ```go
14
+ import (
15
+ "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
16
+ )
17
+
18
+ func main() {
19
+ var modelpath string // Path to the model
20
+ var samples []float32 // Samples to process
21
+
22
+ // Load the model
23
+ model, err := whisper.New(modelpath)
24
+ if err != nil {
25
+ panic(err)
26
+ }
27
+ defer model.Close()
28
+
29
+ // Process samples
30
+ context, err := model.NewContext()
31
+ if err != nil {
32
+ panic(err)
33
+ }
34
+ if err := context.Process(samples, nil, nil, nil); err != nil {
35
+ return err
36
+ }
37
+
38
+ // Print out the results
39
+ for {
40
+ segment, err := context.NextSegment()
41
+ if err != nil {
42
+ break
43
+ }
44
+ fmt.Printf("[%6s->%6s] %s\n", segment.Start, segment.End, segment.Text)
45
+ }
46
+ }
47
+ ```
48
+
49
+ ## Building & Testing
50
+
51
+ In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
52
+
53
+ ```bash
54
+ git clone https://github.com/ggml-org/whisper.cpp.git
55
+ cd whisper.cpp/bindings/go
56
+ make test
57
+ ```
58
+
59
+ This will compile a static `libwhisper.a` in a `build` folder, download a model file, then run the tests. To build the examples:
60
+
61
+ ```bash
62
+ make examples
63
+ ```
64
+
65
+ To build using cuda support add `GGML_CUDA=1`:
66
+
67
+ ```bash
68
+ GGML_CUDA=1 make examples
69
+ ```
70
+
71
+ The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
72
+
73
+ ```bash
74
+ ./build/go-model-download -out models
75
+ ```
76
+
77
+ And you can then test a model against samples with the following command:
78
+
79
+ ```bash
80
+ ./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
81
+ ```
82
+
83
+ ## Using the bindings
84
+
85
+ To use the bindings in your own software,
86
+
87
+ 1. Import `github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper` (or `github.com/ggerganov/whisper.cpp/bindings/go` into your package;
88
+ 2. Compile `libwhisper.a` (you can use `make whisper` in the `bindings/go` directory);
89
+ 3. Link your go binary against whisper by setting the environment variables `C_INCLUDE_PATH` and `LIBRARY_PATH`
90
+ to point to the `whisper.h` file directory and `libwhisper.a` file directory respectively.
91
+
92
+ Look at the `Makefile` in the `bindings/go` directory for an example.
93
+
94
+ The API Documentation:
95
+
96
+ * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go
97
+ * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper
98
+
99
+ Getting help:
100
+
101
+ * Follow the discussion for the go bindings [here](https://github.com/ggml-org/whisper.cpp/discussions/312)
102
+
103
+ ## License
104
+
105
+ The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
106
+
whisper.cpp/bindings/go/doc.go ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ /*
2
+ github.com/ggml-org/whisper.cpp/bindings/go
3
+ provides a speech-to-text service bindings for the Go programming language.
4
+ */
5
+ package whisper
whisper.cpp/bindings/go/examples/go-model-download/context.go ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "context"
5
+ "os"
6
+ "os/signal"
7
+ )
8
+
9
+ // ContextForSignal returns a context object which is cancelled when a signal
10
+ // is received. It returns nil if no signal parameter is provided
11
+ func ContextForSignal(signals ...os.Signal) context.Context {
12
+ if len(signals) == 0 {
13
+ return nil
14
+ }
15
+
16
+ ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
17
+ ctx, cancel := context.WithCancel(context.Background())
18
+
19
+ // Send message on channel when signal received
20
+ signal.Notify(ch, signals...)
21
+
22
+ // When any signal is received, call cancel
23
+ go func() {
24
+ <-ch
25
+ cancel()
26
+ }()
27
+
28
+ // Return success
29
+ return ctx
30
+ }
31
+
whisper.cpp/bindings/go/examples/go-model-download/main.go ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "context"
5
+ "flag"
6
+ "fmt"
7
+ "io"
8
+ "net/http"
9
+ "net/url"
10
+ "os"
11
+ "path/filepath"
12
+ "strings"
13
+ "syscall"
14
+ "time"
15
+ )
16
+
17
+ ///////////////////////////////////////////////////////////////////////////////
18
+ // CONSTANTS
19
+
20
+ const (
21
+ srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
22
+ srcExt = ".bin" // Filename extension
23
+ bufSize = 1024 * 64 // Size of the buffer used for downloading the model
24
+ )
25
+
26
+ var (
27
+ // The models which will be downloaded, if no model is specified as an argument
28
+ modelNames = []string{
29
+ "tiny", "tiny-q5_1", "tiny-q8_0",
30
+ "tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
31
+ "base", "base-q5_1", "base-q8_0",
32
+ "base.en", "base.en-q5_1", "base.en-q8_0",
33
+ "small", "small-q5_1", "small-q8_0",
34
+ "small.en", "small.en-q5_1", "small.en-q8_0",
35
+ "medium", "medium-q5_0", "medium-q8_0",
36
+ "medium.en", "medium.en-q5_0", "medium.en-q8_0",
37
+ "large-v1",
38
+ "large-v2", "large-v2-q5_0", "large-v2-q8_0",
39
+ "large-v3", "large-v3-q5_0",
40
+ "large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
41
+ }
42
+ )
43
+
44
+ var (
45
+ // The output folder. When not set, use current working directory.
46
+ flagOut = flag.String("out", "", "Output folder")
47
+
48
+ // HTTP timeout parameter - will timeout if takes longer than this to download a model
49
+ flagTimeout = flag.Duration("timeout", 30*time.Minute, "HTTP timeout")
50
+
51
+ // Quiet parameter - will not print progress if set
52
+ flagQuiet = flag.Bool("quiet", false, "Quiet mode")
53
+ )
54
+
55
+ ///////////////////////////////////////////////////////////////////////////////
56
+ // MAIN
57
+
58
+ func main() {
59
+ flag.Usage = func() {
60
+ name := filepath.Base(flag.CommandLine.Name())
61
+ fmt.Fprintf(flag.CommandLine.Output(), `
62
+ Usage: %s [options] [<model>...]
63
+
64
+ Options:
65
+ -out string Specify the output folder where models will be saved.
66
+ Default: Current working directory.
67
+ -timeout duration Set the maximum duration for downloading a model.
68
+ Example: 10m, 1h (default: 30m0s).
69
+ -quiet Suppress all output except errors.
70
+
71
+ Examples:
72
+ 1. Download a specific model:
73
+ %s -out ./models tiny-q8_0
74
+
75
+ 2. Download all models:
76
+ %s -out ./models
77
+
78
+ `, name, name, name)
79
+
80
+ flag.PrintDefaults()
81
+ }
82
+ flag.Parse()
83
+
84
+ // Get output path
85
+ out, err := GetOut()
86
+ if err != nil {
87
+ fmt.Fprintln(os.Stderr, "Error:", err)
88
+ os.Exit(-1)
89
+ }
90
+
91
+ // Create context which quits on SIGINT or SIGQUIT
92
+ ctx := ContextForSignal(os.Interrupt, syscall.SIGQUIT)
93
+
94
+ // Progress filehandle
95
+ progress := os.Stdout
96
+ if *flagQuiet {
97
+ progress, err = os.Open(os.DevNull)
98
+ if err != nil {
99
+ fmt.Fprintln(os.Stderr, "Error:", err)
100
+ os.Exit(-1)
101
+ }
102
+ defer progress.Close()
103
+ }
104
+
105
+ // Download models - exit on error or interrupt
106
+ for _, model := range GetModels() {
107
+ url, err := URLForModel(model)
108
+ if err != nil {
109
+ fmt.Fprintln(os.Stderr, "Error:", err)
110
+ continue
111
+ } else if path, err := Download(ctx, progress, url, out); err == nil || err == io.EOF {
112
+ continue
113
+ } else if err == context.Canceled {
114
+ os.Remove(path)
115
+ fmt.Fprintln(progress, "\nInterrupted")
116
+ break
117
+ } else if err == context.DeadlineExceeded {
118
+ os.Remove(path)
119
+ fmt.Fprintln(progress, "Timeout downloading model")
120
+ continue
121
+ } else {
122
+ os.Remove(path)
123
+ fmt.Fprintln(os.Stderr, "Error:", err)
124
+ break
125
+ }
126
+ }
127
+ }
128
+
129
+ ///////////////////////////////////////////////////////////////////////////////
130
+ // PUBLIC METHODS
131
+
132
+ // GetOut returns the path to the output directory
133
+ func GetOut() (string, error) {
134
+ if *flagOut == "" {
135
+ return os.Getwd()
136
+ }
137
+ if info, err := os.Stat(*flagOut); err != nil {
138
+ return "", err
139
+ } else if !info.IsDir() {
140
+ return "", fmt.Errorf("not a directory: %s", info.Name())
141
+ } else {
142
+ return *flagOut, nil
143
+ }
144
+ }
145
+
146
+ // GetModels returns the list of models to download
147
+ func GetModels() []string {
148
+ if flag.NArg() == 0 {
149
+ fmt.Println("No model specified.")
150
+ fmt.Println("Preparing to download all models...")
151
+
152
+ // Calculate total download size
153
+ fmt.Println("Calculating total download size...")
154
+ totalSize, err := CalculateTotalDownloadSize(modelNames)
155
+ if err != nil {
156
+ fmt.Println("Error calculating download sizes:", err)
157
+ os.Exit(1)
158
+ }
159
+
160
+ fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
161
+ fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
162
+ fmt.Println("Would you like to download all models? (y/N)")
163
+
164
+ // Prompt for user input
165
+ var response string
166
+ fmt.Scanln(&response)
167
+ if response != "y" && response != "Y" {
168
+ fmt.Println("Aborting. Specify a model to download.")
169
+ os.Exit(0)
170
+ }
171
+
172
+ return modelNames // Return all models if confirmed
173
+ }
174
+ return flag.Args() // Return specific models if arguments are provided
175
+ }
176
+
177
+ func CalculateTotalDownloadSize(models []string) (int64, error) {
178
+ var totalSize int64
179
+ client := http.Client{}
180
+
181
+ for _, model := range models {
182
+ modelURL, err := URLForModel(model)
183
+ if err != nil {
184
+ return 0, err
185
+ }
186
+
187
+ // Issue a HEAD request to get the file size
188
+ req, err := http.NewRequest("HEAD", modelURL, nil)
189
+ if err != nil {
190
+ return 0, err
191
+ }
192
+
193
+ resp, err := client.Do(req)
194
+ if err != nil {
195
+ return 0, err
196
+ }
197
+ resp.Body.Close()
198
+
199
+ if resp.StatusCode != http.StatusOK {
200
+ fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
201
+ continue
202
+ }
203
+
204
+ size := resp.ContentLength
205
+ totalSize += size
206
+ }
207
+ return totalSize, nil
208
+ }
209
+
210
+ // URLForModel returns the URL for the given model on huggingface.co
211
+ func URLForModel(model string) (string, error) {
212
+ // Ensure "ggml-" prefix is added only once
213
+ if !strings.HasPrefix(model, "ggml-") {
214
+ model = "ggml-" + model
215
+ }
216
+
217
+ // Ensure ".bin" extension is added only once
218
+ if filepath.Ext(model) != srcExt {
219
+ model += srcExt
220
+ }
221
+
222
+ // Parse the base URL
223
+ url, err := url.Parse(srcUrl)
224
+ if err != nil {
225
+ return "", err
226
+ }
227
+
228
+ // Ensure no trailing slash in the base URL
229
+ url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
230
+ return url.String(), nil
231
+ }
232
+
233
+ // Download downloads the model from the given URL to the given output directory
234
+ func Download(ctx context.Context, p io.Writer, model, out string) (string, error) {
235
+ // Create HTTP client
236
+ client := http.Client{
237
+ Timeout: *flagTimeout,
238
+ }
239
+
240
+ // Initiate the download
241
+ req, err := http.NewRequest("GET", model, nil)
242
+ if err != nil {
243
+ return "", err
244
+ }
245
+ resp, err := client.Do(req)
246
+ if err != nil {
247
+ return "", err
248
+ }
249
+ defer resp.Body.Close()
250
+ if resp.StatusCode != http.StatusOK {
251
+ return "", fmt.Errorf("%s: %s", model, resp.Status)
252
+ }
253
+
254
+ // If output file exists and is the same size as the model, skip
255
+ path := filepath.Join(out, filepath.Base(model))
256
+ if info, err := os.Stat(path); err == nil && info.Size() == resp.ContentLength {
257
+ fmt.Fprintln(p, "Skipping", model, "as it already exists")
258
+ return "", nil
259
+ }
260
+
261
+ // Create file
262
+ w, err := os.Create(path)
263
+ if err != nil {
264
+ return "", err
265
+ }
266
+ defer w.Close()
267
+
268
+ // Report
269
+ fmt.Fprintln(p, "Downloading", model, "to", out)
270
+
271
+ // Progressively download the model
272
+ data := make([]byte, bufSize)
273
+ count, pct := int64(0), int64(0)
274
+ ticker := time.NewTicker(5 * time.Second)
275
+ for {
276
+ select {
277
+ case <-ctx.Done():
278
+ // Cancelled, return error
279
+ return path, ctx.Err()
280
+ case <-ticker.C:
281
+ pct = DownloadReport(p, pct, count, resp.ContentLength)
282
+ default:
283
+ // Read body
284
+ n, err := resp.Body.Read(data)
285
+ if err != nil {
286
+ DownloadReport(p, pct, count, resp.ContentLength)
287
+ return path, err
288
+ } else if m, err := w.Write(data[:n]); err != nil {
289
+ return path, err
290
+ } else {
291
+ count += int64(m)
292
+ }
293
+ }
294
+ }
295
+ }
296
+
297
+ // Report periodically reports the download progress when percentage changes
298
+ func DownloadReport(w io.Writer, pct, count, total int64) int64 {
299
+ pct_ := count * 100 / total
300
+ if pct_ > pct {
301
+ fmt.Fprintf(w, " ...%d MB written (%d%%)\n", count/1e6, pct_)
302
+ }
303
+ return pct_
304
+ }
whisper.cpp/bindings/go/examples/go-whisper/color.go ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import "fmt"
4
+
5
+ ///////////////////////////////////////////////////////////////////////////////
6
+ // CONSTANTS
7
+
8
+ const (
9
+ Reset = "\033[0m"
10
+ RGBPrefix = "\033[38;5;" // followed by RGB values in decimal format separated by colons
11
+ RGBSuffix = "m"
12
+ )
13
+
14
+ ///////////////////////////////////////////////////////////////////////////////
15
+ // PUBLIC METHODS
16
+
17
+ // Colorize text with RGB values, from 0 to 23
18
+ func Colorize(text string, v int) string {
19
+ // https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
20
+ // Grayscale colors are in the range 232-255
21
+ return RGBPrefix + fmt.Sprint(v%24+232) + RGBSuffix + text + Reset
22
+ }
whisper.cpp/bindings/go/examples/go-whisper/flags.go ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "flag"
5
+ "fmt"
6
+ "strings"
7
+ "time"
8
+
9
+ // Packages
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
+ )
12
+
13
+ ///////////////////////////////////////////////////////////////////////////////
14
+ // TYPES
15
+
16
+ type Flags struct {
17
+ *flag.FlagSet
18
+ }
19
+
20
+ ///////////////////////////////////////////////////////////////////////////////
21
+ // LIFECYCLE
22
+
23
+ func NewFlags(name string, args []string) (*Flags, error) {
24
+ flags := &Flags{
25
+ FlagSet: flag.NewFlagSet(name, flag.ContinueOnError),
26
+ }
27
+
28
+ // Register the command line arguments
29
+ registerFlags(flags)
30
+
31
+ // Parse command line
32
+ if err := flags.Parse(args); err != nil {
33
+ return nil, err
34
+ }
35
+
36
+ // Return success
37
+ return flags, nil
38
+ }
39
+
40
+ ///////////////////////////////////////////////////////////////////////////////
41
+ // PUBLIC METHODS
42
+
43
+ func (flags *Flags) GetModel() string {
44
+ return flags.Lookup("model").Value.String()
45
+ }
46
+
47
+ func (flags *Flags) GetLanguage() string {
48
+ return flags.Lookup("language").Value.String()
49
+ }
50
+
51
+ func (flags *Flags) IsTranslate() bool {
52
+ return flags.Lookup("translate").Value.(flag.Getter).Get().(bool)
53
+ }
54
+
55
+ func (flags *Flags) GetOffset() time.Duration {
56
+ return flags.Lookup("offset").Value.(flag.Getter).Get().(time.Duration)
57
+ }
58
+
59
+ func (flags *Flags) GetDuration() time.Duration {
60
+ return flags.Lookup("duration").Value.(flag.Getter).Get().(time.Duration)
61
+ }
62
+
63
+ func (flags *Flags) GetThreads() uint {
64
+ return flags.Lookup("threads").Value.(flag.Getter).Get().(uint)
65
+ }
66
+
67
+ func (flags *Flags) GetOut() string {
68
+ return strings.ToLower(flags.Lookup("out").Value.String())
69
+ }
70
+
71
+ func (flags *Flags) IsTokens() bool {
72
+ return flags.Lookup("tokens").Value.String() == "true"
73
+ }
74
+
75
+ func (flags *Flags) IsColorize() bool {
76
+ return flags.Lookup("colorize").Value.String() == "true"
77
+ }
78
+
79
+ func (flags *Flags) GetMaxLen() uint {
80
+ return flags.Lookup("max-len").Value.(flag.Getter).Get().(uint)
81
+ }
82
+
83
+ func (flags *Flags) GetMaxTokens() uint {
84
+ return flags.Lookup("max-tokens").Value.(flag.Getter).Get().(uint)
85
+ }
86
+
87
+ func (flags *Flags) GetWordThreshold() float32 {
88
+ return float32(flags.Lookup("word-thold").Value.(flag.Getter).Get().(float64))
89
+ }
90
+
91
+ func (flags *Flags) SetParams(context whisper.Context) error {
92
+ if lang := flags.GetLanguage(); lang != "" && lang != "auto" {
93
+ fmt.Fprintf(flags.Output(), "Setting language to %q\n", lang)
94
+ if err := context.SetLanguage(lang); err != nil {
95
+ return err
96
+ }
97
+ }
98
+ if flags.IsTranslate() && context.IsMultilingual() {
99
+ fmt.Fprintf(flags.Output(), "Setting translate to true\n")
100
+ context.SetTranslate(true)
101
+ }
102
+ if offset := flags.GetOffset(); offset != 0 {
103
+ fmt.Fprintf(flags.Output(), "Setting offset to %v\n", offset)
104
+ context.SetOffset(offset)
105
+ }
106
+ if duration := flags.GetDuration(); duration != 0 {
107
+ fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
108
+ context.SetDuration(duration)
109
+ }
110
+ if threads := flags.GetThreads(); threads != 0 {
111
+ fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
112
+ context.SetThreads(threads)
113
+ }
114
+ if max_len := flags.GetMaxLen(); max_len != 0 {
115
+ fmt.Fprintf(flags.Output(), "Setting max_segment_length to %d\n", max_len)
116
+ context.SetMaxSegmentLength(max_len)
117
+ }
118
+ if max_tokens := flags.GetMaxTokens(); max_tokens != 0 {
119
+ fmt.Fprintf(flags.Output(), "Setting max_tokens to %d\n", max_tokens)
120
+ context.SetMaxTokensPerSegment(max_tokens)
121
+ }
122
+ if word_threshold := flags.GetWordThreshold(); word_threshold != 0 {
123
+ fmt.Fprintf(flags.Output(), "Setting word_threshold to %f\n", word_threshold)
124
+ context.SetTokenThreshold(word_threshold)
125
+ }
126
+
127
+ // Return success
128
+ return nil
129
+ }
130
+
131
+ ///////////////////////////////////////////////////////////////////////////////
132
+ // PRIVATE METHODS
133
+
134
+ func registerFlags(flag *Flags) {
135
+ flag.String("model", "", "Path to the model file")
136
+ flag.String("language", "", "Spoken language")
137
+ flag.Bool("translate", false, "Translate from source language to english")
138
+ flag.Duration("offset", 0, "Time offset")
139
+ flag.Duration("duration", 0, "Duration of audio to process")
140
+ flag.Uint("threads", 0, "Number of threads to use")
141
+ flag.Uint("max-len", 0, "Maximum segment length in characters")
142
+ flag.Uint("max-tokens", 0, "Maximum tokens per segment")
143
+ flag.Float64("word-thold", 0, "Maximum segment score")
144
+ flag.Bool("tokens", false, "Display tokens")
145
+ flag.Bool("colorize", false, "Colorize tokens")
146
+ flag.String("out", "", "Output format (srt, none or leave as empty string)")
147
+ }
whisper.cpp/bindings/go/examples/go-whisper/main.go ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "flag"
5
+ "fmt"
6
+ "os"
7
+ "path/filepath"
8
+
9
+ // Packages
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
+ )
12
+
13
+ func main() {
14
+ flags, err := NewFlags(filepath.Base(os.Args[0]), os.Args[1:])
15
+ if err == flag.ErrHelp {
16
+ os.Exit(0)
17
+ } else if err != nil {
18
+ fmt.Fprintln(os.Stderr, err)
19
+ os.Exit(1)
20
+ } else if flags.GetModel() == "" {
21
+ fmt.Fprintln(os.Stderr, "Use -model flag to specify which model file to use")
22
+ os.Exit(1)
23
+ } else if flags.NArg() == 0 {
24
+ fmt.Fprintln(os.Stderr, "No input files specified")
25
+ os.Exit(1)
26
+ }
27
+
28
+ // Load model
29
+ model, err := whisper.New(flags.GetModel())
30
+ if err != nil {
31
+ fmt.Fprintln(os.Stderr, err)
32
+ os.Exit(1)
33
+ }
34
+ defer model.Close()
35
+
36
+ // Process files
37
+ for _, filename := range flags.Args() {
38
+ if err := Process(model, filename, flags); err != nil {
39
+ fmt.Fprintln(os.Stderr, err)
40
+ continue
41
+ }
42
+ }
43
+ }
whisper.cpp/bindings/go/examples/go-whisper/process.go ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "os"
7
+ "time"
8
+
9
+ // Package imports
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
+ wav "github.com/go-audio/wav"
12
+ )
13
+
14
+ func Process(model whisper.Model, path string, flags *Flags) error {
15
+ var data []float32
16
+
17
+ // Create processing context
18
+ context, err := model.NewContext()
19
+ if err != nil {
20
+ return err
21
+ }
22
+
23
+ // Set the parameters
24
+ if err := flags.SetParams(context); err != nil {
25
+ return err
26
+ }
27
+
28
+ fmt.Printf("\n%s\n", context.SystemInfo())
29
+
30
+ // Open the file
31
+ fmt.Fprintf(flags.Output(), "Loading %q\n", path)
32
+ fh, err := os.Open(path)
33
+ if err != nil {
34
+ return err
35
+ }
36
+ defer fh.Close()
37
+
38
+ // Decode the WAV file - load the full buffer
39
+ dec := wav.NewDecoder(fh)
40
+ if buf, err := dec.FullPCMBuffer(); err != nil {
41
+ return err
42
+ } else if dec.SampleRate != whisper.SampleRate {
43
+ return fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
44
+ } else if dec.NumChans != 1 {
45
+ return fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
46
+ } else {
47
+ data = buf.AsFloat32Buffer().Data
48
+ }
49
+
50
+ // Segment callback when -tokens is specified
51
+ var cb whisper.SegmentCallback
52
+ if flags.IsTokens() {
53
+ cb = func(segment whisper.Segment) {
54
+ fmt.Fprintf(flags.Output(), "%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
55
+ for _, token := range segment.Tokens {
56
+ if flags.IsColorize() && context.IsText(token) {
57
+ fmt.Fprint(flags.Output(), Colorize(token.Text, int(token.P*24.0)), " ")
58
+ } else {
59
+ fmt.Fprint(flags.Output(), token.Text, " ")
60
+ }
61
+ }
62
+ fmt.Fprintln(flags.Output(), "")
63
+ fmt.Fprintln(flags.Output(), "")
64
+ }
65
+ }
66
+
67
+ // Process the data
68
+ fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
69
+ context.ResetTimings()
70
+ if err := context.Process(data, nil, cb, nil); err != nil {
71
+ return err
72
+ }
73
+
74
+ context.PrintTimings()
75
+
76
+ // Print out the results
77
+ switch {
78
+ case flags.GetOut() == "srt":
79
+ return OutputSRT(os.Stdout, context)
80
+ case flags.GetOut() == "none":
81
+ return nil
82
+ default:
83
+ return Output(os.Stdout, context, flags.IsColorize())
84
+ }
85
+ }
86
+
87
+ // Output text as SRT file
88
+ func OutputSRT(w io.Writer, context whisper.Context) error {
89
+ n := 1
90
+ for {
91
+ segment, err := context.NextSegment()
92
+ if err == io.EOF {
93
+ return nil
94
+ } else if err != nil {
95
+ return err
96
+ }
97
+ fmt.Fprintln(w, n)
98
+ fmt.Fprintln(w, srtTimestamp(segment.Start), " --> ", srtTimestamp(segment.End))
99
+ fmt.Fprintln(w, segment.Text)
100
+ fmt.Fprintln(w, "")
101
+ n++
102
+ }
103
+ }
104
+
105
+ // Output text to terminal
106
+ func Output(w io.Writer, context whisper.Context, colorize bool) error {
107
+ for {
108
+ segment, err := context.NextSegment()
109
+ if err == io.EOF {
110
+ return nil
111
+ } else if err != nil {
112
+ return err
113
+ }
114
+ fmt.Fprintf(w, "[%6s->%6s]", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
115
+ if colorize {
116
+ for _, token := range segment.Tokens {
117
+ if !context.IsText(token) {
118
+ continue
119
+ }
120
+ fmt.Fprint(w, " ", Colorize(token.Text, int(token.P*24.0)))
121
+ }
122
+ fmt.Fprint(w, "\n")
123
+ } else {
124
+ fmt.Fprintln(w, " ", segment.Text)
125
+ }
126
+ }
127
+ }
128
+
129
+ // Return srtTimestamp
130
+ func srtTimestamp(t time.Duration) string {
131
+ return fmt.Sprintf("%02d:%02d:%02d,%03d", t/time.Hour, (t%time.Hour)/time.Minute, (t%time.Minute)/time.Second, (t%time.Second)/time.Millisecond)
132
+ }
whisper.cpp/bindings/go/go.mod ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module github.com/ggerganov/whisper.cpp/bindings/go
2
+
3
+ go 1.23
4
+
5
+ require (
6
+ github.com/go-audio/wav v1.1.0
7
+ github.com/stretchr/testify v1.9.0
8
+ )
9
+
10
+ require (
11
+ github.com/davecgh/go-spew v1.1.1 // indirect
12
+ github.com/go-audio/audio v1.0.0 // indirect
13
+ github.com/go-audio/riff v1.0.0 // indirect
14
+ github.com/pmezard/go-difflib v1.0.0 // indirect
15
+ gopkg.in/yaml.v3 v3.0.1 // indirect
16
+ )
whisper.cpp/bindings/go/go.sum ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2
+ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3
+ github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
4
+ github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
5
+ github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
6
+ github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
7
+ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
8
+ github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
9
+ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
10
+ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
11
+ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
12
+ github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
13
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
14
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
15
+ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
16
+ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
whisper.cpp/bindings/go/params.go ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "fmt"
5
+ )
6
+
7
+ ///////////////////////////////////////////////////////////////////////////////
8
+ // CGO
9
+
10
+ /*
11
+ #include <whisper.h>
12
+ */
13
+ import "C"
14
+
15
+ ///////////////////////////////////////////////////////////////////////////////
16
+ // PUBLIC METHODS
17
+
18
+ func (p *Params) SetTranslate(v bool) {
19
+ p.translate = toBool(v)
20
+ }
21
+
22
+ func (p *Params) SetSplitOnWord(v bool) {
23
+ p.split_on_word = toBool(v)
24
+ }
25
+
26
+ func (p *Params) SetNoContext(v bool) {
27
+ p.no_context = toBool(v)
28
+ }
29
+
30
+ func (p *Params) SetSingleSegment(v bool) {
31
+ p.single_segment = toBool(v)
32
+ }
33
+
34
+ func (p *Params) SetPrintSpecial(v bool) {
35
+ p.print_special = toBool(v)
36
+ }
37
+
38
+ func (p *Params) SetPrintProgress(v bool) {
39
+ p.print_progress = toBool(v)
40
+ }
41
+
42
+ func (p *Params) SetPrintRealtime(v bool) {
43
+ p.print_realtime = toBool(v)
44
+ }
45
+
46
+ func (p *Params) SetPrintTimestamps(v bool) {
47
+ p.print_timestamps = toBool(v)
48
+ }
49
+
50
+ // Voice Activity Detection (VAD)
51
+ func (p *Params) SetVAD(v bool) {
52
+ p.vad = toBool(v)
53
+ }
54
+
55
+ func (p *Params) SetVADModelPath(path string) {
56
+ p.vad_model_path = C.CString(path)
57
+ }
58
+
59
+ func (p *Params) SetVADThreshold(t float32) {
60
+ p.vad_params.threshold = C.float(t)
61
+ }
62
+
63
+ func (p *Params) SetVADMinSpeechMs(ms int) {
64
+ p.vad_params.min_speech_duration_ms = C.int(ms)
65
+ }
66
+
67
+ func (p *Params) SetVADMinSilenceMs(ms int) {
68
+ p.vad_params.min_silence_duration_ms = C.int(ms)
69
+ }
70
+
71
+ func (p *Params) SetVADMaxSpeechSec(s float32) {
72
+ p.vad_params.max_speech_duration_s = C.float(s)
73
+ }
74
+
75
+ func (p *Params) SetVADSpeechPadMs(ms int) {
76
+ p.vad_params.speech_pad_ms = C.int(ms)
77
+ }
78
+
79
+ func (p *Params) SetVADSamplesOverlap(sec float32) {
80
+ p.vad_params.samples_overlap = C.float(sec)
81
+ }
82
+
83
+ // Set language id
84
+ func (p *Params) SetLanguage(lang int) error {
85
+ if lang == -1 {
86
+ p.language = nil
87
+ return nil
88
+ }
89
+ str := C.whisper_lang_str(C.int(lang))
90
+ if str == nil {
91
+ return ErrInvalidLanguage
92
+ } else {
93
+ p.language = str
94
+ }
95
+ return nil
96
+ }
97
+
98
+ // Get language id
99
+ func (p *Params) Language() int {
100
+ if p.language == nil {
101
+ return -1
102
+ }
103
+ return int(C.whisper_lang_id(p.language))
104
+ }
105
+
106
+ // Threads available
107
+ func (p *Params) Threads() int {
108
+ return int(p.n_threads)
109
+ }
110
+
111
+ // Set number of threads to use
112
+ func (p *Params) SetThreads(threads int) {
113
+ p.n_threads = C.int(threads)
114
+ }
115
+
116
+ // Set start offset in ms
117
+ func (p *Params) SetOffset(offset_ms int) {
118
+ p.offset_ms = C.int(offset_ms)
119
+ }
120
+
121
+ // Set audio duration to process in ms
122
+ func (p *Params) SetDuration(duration_ms int) {
123
+ p.duration_ms = C.int(duration_ms)
124
+ }
125
+
126
+ // Set timestamp token probability threshold (~0.01)
127
+ func (p *Params) SetTokenThreshold(t float32) {
128
+ p.thold_pt = C.float(t)
129
+ }
130
+
131
+ // Set timestamp token sum probability threshold (~0.01)
132
+ func (p *Params) SetTokenSumThreshold(t float32) {
133
+ p.thold_ptsum = C.float(t)
134
+ }
135
+
136
+ // Set max segment length in characters
137
+ func (p *Params) SetMaxSegmentLength(n int) {
138
+ p.max_len = C.int(n)
139
+ }
140
+
141
+ func (p *Params) SetTokenTimestamps(b bool) {
142
+ p.token_timestamps = toBool(b)
143
+ }
144
+
145
+ // Set max tokens per segment (0 = no limit)
146
+ func (p *Params) SetMaxTokensPerSegment(n int) {
147
+ p.max_tokens = C.int(n)
148
+ }
149
+
150
+ // Set audio encoder context
151
+ func (p *Params) SetAudioCtx(n int) {
152
+ p.audio_ctx = C.int(n)
153
+ }
154
+
155
+ func (p *Params) SetMaxContext(n int) {
156
+ p.n_max_text_ctx = C.int(n)
157
+ }
158
+
159
+ func (p *Params) SetBeamSize(n int) {
160
+ p.beam_search.beam_size = C.int(n)
161
+ }
162
+
163
+ func (p *Params) SetEntropyThold(t float32) {
164
+ p.entropy_thold = C.float(t)
165
+ }
166
+
167
+ func (p *Params) SetTemperature(t float32) {
168
+ p.temperature = C.float(t)
169
+ }
170
+
171
+ // Sets the fallback temperature incrementation
172
+ // Pass -1.0 to disable this feature
173
+ func (p *Params) SetTemperatureFallback(t float32) {
174
+ p.temperature_inc = C.float(t)
175
+ }
176
+
177
+ // Set initial prompt
178
+ func (p *Params) SetInitialPrompt(prompt string) {
179
+ p.initial_prompt = C.CString(prompt)
180
+ }
181
+
182
+ func (p *Params) SetCarryInitialPrompt(v bool) {
183
+ p.carry_initial_prompt = toBool(v)
184
+ }
185
+
186
+ ///////////////////////////////////////////////////////////////////////////////
187
+ // PRIVATE METHODS
188
+
189
+ func toBool(v bool) C.bool {
190
+ if v {
191
+ return C.bool(true)
192
+ }
193
+ return C.bool(false)
194
+ }
195
+
196
+ ///////////////////////////////////////////////////////////////////////////////
197
+ // STRINGIFY
198
+
199
+ func (p *Params) String() string {
200
+ str := "<whisper.params"
201
+ str += fmt.Sprintf(" strategy=%v", p.strategy)
202
+ str += fmt.Sprintf(" n_threads=%d", p.n_threads)
203
+ if p.language != nil {
204
+ str += fmt.Sprintf(" language=%s", C.GoString(p.language))
205
+ }
206
+ str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
207
+ str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
208
+ str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
209
+ str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
210
+ str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
211
+ str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
212
+ str += fmt.Sprintf(" temperature=%f", p.temperature)
213
+ str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
214
+ str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
215
+ if p.translate {
216
+ str += " translate"
217
+ }
218
+ if p.no_context {
219
+ str += " no_context"
220
+ }
221
+ if p.single_segment {
222
+ str += " single_segment"
223
+ }
224
+ if p.print_special {
225
+ str += " print_special"
226
+ }
227
+ if p.print_progress {
228
+ str += " print_progress"
229
+ }
230
+ if p.print_realtime {
231
+ str += " print_realtime"
232
+ }
233
+ if p.print_timestamps {
234
+ str += " print_timestamps"
235
+ }
236
+ if p.token_timestamps {
237
+ str += " token_timestamps"
238
+ }
239
+ if p.carry_initial_prompt {
240
+ str += " carry_initial_prompt"
241
+ }
242
+
243
+ return str + ">"
244
+ }
whisper.cpp/bindings/go/pkg/whisper/consts.go ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "errors"
5
+
6
+ // Bindings
7
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
8
+ )
9
+
10
+ ///////////////////////////////////////////////////////////////////////////////
11
+ // ERRORS
12
+
13
+ var (
14
+ ErrUnableToLoadModel = errors.New("unable to load model")
15
+ ErrInternalAppError = errors.New("internal application error")
16
+ ErrProcessingFailed = errors.New("processing failed")
17
+ ErrUnsupportedLanguage = errors.New("unsupported language")
18
+ ErrModelNotMultilingual = errors.New("model is not multilingual")
19
+ )
20
+
21
+ ///////////////////////////////////////////////////////////////////////////////
22
+ // CONSTANTS
23
+
24
+ // SampleRate is the sample rate of the audio data.
25
+ const SampleRate = whisper.SampleRate
26
+
27
+ // SampleBits is the number of bytes per sample.
28
+ const SampleBits = whisper.SampleBits
whisper.cpp/bindings/go/pkg/whisper/context.go ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "runtime"
7
+ "strings"
8
+ "time"
9
+
10
+ // Bindings
11
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
12
+ )
13
+
14
+ ///////////////////////////////////////////////////////////////////////////////
15
+ // TYPES
16
+
17
+ type context struct {
18
+ n int
19
+ model *model
20
+ params whisper.Params
21
+ }
22
+
23
+ // Make sure context adheres to the interface
24
+ var _ Context = (*context)(nil)
25
+
26
+ ///////////////////////////////////////////////////////////////////////////////
27
+ // LIFECYCLE
28
+
29
+ func newContext(model *model, params whisper.Params) (Context, error) {
30
+ context := new(context)
31
+ context.model = model
32
+ context.params = params
33
+
34
+ // Return success
35
+ return context, nil
36
+ }
37
+
38
+ ///////////////////////////////////////////////////////////////////////////////
39
+ // PUBLIC METHODS
40
+
41
+ // Set the language to use for speech recognition.
42
+ func (context *context) SetLanguage(lang string) error {
43
+ if context.model.ctx == nil {
44
+ return ErrInternalAppError
45
+ }
46
+ if !context.model.IsMultilingual() {
47
+ return ErrModelNotMultilingual
48
+ }
49
+
50
+ if lang == "auto" {
51
+ context.params.SetLanguage(-1)
52
+ } else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
53
+ return ErrUnsupportedLanguage
54
+ } else if err := context.params.SetLanguage(id); err != nil {
55
+ return err
56
+ }
57
+ // Return success
58
+ return nil
59
+ }
60
+
61
+ func (context *context) IsMultilingual() bool {
62
+ return context.model.IsMultilingual()
63
+ }
64
+
65
+ // Get language
66
+ func (context *context) Language() string {
67
+ id := context.params.Language()
68
+ if id == -1 {
69
+ return "auto"
70
+ }
71
+ return whisper.Whisper_lang_str(context.params.Language())
72
+ }
73
+
74
+ func (context *context) DetectedLanguage() string {
75
+ return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id())
76
+ }
77
+
78
+ // Set translate flag
79
+ func (context *context) SetTranslate(v bool) {
80
+ context.params.SetTranslate(v)
81
+ }
82
+
83
+ // Voice Activity Detection (VAD)
84
+ func (context *context) SetVAD(v bool) {
85
+ context.params.SetVAD(v)
86
+ }
87
+
88
+ func (context *context) SetVADModelPath(path string) {
89
+ context.params.SetVADModelPath(path)
90
+ }
91
+
92
+ func (context *context) SetVADThreshold(t float32) {
93
+ context.params.SetVADThreshold(t)
94
+ }
95
+
96
+ func (context *context) SetVADMinSpeechMs(ms int) {
97
+ context.params.SetVADMinSpeechMs(ms)
98
+ }
99
+
100
+ func (context *context) SetVADMinSilenceMs(ms int) {
101
+ context.params.SetVADMinSilenceMs(ms)
102
+ }
103
+
104
+ func (context *context) SetVADMaxSpeechSec(s float32) {
105
+ context.params.SetVADMaxSpeechSec(s)
106
+ }
107
+
108
+ func (context *context) SetVADSpeechPadMs(ms int) {
109
+ context.params.SetVADSpeechPadMs(ms)
110
+ }
111
+
112
+ func (context *context) SetVADSamplesOverlap(sec float32) {
113
+ context.params.SetVADSamplesOverlap(sec)
114
+ }
115
+
116
+ func (context *context) SetSplitOnWord(v bool) {
117
+ context.params.SetSplitOnWord(v)
118
+ }
119
+
120
+ // Set number of threads to use
121
+ func (context *context) SetThreads(v uint) {
122
+ context.params.SetThreads(int(v))
123
+ }
124
+
125
+ // Set time offset
126
+ func (context *context) SetOffset(v time.Duration) {
127
+ context.params.SetOffset(int(v.Milliseconds()))
128
+ }
129
+
130
+ // Set duration of audio to process
131
+ func (context *context) SetDuration(v time.Duration) {
132
+ context.params.SetDuration(int(v.Milliseconds()))
133
+ }
134
+
135
+ // Set timestamp token probability threshold (~0.01)
136
+ func (context *context) SetTokenThreshold(t float32) {
137
+ context.params.SetTokenThreshold(t)
138
+ }
139
+
140
+ // Set timestamp token sum probability threshold (~0.01)
141
+ func (context *context) SetTokenSumThreshold(t float32) {
142
+ context.params.SetTokenSumThreshold(t)
143
+ }
144
+
145
+ // Set max segment length in characters
146
+ func (context *context) SetMaxSegmentLength(n uint) {
147
+ context.params.SetMaxSegmentLength(int(n))
148
+ }
149
+
150
+ // Set token timestamps flag
151
+ func (context *context) SetTokenTimestamps(b bool) {
152
+ context.params.SetTokenTimestamps(b)
153
+ }
154
+
155
+ // Set max tokens per segment (0 = no limit)
156
+ func (context *context) SetMaxTokensPerSegment(n uint) {
157
+ context.params.SetMaxTokensPerSegment(int(n))
158
+ }
159
+
160
+ // Set audio encoder context
161
+ func (context *context) SetAudioCtx(n uint) {
162
+ context.params.SetAudioCtx(int(n))
163
+ }
164
+
165
+ // Set maximum number of text context tokens to store
166
+ func (context *context) SetMaxContext(n int) {
167
+ context.params.SetMaxContext(n)
168
+ }
169
+
170
+ // Set Beam Size
171
+ func (context *context) SetBeamSize(n int) {
172
+ context.params.SetBeamSize(n)
173
+ }
174
+
175
+ // Set Entropy threshold
176
+ func (context *context) SetEntropyThold(t float32) {
177
+ context.params.SetEntropyThold(t)
178
+ }
179
+
180
+ // Set Temperature
181
+ func (context *context) SetTemperature(t float32) {
182
+ context.params.SetTemperature(t)
183
+ }
184
+
185
+ // Set the fallback temperature incrementation
186
+ // Pass -1.0 to disable this feature
187
+ func (context *context) SetTemperatureFallback(t float32) {
188
+ context.params.SetTemperatureFallback(t)
189
+ }
190
+
191
+ // Set initial prompt
192
+ func (context *context) SetInitialPrompt(prompt string) {
193
+ context.params.SetInitialPrompt(prompt)
194
+ }
195
+
196
+ // ResetTimings resets the mode timings. Should be called before processing
197
+ func (context *context) ResetTimings() {
198
+ context.model.ctx.Whisper_reset_timings()
199
+ }
200
+
201
+ // PrintTimings prints the model timings to stdout.
202
+ func (context *context) PrintTimings() {
203
+ context.model.ctx.Whisper_print_timings()
204
+ }
205
+
206
+ // SystemInfo returns the system information
207
+ func (context *context) SystemInfo() string {
208
+ return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n",
209
+ context.params.Threads(),
210
+ runtime.NumCPU(),
211
+ whisper.Whisper_print_system_info(),
212
+ )
213
+ }
214
+
215
+ // Use mel data at offset_ms to try and auto-detect the spoken language
216
+ // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
217
+ // Returns the probabilities of all languages.
218
+ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) {
219
+ langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads)
220
+ if err != nil {
221
+ return nil, err
222
+ }
223
+ return langProbs, nil
224
+ }
225
+
226
+ // Process new sample data and return any errors
227
+ func (context *context) Process(
228
+ data []float32,
229
+ callEncoderBegin EncoderBeginCallback,
230
+ callNewSegment SegmentCallback,
231
+ callProgress ProgressCallback,
232
+ ) error {
233
+ if context.model.ctx == nil {
234
+ return ErrInternalAppError
235
+ }
236
+ // If the callback is defined then we force on single_segment mode
237
+ if callNewSegment != nil {
238
+ context.params.SetSingleSegment(true)
239
+ }
240
+
241
+ // We don't do parallel processing at the moment
242
+ processors := 0
243
+ if processors > 1 {
244
+ if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin,
245
+ func(new int) {
246
+ if callNewSegment != nil {
247
+ num_segments := context.model.ctx.Whisper_full_n_segments()
248
+ s0 := num_segments - new
249
+ for i := s0; i < num_segments; i++ {
250
+ callNewSegment(toSegment(context.model.ctx, i))
251
+ }
252
+ }
253
+ }); err != nil {
254
+ return err
255
+ }
256
+ } else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin,
257
+ func(new int) {
258
+ if callNewSegment != nil {
259
+ num_segments := context.model.ctx.Whisper_full_n_segments()
260
+ s0 := num_segments - new
261
+ for i := s0; i < num_segments; i++ {
262
+ callNewSegment(toSegment(context.model.ctx, i))
263
+ }
264
+ }
265
+ }, func(progress int) {
266
+ if callProgress != nil {
267
+ callProgress(progress)
268
+ }
269
+ }); err != nil {
270
+ return err
271
+ }
272
+
273
+ // Reset n so that more Segments can be available within NextSegment call
274
+ context.n = 0
275
+
276
+ // Return success
277
+ return nil
278
+ }
279
+
280
+ // Return the next segment of tokens
281
+ func (context *context) NextSegment() (Segment, error) {
282
+ if context.model.ctx == nil {
283
+ return Segment{}, ErrInternalAppError
284
+ }
285
+ if context.n >= context.model.ctx.Whisper_full_n_segments() {
286
+ return Segment{}, io.EOF
287
+ }
288
+
289
+ // Populate result
290
+ result := toSegment(context.model.ctx, context.n)
291
+
292
+ // Increment the cursor
293
+ context.n++
294
+
295
+ // Return success
296
+ return result, nil
297
+ }
298
+
299
+ // Test for text tokens
300
+ func (context *context) IsText(t Token) bool {
301
+ switch {
302
+ case context.IsBEG(t):
303
+ return false
304
+ case context.IsSOT(t):
305
+ return false
306
+ case whisper.Token(t.Id) >= context.model.ctx.Whisper_token_eot():
307
+ return false
308
+ case context.IsPREV(t):
309
+ return false
310
+ case context.IsSOLM(t):
311
+ return false
312
+ case context.IsNOT(t):
313
+ return false
314
+ default:
315
+ return true
316
+ }
317
+ }
318
+
319
+ // Test for "begin" token
320
+ func (context *context) IsBEG(t Token) bool {
321
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg()
322
+ }
323
+
324
+ // Test for "start of transcription" token
325
+ func (context *context) IsSOT(t Token) bool {
326
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot()
327
+ }
328
+
329
+ // Test for "end of transcription" token
330
+ func (context *context) IsEOT(t Token) bool {
331
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot()
332
+ }
333
+
334
+ // Test for "start of prev" token
335
+ func (context *context) IsPREV(t Token) bool {
336
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev()
337
+ }
338
+
339
+ // Test for "start of lm" token
340
+ func (context *context) IsSOLM(t Token) bool {
341
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm()
342
+ }
343
+
344
+ // Test for "No timestamps" token
345
+ func (context *context) IsNOT(t Token) bool {
346
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not()
347
+ }
348
+
349
+ // Test for token associated with a specific language
350
+ func (context *context) IsLANG(t Token, lang string) bool {
351
+ if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 {
352
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id)
353
+ } else {
354
+ return false
355
+ }
356
+ }
357
+
358
+ ///////////////////////////////////////////////////////////////////////////////
359
+ // PRIVATE METHODS
360
+
361
+ func toSegment(ctx *whisper.Context, n int) Segment {
362
+ return Segment{
363
+ Num: n,
364
+ Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)),
365
+ Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10,
366
+ End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10,
367
+ Tokens: toTokens(ctx, n),
368
+ }
369
+ }
370
+
371
+ func toTokens(ctx *whisper.Context, n int) []Token {
372
+ result := make([]Token, ctx.Whisper_full_n_tokens(n))
373
+ for i := 0; i < len(result); i++ {
374
+ data := ctx.Whisper_full_get_token_data(n, i)
375
+
376
+ result[i] = Token{
377
+ Id: int(ctx.Whisper_full_get_token_id(n, i)),
378
+ Text: ctx.Whisper_full_get_token_text(n, i),
379
+ P: ctx.Whisper_full_get_token_p(n, i),
380
+ Start: time.Duration(data.T0()) * time.Millisecond * 10,
381
+ End: time.Duration(data.T1()) * time.Millisecond * 10,
382
+ }
383
+ }
384
+ return result
385
+ }
whisper.cpp/bindings/go/pkg/whisper/context_test.go ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper_test
2
+
3
+ import (
4
+ "os"
5
+ "testing"
6
+
7
+ "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
8
+ "github.com/go-audio/wav"
9
+ assert "github.com/stretchr/testify/assert"
10
+ )
11
+
12
+ func TestSetLanguage(t *testing.T) {
13
+ assert := assert.New(t)
14
+
15
+ model, err := whisper.New(ModelPath)
16
+ assert.NoError(err)
17
+ assert.NotNil(model)
18
+ defer model.Close()
19
+
20
+ context, err := model.NewContext()
21
+ assert.NoError(err)
22
+
23
+ // This returns an error since
24
+ // the model 'models/ggml-small.en.bin'
25
+ // that is loaded is not multilingual
26
+ err = context.SetLanguage("en")
27
+ assert.Error(err)
28
+ }
29
+
30
+ func TestContextModelIsMultilingual(t *testing.T) {
31
+ assert := assert.New(t)
32
+
33
+ model, err := whisper.New(ModelPath)
34
+ assert.NoError(err)
35
+ assert.NotNil(model)
36
+ defer model.Close()
37
+
38
+ context, err := model.NewContext()
39
+ assert.NoError(err)
40
+
41
+ isMultilingual := context.IsMultilingual()
42
+
43
+ // This returns false since
44
+ // the model 'models/ggml-small.en.bin'
45
+ // that is loaded is not multilingual
46
+ assert.False(isMultilingual)
47
+ }
48
+
49
+ func TestLanguage(t *testing.T) {
50
+ assert := assert.New(t)
51
+
52
+ model, err := whisper.New(ModelPath)
53
+ assert.NoError(err)
54
+ assert.NotNil(model)
55
+ defer model.Close()
56
+
57
+ context, err := model.NewContext()
58
+ assert.NoError(err)
59
+
60
+ // This always returns en since
61
+ // the model 'models/ggml-small.en.bin'
62
+ // that is loaded is not multilingual
63
+ expectedLanguage := "en"
64
+ actualLanguage := context.Language()
65
+ assert.Equal(expectedLanguage, actualLanguage)
66
+ }
67
+
68
+ func TestProcess(t *testing.T) {
69
+ assert := assert.New(t)
70
+
71
+ fh, err := os.Open(SamplePath)
72
+ assert.NoError(err)
73
+ defer fh.Close()
74
+
75
+ // Decode the WAV file - load the full buffer
76
+ dec := wav.NewDecoder(fh)
77
+ buf, err := dec.FullPCMBuffer()
78
+ assert.NoError(err)
79
+ assert.Equal(uint16(1), dec.NumChans)
80
+
81
+ data := buf.AsFloat32Buffer().Data
82
+
83
+ model, err := whisper.New(ModelPath)
84
+ assert.NoError(err)
85
+ assert.NotNil(model)
86
+ defer model.Close()
87
+
88
+ context, err := model.NewContext()
89
+ assert.NoError(err)
90
+
91
+ err = context.Process(data, nil, nil, nil)
92
+ assert.NoError(err)
93
+ }
94
+
95
+ func TestDetectedLanguage(t *testing.T) {
96
+ assert := assert.New(t)
97
+
98
+ fh, err := os.Open(SamplePath)
99
+ assert.NoError(err)
100
+ defer fh.Close()
101
+
102
+ // Decode the WAV file - load the full buffer
103
+ dec := wav.NewDecoder(fh)
104
+ buf, err := dec.FullPCMBuffer()
105
+ assert.NoError(err)
106
+ assert.Equal(uint16(1), dec.NumChans)
107
+
108
+ data := buf.AsFloat32Buffer().Data
109
+
110
+ model, err := whisper.New(ModelPath)
111
+ assert.NoError(err)
112
+ assert.NotNil(model)
113
+ defer model.Close()
114
+
115
+ context, err := model.NewContext()
116
+ assert.NoError(err)
117
+
118
+ err = context.Process(data, nil, nil, nil)
119
+ assert.NoError(err)
120
+
121
+ expectedLanguage := "en"
122
+ actualLanguage := context.DetectedLanguage()
123
+ assert.Equal(expectedLanguage, actualLanguage)
124
+ }
whisper.cpp/bindings/go/pkg/whisper/doc.go ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
/*
Package whisper provides the higher-level speech-to-text whisper.cpp API
for Go.
*/
package whisper
whisper.cpp/bindings/go/pkg/whisper/interface.go ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
package whisper

import (
	"io"
	"time"
)

///////////////////////////////////////////////////////////////////////////////
// TYPES

// SegmentCallback is the callback function for processing segments in real
// time. It is called during the Process function.
type SegmentCallback func(Segment)

// ProgressCallback is the callback function for reporting progress during
// processing. It is called during the Process function.
type ProgressCallback func(int)

// EncoderBeginCallback is the callback function for checking if we want to
// continue processing. It is called during the Process function.
type EncoderBeginCallback func() bool

// Model is the interface to a whisper model. Create a new model with the
// function whisper.New(string).
type Model interface {
	io.Closer

	// Return a new speech-to-text context.
	NewContext() (Context, error)

	// Return true if the model is multilingual.
	IsMultilingual() bool

	// Return all languages supported.
	Languages() []string
}

// Context is the speech recognition context.
type Context interface {
	SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
	SetTranslate(bool)        // Set translate flag
	IsMultilingual() bool     // Return true if the model is multilingual.
	Language() string         // Get language
	DetectedLanguage() string // Get detected language

	SetOffset(time.Duration)          // Set offset
	SetDuration(time.Duration)        // Set duration
	SetThreads(uint)                  // Set number of threads to use
	SetSplitOnWord(bool)              // Set split on word flag
	SetTokenThreshold(float32)        // Set timestamp token probability threshold
	SetTokenSumThreshold(float32)     // Set timestamp token sum probability threshold
	SetMaxSegmentLength(uint)         // Set max segment length in characters
	SetTokenTimestamps(bool)          // Set token timestamps flag
	SetMaxTokensPerSegment(uint)      // Set max tokens per segment (0 = no limit)
	SetAudioCtx(uint)                 // Set audio encoder context
	SetMaxContext(n int)              // Set maximum number of text context tokens to store
	SetBeamSize(n int)                // Set Beam Size
	SetEntropyThold(t float32)        // Set Entropy threshold
	SetInitialPrompt(prompt string)   // Set initial prompt
	SetTemperature(t float32)         // Set temperature
	SetTemperatureFallback(t float32) // Set temperature incrementation

	// Voice Activity Detection (VAD) configuration.
	SetVAD(v bool)                    // Enable or disable VAD
	SetVADModelPath(path string)      // Path of the VAD model to load
	SetVADThreshold(t float32)        // VAD threshold
	SetVADMinSpeechMs(ms int)         // Minimum speech duration (ms)
	SetVADMinSilenceMs(ms int)        // Minimum silence duration (ms)
	SetVADMaxSpeechSec(s float32)     // Maximum speech duration (s)
	SetVADSpeechPadMs(ms int)         // Speech padding (ms)
	SetVADSamplesOverlap(sec float32) // Sample overlap (s)

	// Process mono audio data and return any errors.
	// If defined, newly generated segments are passed to the
	// callback function during processing.
	Process([]float32, EncoderBeginCallback, SegmentCallback, ProgressCallback) error

	// After process is called, return segments until the end of the stream
	// is reached, when io.EOF is returned.
	NextSegment() (Segment, error)

	IsBEG(Token) bool          // Test for "begin" token
	IsSOT(Token) bool          // Test for "start of transcription" token
	IsEOT(Token) bool          // Test for "end of transcription" token
	IsPREV(Token) bool         // Test for "start of prev" token
	IsSOLM(Token) bool         // Test for "start of lm" token
	IsNOT(Token) bool          // Test for "No timestamps" token
	IsLANG(Token, string) bool // Test for token associated with a specific language
	IsText(Token) bool         // Test for text token

	// Timings
	PrintTimings()
	ResetTimings()

	SystemInfo() string
}

// Segment is the text result of a speech recognition.
type Segment struct {
	// Segment Number
	Num int

	// Time beginning and end timestamps for the segment.
	Start, End time.Duration

	// The text of the segment.
	Text string

	// The tokens of the segment.
	Tokens []Token
}

// Token is a text or special token.
type Token struct {
	Id         int           // token id in the model vocabulary
	Text       string        // token text
	P          float32       // token probability
	Start, End time.Duration // token timestamps
}
whisper.cpp/bindings/go/pkg/whisper/model.go ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "fmt"
5
+ "os"
6
+ "runtime"
7
+
8
+ // Bindings
9
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
10
+ )
11
+
12
+ ///////////////////////////////////////////////////////////////////////////////
13
+ // TYPES
14
+
15
+ type model struct {
16
+ path string
17
+ ctx *whisper.Context
18
+ }
19
+
20
+ // Make sure model adheres to the interface
21
+ var _ Model = (*model)(nil)
22
+
23
+ ///////////////////////////////////////////////////////////////////////////////
24
+ // LIFECYCLE
25
+
26
+ func New(path string) (Model, error) {
27
+ model := new(model)
28
+ if _, err := os.Stat(path); err != nil {
29
+ return nil, err
30
+ } else if ctx := whisper.Whisper_init(path); ctx == nil {
31
+ return nil, ErrUnableToLoadModel
32
+ } else {
33
+ model.ctx = ctx
34
+ model.path = path
35
+ }
36
+
37
+ // Return success
38
+ return model, nil
39
+ }
40
+
41
+ func (model *model) Close() error {
42
+ if model.ctx != nil {
43
+ model.ctx.Whisper_free()
44
+ }
45
+
46
+ // Release resources
47
+ model.ctx = nil
48
+
49
+ // Return success
50
+ return nil
51
+ }
52
+
53
+ ///////////////////////////////////////////////////////////////////////////////
54
+ // STRINGIFY
55
+
56
+ func (model *model) String() string {
57
+ str := "<whisper.model"
58
+ if model.ctx != nil {
59
+ str += fmt.Sprintf(" model=%q", model.path)
60
+ }
61
+ return str + ">"
62
+ }
63
+
64
+ ///////////////////////////////////////////////////////////////////////////////
65
+ // PUBLIC METHODS
66
+
67
+ // Return true if model is multilingual (language and translation options are supported)
68
+ func (model *model) IsMultilingual() bool {
69
+ return model.ctx.Whisper_is_multilingual() != 0
70
+ }
71
+
72
+ // Return all recognized languages. Initially it is set to auto-detect
73
+ func (model *model) Languages() []string {
74
+ result := make([]string, 0, whisper.Whisper_lang_max_id())
75
+ for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
76
+ str := whisper.Whisper_lang_str(i)
77
+ if model.ctx.Whisper_lang_id(str) >= 0 {
78
+ result = append(result, str)
79
+ }
80
+ }
81
+ return result
82
+ }
83
+
84
+ func (model *model) NewContext() (Context, error) {
85
+ if model.ctx == nil {
86
+ return nil, ErrInternalAppError
87
+ }
88
+
89
+ // Create new context
90
+ params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
91
+ params.SetTranslate(false)
92
+ params.SetPrintSpecial(false)
93
+ params.SetPrintProgress(false)
94
+ params.SetPrintRealtime(false)
95
+ params.SetPrintTimestamps(false)
96
+ params.SetThreads(runtime.NumCPU())
97
+ params.SetNoContext(true)
98
+
99
+ // Return new context
100
+ return newContext(model, params)
101
+ }
whisper.cpp/bindings/go/pkg/whisper/model_test.go ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper_test
2
+
3
+ import (
4
+ "testing"
5
+
6
+ "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
7
+ assert "github.com/stretchr/testify/assert"
8
+ )
9
+
10
+ func TestNew(t *testing.T) {
11
+ assert := assert.New(t)
12
+ t.Run("valid model path", func(t *testing.T) {
13
+ model, err := whisper.New(ModelPath)
14
+ assert.NoError(err)
15
+ assert.NotNil(model)
16
+ defer model.Close()
17
+
18
+ })
19
+
20
+ t.Run("invalid model path", func(t *testing.T) {
21
+ invalidModelPath := "invalid-model-path.bin"
22
+ model, err := whisper.New(invalidModelPath)
23
+ assert.Error(err)
24
+ assert.Nil(model)
25
+ })
26
+ }
27
+
28
+ func TestClose(t *testing.T) {
29
+ assert := assert.New(t)
30
+
31
+ model, err := whisper.New(ModelPath)
32
+ assert.NoError(err)
33
+ assert.NotNil(model)
34
+
35
+ err = model.Close()
36
+ assert.NoError(err)
37
+ }
38
+
39
+ func TestNewContext(t *testing.T) {
40
+ assert := assert.New(t)
41
+
42
+ model, err := whisper.New(ModelPath)
43
+ assert.NoError(err)
44
+ assert.NotNil(model)
45
+ defer model.Close()
46
+
47
+ context, err := model.NewContext()
48
+ assert.NoError(err)
49
+ assert.NotNil(context)
50
+ }
51
+
52
+ func TestIsMultilingual(t *testing.T) {
53
+ assert := assert.New(t)
54
+
55
+ model, err := whisper.New(ModelPath)
56
+ assert.NoError(err)
57
+ assert.NotNil(model)
58
+ defer model.Close()
59
+
60
+ isMultilingual := model.IsMultilingual()
61
+
62
+ // This returns false since
63
+ // the model 'models/ggml-small.en.bin'
64
+ // that is loaded is not multilingual
65
+ assert.False(isMultilingual)
66
+ }
67
+
68
+ func TestLanguages(t *testing.T) {
69
+ assert := assert.New(t)
70
+
71
+ model, err := whisper.New(ModelPath)
72
+ assert.NoError(err)
73
+ assert.NotNil(model)
74
+ defer model.Close()
75
+
76
+ expectedLanguages := []string{
77
+ "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
78
+ "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
79
+ "el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
80
+ "bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
81
+ "sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
82
+ "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
83
+ "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
84
+ "lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
85
+ "bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
86
+ }
87
+
88
+ actualLanguages := model.Languages()
89
+
90
+ assert.Equal(expectedLanguages, actualLanguages)
91
+ }
whisper.cpp/bindings/go/pkg/whisper/util_test.go ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ package whisper_test
2
+
3
+ const (
4
+ ModelPath = "../../models/ggml-small.en.bin"
5
+ SamplePath = "../../samples/jfk.wav"
6
+ )
whisper.cpp/bindings/go/whisper.go ADDED
@@ -0,0 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "errors"
5
+ "unsafe"
6
+ )
7
+
8
+ ///////////////////////////////////////////////////////////////////////////////
9
+ // CGO
10
+
11
+ /*
12
+ #cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++
13
+ #cgo linux LDFLAGS: -fopenmp
14
+ #cgo darwin LDFLAGS: -lggml-metal -lggml-blas
15
+ #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
16
+ #include <whisper.h>
17
+ #include <stdlib.h>
18
+
19
+ extern void callNewSegment(void* user_data, int new);
20
+ extern void callProgress(void* user_data, int progress);
21
+ extern bool callEncoderBegin(void* user_data);
22
+
23
+ // Text segment callback
24
+ // Called on every newly generated text segment
25
+ // Use the whisper_full_...() functions to obtain the text segments
26
+ static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_state* state, int n_new, void* user_data) {
27
+ if(user_data != NULL && ctx != NULL) {
28
+ callNewSegment(user_data, n_new);
29
+ }
30
+ }
31
+
32
+ // Progress callback
33
+ // Called on every newly generated text segment
34
+ // Use the whisper_full_...() functions to obtain the text segments
35
+ static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
36
+ if(user_data != NULL && ctx != NULL) {
37
+ callProgress(user_data, progress);
38
+ }
39
+ }
40
+
41
+ // Encoder begin callback
42
+ // If not NULL, called before the encoder starts
43
+ // If it returns false, the computation is aborted
44
+ static bool whisper_encoder_begin_cb(struct whisper_context* ctx, struct whisper_state* state, void* user_data) {
45
+ if(user_data != NULL && ctx != NULL) {
46
+ return callEncoderBegin(user_data);
47
+ }
48
+ return false;
49
+ }
50
+
51
+ // Get default parameters and set callbacks
52
+ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_context* ctx, enum whisper_sampling_strategy strategy) {
53
+ struct whisper_full_params params = whisper_full_default_params(strategy);
54
+ params.new_segment_callback = whisper_new_segment_cb;
55
+ params.new_segment_callback_user_data = (void*)(ctx);
56
+ params.encoder_begin_callback = whisper_encoder_begin_cb;
57
+ params.encoder_begin_callback_user_data = (void*)(ctx);
58
+ params.progress_callback = whisper_progress_cb;
59
+ params.progress_callback_user_data = (void*)(ctx);
60
+ return params;
61
+ }
62
+ */
63
+ import "C"
64
+
65
+ ///////////////////////////////////////////////////////////////////////////////
66
+ // TYPES
67
+
68
+ type (
69
+ Context C.struct_whisper_context
70
+ Token C.whisper_token
71
+ TokenData C.struct_whisper_token_data
72
+ SamplingStrategy C.enum_whisper_sampling_strategy
73
+ Params C.struct_whisper_full_params
74
+ )
75
+
76
+ ///////////////////////////////////////////////////////////////////////////////
77
+ // GLOBALS
78
+
79
+ const (
80
+ SAMPLING_GREEDY SamplingStrategy = C.WHISPER_SAMPLING_GREEDY
81
+ SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH
82
+ )
83
+
84
+ const (
85
+ SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
86
+ SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
87
+ NumFFT = C.WHISPER_N_FFT
88
+ HopLength = C.WHISPER_HOP_LENGTH
89
+ ChunkSize = C.WHISPER_CHUNK_SIZE
90
+ )
91
+
92
+ var (
93
+ ErrTokenizerFailed = errors.New("whisper_tokenize failed")
94
+ ErrAutoDetectFailed = errors.New("whisper_lang_auto_detect failed")
95
+ ErrConversionFailed = errors.New("whisper_convert failed")
96
+ ErrInvalidLanguage = errors.New("invalid language")
97
+ )
98
+
99
+ ///////////////////////////////////////////////////////////////////////////////
100
+ // PUBLIC METHODS
101
+
102
+ // Allocates all memory needed for the model and loads the model from the given file.
103
+ // Returns NULL on failure.
104
+ func Whisper_init(path string) *Context {
105
+ cPath := C.CString(path)
106
+ defer C.free(unsafe.Pointer(cPath))
107
+ if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
108
+ return (*Context)(ctx)
109
+ } else {
110
+ return nil
111
+ }
112
+ }
113
+
114
+ // Frees all memory allocated by the model.
115
+ func (ctx *Context) Whisper_free() {
116
+ C.whisper_free((*C.struct_whisper_context)(ctx))
117
+ }
118
+
119
+ // Convert RAW PCM audio to log mel spectrogram.
120
+ // The resulting spectrogram is stored inside the provided whisper context.
121
+ func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error {
122
+ if C.whisper_pcm_to_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(threads)) == 0 {
123
+ return nil
124
+ } else {
125
+ return ErrConversionFailed
126
+ }
127
+ }
128
+
129
+ // This can be used to set a custom log mel spectrogram inside the provided whisper context.
130
+ // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
131
+ // n_mel must be 80
132
+ func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error {
133
+ if C.whisper_set_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(n_mel)) == 0 {
134
+ return nil
135
+ } else {
136
+ return ErrConversionFailed
137
+ }
138
+ }
139
+
140
+ // Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
141
+ // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
142
+ // offset can be used to specify the offset of the first frame in the spectrogram.
143
+ func (ctx *Context) Whisper_encode(offset, threads int) error {
144
+ if C.whisper_encode((*C.struct_whisper_context)(ctx), C.int(offset), C.int(threads)) == 0 {
145
+ return nil
146
+ } else {
147
+ return ErrConversionFailed
148
+ }
149
+ }
150
+
151
+ // Run the Whisper decoder to obtain the logits and probabilities for the next token.
152
+ // Make sure to call whisper_encode() first.
153
+ // tokens + n_tokens is the provided context for the decoder.
154
+ // n_past is the number of tokens to use from previous decoder calls.
155
+ func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
156
+ if C.whisper_decode((*C.struct_whisper_context)(ctx), (*C.whisper_token)(&tokens[0]), C.int(len(tokens)), C.int(past), C.int(threads)) == 0 {
157
+ return nil
158
+ } else {
159
+ return ErrConversionFailed
160
+ }
161
+ }
162
+
163
+ // Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
164
+ // Returns the number of tokens on success
165
+ func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
166
+ cText := C.CString(text)
167
+ defer C.free(unsafe.Pointer(cText))
168
+ if n := C.whisper_tokenize((*C.struct_whisper_context)(ctx), cText, (*C.whisper_token)(&tokens[0]), C.int(len(tokens))); n >= 0 {
169
+ return int(n), nil
170
+ } else {
171
+ return 0, ErrTokenizerFailed
172
+ }
173
+ }
174
+
175
+ // Return the id of the specified language, returns -1 if not found
176
+ // Examples:
177
+ //
178
+ // "de" -> 2
179
+ // "german" -> 2
180
+ func (ctx *Context) Whisper_lang_id(lang string) int {
181
+ return int(C.whisper_lang_id(C.CString(lang)))
182
+ }
183
+
184
+ // Largest language id (i.e. number of available languages - 1)
185
+ func Whisper_lang_max_id() int {
186
+ return int(C.whisper_lang_max_id())
187
+ }
188
+
189
+ // Return the short string of the specified language id (e.g. 2 -> "de"),
190
+ // returns empty string if not found
191
+ func Whisper_lang_str(id int) string {
192
+ return C.GoString(C.whisper_lang_str(C.int(id)))
193
+ }
194
+
195
+ // Use mel data at offset_ms to try and auto-detect the spoken language
196
+ // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
197
+ // Returns the probabilities of all languages.
198
+ // ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
199
+ func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float32, error) {
200
+ probs := make([]float32, Whisper_lang_max_id()+1)
201
+ if n := int(C.whisper_lang_auto_detect((*C.struct_whisper_context)(ctx), C.int(offset_ms), C.int(n_threads), (*C.float)(&probs[0]))); n < 0 {
202
+ return nil, ErrAutoDetectFailed
203
+ } else {
204
+ return probs, nil
205
+ }
206
+ }
207
+
208
+ func (ctx *Context) Whisper_n_len() int {
209
+ return int(C.whisper_n_len((*C.struct_whisper_context)(ctx)))
210
+ }
211
+
212
+ func (ctx *Context) Whisper_n_vocab() int {
213
+ return int(C.whisper_n_vocab((*C.struct_whisper_context)(ctx)))
214
+ }
215
+
216
+ func (ctx *Context) Whisper_n_text_ctx() int {
217
+ return int(C.whisper_n_text_ctx((*C.struct_whisper_context)(ctx)))
218
+ }
219
+
220
+ func (ctx *Context) Whisper_n_audio_ctx() int {
221
+ return int(C.whisper_n_audio_ctx((*C.struct_whisper_context)(ctx)))
222
+ }
223
+
224
+ func (ctx *Context) Whisper_is_multilingual() int {
225
+ return int(C.whisper_is_multilingual((*C.struct_whisper_context)(ctx)))
226
+ }
227
+
228
+ // The probabilities for the next token
229
+ //func (ctx *Whisper_context) Whisper_get_probs() []float32 {
230
+ // return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_probs((*C.struct_whisper_context)(ctx))))[:ctx.Whisper_n_vocab()]
231
+ //}
232
+
233
+ // Token Id -> String. Uses the vocabulary in the provided context
234
+ func (ctx *Context) Whisper_token_to_str(token Token) string {
235
+ return C.GoString(C.whisper_token_to_str((*C.struct_whisper_context)(ctx), C.whisper_token(token)))
236
+ }
237
+
238
+ // Special tokens
239
+ func (ctx *Context) Whisper_token_eot() Token {
240
+ return Token(C.whisper_token_eot((*C.struct_whisper_context)(ctx)))
241
+ }
242
+
243
+ // Special tokens
244
+ func (ctx *Context) Whisper_token_sot() Token {
245
+ return Token(C.whisper_token_sot((*C.struct_whisper_context)(ctx)))
246
+ }
247
+
248
+ // Special tokens
249
+ func (ctx *Context) Whisper_token_prev() Token {
250
+ return Token(C.whisper_token_prev((*C.struct_whisper_context)(ctx)))
251
+ }
252
+
253
+ // Special tokens
254
+ func (ctx *Context) Whisper_token_solm() Token {
255
+ return Token(C.whisper_token_solm((*C.struct_whisper_context)(ctx)))
256
+ }
257
+
258
+ // Special tokens
259
+ func (ctx *Context) Whisper_token_not() Token {
260
+ return Token(C.whisper_token_not((*C.struct_whisper_context)(ctx)))
261
+ }
262
+
263
+ // Special tokens
264
+ func (ctx *Context) Whisper_token_beg() Token {
265
+ return Token(C.whisper_token_beg((*C.struct_whisper_context)(ctx)))
266
+ }
267
+
268
+ // Special tokens
269
+ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
270
+ return Token(C.whisper_token_lang((*C.struct_whisper_context)(ctx), C.int(lang_id)))
271
+ }
272
+
273
+ // Task tokens
274
+ func (ctx *Context) Whisper_token_translate() Token {
275
+ return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
276
+ }
277
+
278
+ // Task tokens
279
+ func (ctx *Context) Whisper_token_transcribe() Token {
280
+ return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
281
+ }
282
+
283
+ // Performance information
284
+ func (ctx *Context) Whisper_print_timings() {
285
+ C.whisper_print_timings((*C.struct_whisper_context)(ctx))
286
+ }
287
+
288
+ // Performance information
289
+ func (ctx *Context) Whisper_reset_timings() {
290
+ C.whisper_reset_timings((*C.struct_whisper_context)(ctx))
291
+ }
292
+
293
+ // Print system information
294
+ func Whisper_print_system_info() string {
295
+ return C.GoString(C.whisper_print_system_info())
296
+ }
297
+
298
+ // Return default parameters for a strategy
299
+ func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params {
300
+ // Get default parameters
301
+ return Params(C.whisper_full_default_params_cb((*C.struct_whisper_context)(ctx), C.enum_whisper_sampling_strategy(strategy)))
302
+ }
303
+
304
+ // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
305
+ // Uses the specified decoding strategy to obtain the text.
306
+ func (ctx *Context) Whisper_full(
307
+ params Params,
308
+ samples []float32,
309
+ encoderBeginCallback func() bool,
310
+ newSegmentCallback func(int),
311
+ progressCallback func(int),
312
+ ) error {
313
+ registerEncoderBeginCallback(ctx, encoderBeginCallback)
314
+ registerNewSegmentCallback(ctx, newSegmentCallback)
315
+ registerProgressCallback(ctx, progressCallback)
316
+ defer registerEncoderBeginCallback(ctx, nil)
317
+ defer registerNewSegmentCallback(ctx, nil)
318
+ defer registerProgressCallback(ctx, nil)
319
+ if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
320
+ return nil
321
+ } else {
322
+ return ErrConversionFailed
323
+ }
324
+ }
325
+
326
+ // Split the input audio in chunks and process each chunk separately using whisper_full()
327
+ // It seems this approach can offer some speedup in some cases.
328
+ // However, the transcription accuracy can be worse at the beginning and end of each chunk.
329
+ func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
330
+ registerEncoderBeginCallback(ctx, encoderBeginCallback)
331
+ registerNewSegmentCallback(ctx, newSegmentCallback)
332
+ defer registerEncoderBeginCallback(ctx, nil)
333
+ defer registerNewSegmentCallback(ctx, nil)
334
+
335
+ if C.whisper_full_parallel((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples)), C.int(processors)) == 0 {
336
+ return nil
337
+ } else {
338
+ return ErrConversionFailed
339
+ }
340
+ }
341
+
342
+ // Return the id of the autodetected language, returns -1 if not found
343
+ // Added to whisper.cpp in
344
+ // https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
345
+ //
346
+ // Examples:
347
+ //
348
+ // "de" -> 2
349
+ // "german" -> 2
350
+ func (ctx *Context) Whisper_full_lang_id() int {
351
+ return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
352
+ }
353
+
354
+ // Number of generated text segments.
355
+ // A segment can be a few words, a sentence, or even a paragraph.
356
+ func (ctx *Context) Whisper_full_n_segments() int {
357
+ return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx)))
358
+ }
359
+
360
+ // Get the start and end time of the specified segment.
361
+ func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64 {
362
+ return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(segment)))
363
+ }
364
+
365
+ // Get the start and end time of the specified segment.
366
+ func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64 {
367
+ return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(segment)))
368
+ }
369
+
370
+ // Get the text of the specified segment.
371
+ func (ctx *Context) Whisper_full_get_segment_text(segment int) string {
372
+ return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(segment)))
373
+ }
374
+
375
+ // Get number of tokens in the specified segment.
376
+ func (ctx *Context) Whisper_full_n_tokens(segment int) int {
377
+ return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(segment)))
378
+ }
379
+
380
+ // Get the token text of the specified token index in the specified segment.
381
+ func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string {
382
+ return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
383
+ }
384
+
385
+ // Get the token of the specified token index in the specified segment.
386
+ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
387
+ return Token(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
388
+ }
389
+
390
+ // Get token data for the specified token in the specified segment.
391
+ // This contains probabilities, timestamps, etc.
392
+ func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
393
+ return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
394
+ }
395
+
396
+ // Get the probability of the specified token in the specified segment.
397
+ func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
398
+ return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
399
+ }
400
+
401
+ ///////////////////////////////////////////////////////////////////////////////
402
+ // CALLBACKS
403
+
404
+ var (
405
+ cbNewSegment = make(map[unsafe.Pointer]func(int))
406
+ cbProgress = make(map[unsafe.Pointer]func(int))
407
+ cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
408
+ )
409
+
410
+ func registerNewSegmentCallback(ctx *Context, fn func(int)) {
411
+ if fn == nil {
412
+ delete(cbNewSegment, unsafe.Pointer(ctx))
413
+ } else {
414
+ cbNewSegment[unsafe.Pointer(ctx)] = fn
415
+ }
416
+ }
417
+
418
+ func registerProgressCallback(ctx *Context, fn func(int)) {
419
+ if fn == nil {
420
+ delete(cbProgress, unsafe.Pointer(ctx))
421
+ } else {
422
+ cbProgress[unsafe.Pointer(ctx)] = fn
423
+ }
424
+ }
425
+
426
+ func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
427
+ if fn == nil {
428
+ delete(cbEncoderBegin, unsafe.Pointer(ctx))
429
+ } else {
430
+ cbEncoderBegin[unsafe.Pointer(ctx)] = fn
431
+ }
432
+ }
433
+
434
+ //export callNewSegment
435
+ func callNewSegment(user_data unsafe.Pointer, new C.int) {
436
+ if fn, ok := cbNewSegment[user_data]; ok {
437
+ fn(int(new))
438
+ }
439
+ }
440
+
441
+ //export callProgress
442
+ func callProgress(user_data unsafe.Pointer, progress C.int) {
443
+ if fn, ok := cbProgress[user_data]; ok {
444
+ fn(int(progress))
445
+ }
446
+ }
447
+
448
+ //export callEncoderBegin
449
+ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
450
+ if fn, ok := cbEncoderBegin[user_data]; ok {
451
+ if fn() {
452
+ return C.bool(true)
453
+ } else {
454
+ return C.bool(false)
455
+ }
456
+ }
457
+ return true
458
+ }
459
+
460
+ func (t TokenData) T0() int64 {
461
+ return int64(t.t0)
462
+ }
463
+
464
+ func (t TokenData) T1() int64 {
465
+ return int64(t.t1)
466
+ }
467
+
468
+ func (t TokenData) Id() Token {
469
+ return Token(t.id)
470
+ }
whisper.cpp/bindings/go/whisper_test.go ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper_test
2
+
3
+ import (
4
+ "os"
5
+ "runtime"
6
+ "testing"
7
+ "time"
8
+
9
+ // Packages
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
11
+ wav "github.com/go-audio/wav"
12
+ assert "github.com/stretchr/testify/assert"
13
+ )
14
+
15
+ const (
16
+ ModelPath = "models/ggml-small.en.bin"
17
+ SamplePath = "samples/jfk.wav"
18
+ )
19
+
20
+ func Test_Whisper_000(t *testing.T) {
21
+ assert := assert.New(t)
22
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
23
+ t.Skip("Skipping test, model not found:", ModelPath)
24
+ }
25
+ ctx := whisper.Whisper_init(ModelPath)
26
+ assert.NotNil(ctx)
27
+ ctx.Whisper_free()
28
+ }
29
+
30
+ func Test_Whisper_001(t *testing.T) {
31
+ assert := assert.New(t)
32
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
33
+ t.Skip("Skipping test, model not found:", ModelPath)
34
+ }
35
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
36
+ t.Skip("Skipping test, sample not found:", SamplePath)
37
+ }
38
+
39
+ // Open samples
40
+ fh, err := os.Open(SamplePath)
41
+ assert.NoError(err)
42
+ defer fh.Close()
43
+
44
+ // Read samples
45
+ d := wav.NewDecoder(fh)
46
+ buf, err := d.FullPCMBuffer()
47
+ assert.NoError(err)
48
+
49
+ // Run whisper
50
+ ctx := whisper.Whisper_init(ModelPath)
51
+ assert.NotNil(ctx)
52
+ defer ctx.Whisper_free()
53
+ params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
54
+ data := buf.AsFloat32Buffer().Data
55
+ err = ctx.Whisper_full(params, data, nil, nil, nil)
56
+ assert.NoError(err)
57
+
58
+ // Print out tokens
59
+ num_segments := ctx.Whisper_full_n_segments()
60
+ assert.GreaterOrEqual(num_segments, 1)
61
+ for i := 0; i < num_segments; i++ {
62
+ str := ctx.Whisper_full_get_segment_text(i)
63
+ assert.NotEmpty(str)
64
+ t0 := time.Duration(ctx.Whisper_full_get_segment_t0(i)) * time.Millisecond
65
+ t1 := time.Duration(ctx.Whisper_full_get_segment_t1(i)) * time.Millisecond
66
+ t.Logf("[%6s->%-6s] %q", t0, t1, str)
67
+ }
68
+ }
69
+
70
+ func Test_Whisper_002(t *testing.T) {
71
+ assert := assert.New(t)
72
+ for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
73
+ str := whisper.Whisper_lang_str(i)
74
+ assert.NotEmpty(str)
75
+ t.Log(str)
76
+ }
77
+ }
78
+
79
+ func Test_Whisper_003(t *testing.T) {
80
+ threads := runtime.NumCPU()
81
+ assert := assert.New(t)
82
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
83
+ t.Skip("Skipping test, model not found:", ModelPath)
84
+ }
85
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
86
+ t.Skip("Skipping test, sample not found:", SamplePath)
87
+ }
88
+
89
+ // Open samples
90
+ fh, err := os.Open(SamplePath)
91
+ assert.NoError(err)
92
+ defer fh.Close()
93
+
94
+ // Read samples
95
+ d := wav.NewDecoder(fh)
96
+ buf, err := d.FullPCMBuffer()
97
+ assert.NoError(err)
98
+
99
+ // Make the model
100
+ ctx := whisper.Whisper_init(ModelPath)
101
+ assert.NotNil(ctx)
102
+ defer ctx.Whisper_free()
103
+
104
+ // Get MEL
105
+ assert.NoError(ctx.Whisper_pcm_to_mel(buf.AsFloat32Buffer().Data, threads))
106
+
107
+ // Get Languages
108
+ languages, err := ctx.Whisper_lang_auto_detect(0, threads)
109
+ assert.NoError(err)
110
+ for i, p := range languages {
111
+ t.Logf("%s: %f", whisper.Whisper_lang_str(i), p)
112
+ }
113
+ }
whisper.cpp/bindings/java/README.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Java JNI bindings for Whisper
2
+
3
+ This package provides Java JNI bindings for whisper.cpp. They have been tested on:
4
+
5
+ * <strike>Darwin (OS X) 12.6 on x64_64</strike>
6
+ * Ubuntu on x86_64
7
+ * Windows on x86_64
8
+
9
+ The "low level" bindings are in `WhisperCppJnaLibrary`. The most simple usage is as follows:
10
+
11
+ JNA will attempt to load the `whispercpp` shared library from:
12
+
13
+ - jna.library.path
14
+ - jna.platform.library
15
+ - ~/Library/Frameworks
16
+ - /Library/Frameworks
17
+ - /System/Library/Frameworks
18
+ - classpath
19
+
20
+ ```java
21
+ import io.github.ggerganov.whispercpp.WhisperCpp;
22
+
23
+ public class Example {
24
+
25
+ public static void main(String[] args) {
26
+
27
+ WhisperCpp whisper = new WhisperCpp();
28
+ try {
29
+ // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
30
+ // or you can provide the absolute path to the model file.
31
+ whisper.initContext("../ggml-base.en.bin");
32
+ WhisperFullParams.ByValue whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
33
+
34
+ // custom configuration if required
35
+ //whisperParams.n_threads = 8;
36
+ whisperParams.temperature = 0.0f;
37
+ whisperParams.temperature_inc = 0.2f;
38
+ //whisperParams.language = "en";
39
+
40
+ float[] samples = readAudio(); // divide each value by 32767.0f
41
+ List<WhisperSegment> whisperSegmentList = whisper.fullTranscribeWithTime(whisperParams, samples);
42
+
43
+ for (WhisperSegment whisperSegment : whisperSegmentList) {
44
+
45
+ long start = whisperSegment.getStart();
46
+ long end = whisperSegment.getEnd();
47
+
48
+ String text = whisperSegment.getSentence();
49
+
50
+ System.out.println("start: "+start);
51
+ System.out.println("end: "+end);
52
+ System.out.println("text: "+text);
53
+
54
+ }
55
+
56
+ } catch (IOException e) {
57
+ e.printStackTrace();
58
+ } finally {
59
+ whisper.close();
60
+ }
61
+
62
+ }
63
+ }
64
+ ```
65
+
66
+ ## Building & Testing
67
+
68
+ In order to build, you need to have the JDK 8 or higher installed. Run the tests with:
69
+
70
+ ```bash
71
+ git clone https://github.com/ggml-org/whisper.cpp.git
72
+ cd whisper.cpp/bindings/java
73
+
74
+ ./gradlew build
75
+ ```
76
+
77
+ You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the dll is included in the jar and you can update it:
78
+
79
+ ```bash
80
+ copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
81
+ ```
82
+
83
+
84
+ ## License
85
+
86
+ The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
87
+
whisper.cpp/bindings/java/build.gradle ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ plugins {
2
+ id 'java'
3
+ id 'java-library'
4
+ id 'maven-publish'
5
+ id 'signing'
6
+ }
7
+
8
+ archivesBaseName = 'whispercpp'
9
+ group = 'io.github.ggerganov'
10
+ version = '1.4.0'
11
+
12
+
13
+ sourceCompatibility = 1.8
14
+ targetCompatibility = 1.8
15
+
16
+ sourceSets {
17
+ main {
18
+ resources {
19
+ srcDirs = ['src/main/resources', 'build/generated/resources/main']
20
+ }
21
+ }
22
+ test {
23
+ runtimeClasspath += files('build/generated/resources/main')
24
+ }
25
+ }
26
+
27
+ tasks.register('copyLibwhisperDynlib', Copy) {
28
+ from '../../build/src'
29
+ include 'libwhisper.dylib'
30
+ into 'build/generated/resources/main'
31
+ }
32
+
33
+ tasks.register('copyLibwhisperSo', Copy) {
34
+ from '../../build/src'
35
+ include 'libwhisper.so'
36
+ into 'build/generated/resources/main'
37
+ }
38
+
39
+ tasks.register('copyWhisperDLL', Copy) {
40
+ from '../../build/bin/Release'
41
+ include 'whisper.dll'
42
+ into 'build/generated/resources/main'
43
+ }
44
+
45
+ tasks.register('copyGGML_BASE_DLL', Copy) {
46
+ from '../../build/bin/Release'
47
+ include 'ggml-base.dll'
48
+ into 'build/generated/resources/main'
49
+ }
50
+
51
+ tasks.register('copyGGML_DLL', Copy) {
52
+ from '../../build/bin/Release'
53
+ include 'ggml.dll'
54
+ into 'build/generated/resources/main'
55
+ }
56
+
57
+ tasks.register('copyGGML_CPU_DLL', Copy) {
58
+ from '../../build/bin/Release'
59
+ include 'ggml-cpu.dll'
60
+ into 'build/generated/resources/main'
61
+ }
62
+
63
+ tasks.register('copyLibs') {
64
+ dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDLL, copyGGML_BASE_DLL, copyGGML_DLL, copyGGML_CPU_DLL
65
+ }
66
+
67
+ test {
68
+ systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
69
+ }
70
+
71
+ java {
72
+ withSourcesJar()
73
+ withJavadocJar()
74
+ }
75
+
76
+ sourcesJar() {
77
+ dependsOn copyLibs
78
+ }
79
+
80
+ jar {
81
+ dependsOn copyLibs
82
+ exclude '**/whisper_java.exp', '**/whisper_java.lib'
83
+ }
84
+
85
+ javadoc {
86
+ options.addStringOption('Xdoclint:none', '-quiet')
87
+ }
88
+
89
+ tasks.withType(Test) {
90
+ useJUnitPlatform()
91
+ }
92
+
93
+ test.dependsOn copyLibs
94
+ processResources.dependsOn copyLibs
95
+
96
+ dependencies {
97
+ implementation "net.java.dev.jna:jna:5.13.0"
98
+ testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
99
+ testImplementation "org.assertj:assertj-core:3.24.2"
100
+ }
101
+
102
+ repositories {
103
+ mavenCentral()
104
+ }
105
+
106
+ publishing {
107
+ publications {
108
+ mavenJava(MavenPublication) {
109
+ artifactId = 'whispercpp'
110
+ from components.java
111
+ pom {
112
+ name = 'whispercpp'
113
+ description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
114
+ url = 'https://github.com/ggerganov/whisper.cpp'
115
+ licenses {
116
+ license {
117
+ name = 'MIT licence'
118
+ url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
119
+ }
120
+ }
121
+ developers {
122
+ developer {
123
+ id = 'ggerganov'
124
+ name = 'Georgi Gerganov'
125
+ email = 'ggerganov@gmail.com'
126
+ }
127
+ developer {
128
+ id = 'nalbion'
129
+ name = 'Nicholas Albion'
130
+ email = 'nalbion@yahoo.com'
131
+ }
132
+ }
133
+ scm {
134
+ connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
135
+ url = 'https://github.com/ggerganov/whisper.cpp'
136
+ }
137
+ }
138
+ }
139
+ }
140
+
141
+ repositories {
142
+ maven {
143
+ def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
144
+ def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
145
+ url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
146
+ credentials {
147
+ username = System.getenv("MAVEN_USERNAME")
148
+ password = System.getenv("MAVEN_PASSWORD")
149
+ }
150
+ }
151
+ }
152
+ }
153
+
154
+ signing {
155
+ def signingKey = System.getenv("PGP_SECRET")
156
+ def signingPassword = System.getenv("PGP_PASSPHRASE")
157
+ useInMemoryPgpKeys(signingKey, signingPassword)
158
+ sign publishing.publications.mavenJava
159
+ }
whisper.cpp/bindings/java/gradle.properties ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ org.gradle.jvmargs=-Xms256m -Xmx1024m
2
+ system.include.dir=/usr/include
3
+ #system.local.include.dir=../../include
4
+ system.local.include.dir=./build/generated/sources/headers/java/main
5
+ jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
6
+ jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
whisper.cpp/bindings/java/gradle/wrapper/gradle-wrapper.jar ADDED
Binary file (61.6 kB). View file