ZeroWw committed on Jul 1, 2024

Commit

055eba4

verified ·

1 Parent(s): da618d9

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +34 -0
9b-it-sfp.sbs +3 -0
gemma.cpp/.bazelrc +1 -0
gemma.cpp/.bazelversion +1 -0
gemma.cpp/.clang-format +2 -0
gemma.cpp/.clang-tidy +211 -0
gemma.cpp/.github/workflows/build.yml +119 -0
gemma.cpp/.gitignore +4 -0
gemma.cpp/.vscode/settings.json +6 -0
gemma.cpp/BUILD.bazel +420 -0
gemma.cpp/CMakeLists.txt +149 -0
gemma.cpp/CMakePresets.json +59 -0
gemma.cpp/DEVELOPERS.md +203 -0
gemma.cpp/LICENSE +202 -0
gemma.cpp/LICENSE-BSD3 +26 -0
gemma.cpp/MODULE.bazel +72 -0
gemma.cpp/README.md +493 -0
gemma.cpp/WORKSPACE +4 -0
gemma.cpp/backprop/backward-inl.h +428 -0
gemma.cpp/backprop/backward.cc +95 -0
gemma.cpp/backprop/backward.h +32 -0
gemma.cpp/backprop/backward_scalar.h +362 -0
gemma.cpp/backprop/backward_scalar_test.cc +614 -0
gemma.cpp/backprop/backward_test.cc +264 -0
gemma.cpp/backprop/common_scalar.h +120 -0
gemma.cpp/backprop/forward-inl.h +289 -0
gemma.cpp/backprop/forward.cc +86 -0
gemma.cpp/backprop/forward.h +33 -0
gemma.cpp/backprop/forward_scalar.h +300 -0
gemma.cpp/backprop/optimize_test.cc +144 -0
gemma.cpp/backprop/optimizer.cc +135 -0
gemma.cpp/backprop/optimizer.h +37 -0
gemma.cpp/backprop/prompt.h +34 -0
gemma.cpp/backprop/sampler.h +87 -0
gemma.cpp/backprop/test_util.h +168 -0
gemma.cpp/bazel/BUILD +5 -0
gemma.cpp/bazel/sentencepiece.bazel +97 -0
gemma.cpp/bazel/sentencepiece.patch +2339 -0
gemma.cpp/build/.gitignore +3 -0
gemma.cpp/build/CMakeCache.txt +982 -0
gemma.cpp/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake +74 -0
gemma.cpp/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake +85 -0
gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin +3 -0
gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin +3 -0
gemma.cpp/build/CMakeFiles/3.27.9/CMakeSystem.cmake +15 -0
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/CMakeCCompilerId.c +866 -0
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/a.out +0 -0
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/CMakeCXXCompilerId.cpp +855 -0
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out +0 -0
gemma.cpp/build/CMakeFiles/CMakeConfigureLog.yaml +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+9b-it-sfp.sbs filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/CMakeFiles/libgemma.dir/gemma/gemma.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_128a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_128d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f32a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f32d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f64a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f64d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i16a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i16d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i32a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i32d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i64a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i64d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv128a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv128d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv64a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv64d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u16a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u16d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u32a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u32d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u64a.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u64d.cc.o filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/libhwy.a filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-build/libhwy_contrib.a filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/highway-src/g3doc/highway_intro.pdf filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/json-src/docs/avatars.png filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/json-src/docs/json.gif filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/json-src/docs/usages/macos.png filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/_deps/sentencepiece-build/src/libsentencepiece.a filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/gemma filter=lfs diff=lfs merge=lfs -text
+gemma.cpp/build/libgemma.a filter=lfs diff=lfs merge=lfs -text
+tokenizer.spm filter=lfs diff=lfs merge=lfs -text

9b-it-sfp.sbs ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1aad1b51e34090d4f4ed0a392470c47bd34acb10301fd12e7b81160f8b6d2d41
+size 10159826688

gemma.cpp/.bazelrc ADDED Viewed

	@@ -0,0 +1 @@


1	+ common --enable_bzlmod

gemma.cpp/.bazelversion ADDED Viewed

	@@ -0,0 +1 @@


1	+ 7.1.1

gemma.cpp/.clang-format ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Language: Cpp
2	+ BasedOnStyle: Google

gemma.cpp/.clang-tidy ADDED Viewed

	@@ -0,0 +1,211 @@

+FormatStyle: file
+WarningsAsErrors: "*"
+Checks: "-*,\
+          abseil-*,\
+          -abseil-string-find-startswith,\
+          -abseil-string-find-str-contains,\
+          bugprone-*,\
+          -bugprone-argument-comment,\
+          -bugprone-assert-side-effect,\
+          -bugprone-bad-signal-to-kill-thread,\
+          -bugprone-bool-pointer-implicit-conversion,\
+          -bugprone-branch-clone,\
+          -bugprone-copy-constructor-init,\
+          -bugprone-dangling-handle,\
+          -bugprone-dynamic-static-initializers,\
+          -bugprone-easily-swappable-parameters,\
+          -bugprone-exception-escape,\
+          -bugprone-fold-init-type,\
+          -bugprone-forward-declaration-namespace,\
+          -bugprone-forwarding-reference-overload,\
+          -bugprone-implicit-widening-of-multiplication-result,\
+          -bugprone-inaccurate-erase,\
+          -bugprone-incorrect-roundings,\
+          -bugprone-infinite-loop,\
+          -bugprone-integer-division,\
+          -bugprone-lambda-function-name,\
+          -bugprone-macro-parentheses,\
+          -bugprone-macro-repeated-side-effects,\
+          -bugprone-misplaced-operator-in-strlen-in-alloc,\
+          -bugprone-misplaced-widening-cast,\
+          -bugprone-move-forwarding-reference,\
+          -bugprone-multiple-statement-macro,\
+          -bugprone-narrowing-conversions,\
+          -bugprone-no-escape,\
+          -bugprone-not-null-terminated-result,\
+          -bugprone-parent-virtual-call,\
+          -bugprone-posix-return,\
+          -bugprone-redundant-branch-condition,\
+          -bugprone-reserved-identifier,\
+          -bugprone-signal-handler,\
+          -bugprone-signed-char-misuse,\
+          -bugprone-sizeof-container,\
+          -bugprone-sizeof-expression,\
+          -bugprone-spuriously-wake-up-functions,\
+          -bugprone-string-constructor,\
+          -bugprone-string-integer-assignment,\
+          -bugprone-string-literal-with-embedded-nul,\
+          -bugprone-stringview-nullptr,\
+          -bugprone-suspicious-enum-usage,\
+          -bugprone-suspicious-include,\
+          -bugprone-suspicious-memory-comparison,\
+          -bugprone-suspicious-memset-usage,\
+          -bugprone-suspicious-missing-comma,\
+          -bugprone-suspicious-semicolon,\
+          -bugprone-suspicious-string-compare,\
+          -bugprone-swapped-arguments,\
+          -bugprone-terminating-continue,\
+          -bugprone-throw-keyword-missing,\
+          -bugprone-too-small-loop-variable,\
+          -bugprone-undefined-memory-manipulation,\
+          -bugprone-undelegated-constructor,\
+          -bugprone-unhandled-exception-at-new,\
+          -bugprone-unhandled-self-assignment,\
+          -bugprone-unused-raii,\
+          -bugprone-unused-return-value,\
+          -bugprone-use-after-move,\
+          -bugprone-virtual-near-miss,\
+          cert-*,\
+          -cert-dcl16-c,\
+          -cert-dcl21-cpp,\
+          -cert-dcl37-c,\
+          -cert-dcl50-cpp,\
+          -cert-dcl51-cpp,\
+          -cert-dcl54-cpp,\
+          -cert-dcl58-cpp,\
+          -cert-err33-c,\
+          -cert-msc30-c,\
+          -cert-msc32-c,\
+          -cert-msc50-cpp,\
+          -cert-msc51-cpp,\
+          -cert-oop54-cpp,\
+          -cert-str34-c,\
+          -cert-str34-c,\
+          -cert-str34-c,\
+          -cert-str34-c,\
+          -clang-analyzer-*,\
+          concurrency-*,\
+          -concurrency-mt-unsafe,\
+          cppcoreguidelines-*,\
+          -concurrency-mt-unsafe,\
+          -cppcoreguidelines-avoid-c-arrays,\
+          -cppcoreguidelines-avoid-const-or-ref-data-members,\
+          -cppcoreguidelines-avoid-do-while,\
+          -cppcoreguidelines-avoid-goto,\
+          -cppcoreguidelines-avoid-magic-numbers,\
+          -cppcoreguidelines-avoid-non-const-global-variables,\
+          -cppcoreguidelines-c-copy-assignment-signature,\
+          -cppcoreguidelines-explicit-virtual-functions,\
+          -cppcoreguidelines-init-variables,\
+          -cppcoreguidelines-interfaces-global-init,\
+          -cppcoreguidelines-macro-usage,\
+          -cppcoreguidelines-narrowing-conversions,\
+          -cppcoreguidelines-no-malloc,\
+          -cppcoreguidelines-non-private-member-variables-in-classes,\
+          -cppcoreguidelines-owning-memory,\
+          -cppcoreguidelines-prefer-member-initializer,\
+          -cppcoreguidelines-pro-bounds-array-to-pointer-decay,\
+          -cppcoreguidelines-pro-bounds-constant-array-index,\
+          -cppcoreguidelines-pro-bounds-pointer-arithmetic,\
+          -cppcoreguidelines-pro-type-const-cast,\
+          -cppcoreguidelines-pro-type-member-init,\
+          -cppcoreguidelines-pro-type-reinterpret-cast,\
+          -cppcoreguidelines-pro-type-static-cast-downcast,\
+          -cppcoreguidelines-pro-type-union-access,\
+          -cppcoreguidelines-pro-type-vararg,\
+          -cppcoreguidelines-slicing,\
+          -cppcoreguidelines-special-member-functions,\
+          -cppcoreguidelines-virtual-class-destructor,\
+          google-*,\
+          -google-default-arguments,\
+          -google-explicit-constructor,\
+          -google-readability-avoid-underscore-in-googletest-name,\
+          -google-readability-braces-around-statements,\
+          -google-readability-casting,\
+          -google-readability-namespace-comments,\
+          -google-readability-todo,\
+          -google-runtime-int,\
+          -google-upgrade-googletest-case,\
+          misc-*,\
+          -misc-misplaced-const,\
+          -misc-new-delete-overloads,\
+          -misc-non-private-member-variables-in-classes,\
+          -misc-no-recursion,\
+          -misc-redundant-expression,\
+          -misc-uniqueptr-reset-release,\
+          -misc-unconventional-assign-operator,\
+          -misc-unused-parameters,\
+          -misc-unused-using-decls,\
+          modernize-*,\
+          -modernize-avoid-c-arrays,\
+          -modernize-concat-nested-namespaces,\
+          -modernize-deprecated-headers,\
+          -modernize-loop-convert,\
+          -modernize-macro-to-enum,\
+          -modernize-make-unique,\
+          -modernize-pass-by-value,\
+          -modernize-raw-string-literal,\
+          -modernize-redundant-void-arg,\
+          -modernize-return-braced-init-list,\
+          -modernize-unary-static-assert,\
+          -modernize-use-auto,\
+          -modernize-use-bool-literals,\
+          -modernize-use-default-member-init,\
+          -modernize-use-emplace,\
+          -modernize-use-equals-default,\
+          -modernize-use-equals-delete,\
+          -modernize-use-nodiscard,\
+          -modernize-use-nullptr,\
+          -modernize-use-override,\
+          -modernize-use-trailing-return-type,\
+          -modernize-use-transparent-functors,\
+          -modernize-use-using,\
+          performance-*,\
+          -performance-faster-string-find,\
+          -performance-for-range-copy,\
+          -performance-inefficient-algorithm,\
+          -performance-inefficient-string-concatenation,\
+          -performance-inefficient-vector-operation,\
+          -performance-move-const-arg,\
+          -performance-no-automatic-move,\
+          -performance-noexcept-move-constructor,\
+          -performance-no-int-to-ptr,\
+          -performance-trivially-destructible,\
+          -performance-unnecessary-copy-initialization,\
+          -performance-unnecessary-value-param,\
+          portability-*,\
+          readability-*,\
+          -readability-avoid-const-params-in-decls,\
+          -readability-braces-around-statements,\
+          -readability-const-return-type,\
+          -readability-container-data-pointer,\
+          -readability-container-size-empty,\
+          -readability-convert-member-functions-to-static,\
+          -readability-else-after-return,\
+          -readability-function-cognitive-complexity,\
+          -readability-identifier-length,\
+          -readability-implicit-bool-conversion,\
+          -readability-inconsistent-declaration-parameter-name,\
+          -readability-isolate-declaration,\
+          -readability-magic-numbers,\
+          -readability-make-member-function-const,\
+          -readability-named-parameter,\
+          -readability-non-const-parameter,\
+          -readability-qualified-auto,\
+          -readability-redundant-access-specifiers,\
+          -readability-redundant-control-flow,\
+          -readability-redundant-declaration,\
+          -readability-redundant-member-init,\
+          -readability-redundant-smartptr-get,\
+          -readability-redundant-string-cstr,\
+          -readability-redundant-string-init,\
+          -readability-simplify-boolean-expr,\
+          -readability-static-accessed-through-instance,\
+          -readability-static-definition-in-anonymous-namespace,\
+          -readability-suspicious-call-argument,\
+          -readability-uppercase-literal-suffix,\
+          -readability-use-anyofallof
+          "
+CheckOptions:
+    - { key: readability-identifier-naming.ConstexprVariableCase,    value: CamelCase }
+    - { key: readability-identifier-naming.ConstexprVariablePrefix,  value: k         }

gemma.cpp/.github/workflows/build.yml ADDED Viewed

	@@ -0,0 +1,119 @@

+name: build
+# Trigger on push, pull request, or via manual dispatch.
+on:
+  push:
+  pull_request:
+    types: [opened, reopened, labeled, unlabeled, synchronize]
+  workflow_dispatch:
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.os }} (${{ matrix.preset }}) ${{ matrix.build_type }}
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        # When adding another, also add to copybara's github_check_runs.
+        os: ['ubuntu-latest', 'macos-latest', 'windows-latest', 'ubuntu-20.04']
+        build_type: ['Release']
+        preset: ['make', 'windows']
+        exclude:
+          - os: ubuntu-20.04
+            preset: windows
+          - os: ubuntu-latest
+            preset: windows
+          - os: macos-latest
+            preset: windows
+          - os: windows-latest
+            preset: make
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.preset }}-${{ matrix.build_type }}
+      cancel-in-progress: true
+    steps:
+    - uses: actions/checkout@v4
+    # Set up ccache
+    - name: ccache
+      uses: hendrikmuhs/ccache-action@v1.2
+    - name: Configure CMake
+      run: >
+        cmake --preset ${{ matrix.preset }}
+        -S ${{ github.workspace }} -B ${{ github.workspace }}/build
+        -D CMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -D CMAKE_C_COMPILER_LAUNCHER=ccache
+        -D CMAKE_CXX_COMPILER_LAUNCHER=ccache
+    - name: Build
+      run: cmake --build ${{ github.workspace }}/build --preset ${{ matrix.preset }} --config ${{ matrix.build_type }} -j 4
+    - name: Archive production artifacts
+      uses: actions/upload-artifact@v4
+      with:
+        name: gemma-${{ matrix.os }}-${{ matrix.preset }}-${{ matrix.build_type }}
+        path: |
+          ${{ github.workspace }}/build/${{ matrix.build_type }}/gemma.exe
+          ${{ github.workspace }}/build/${{ matrix.build_type }}/libgemma.lib
+          ${{ github.workspace }}/build/gemma
+          ${{ github.workspace }}/build/libgemma.a
+    - if: matrix.os == 'ubuntu-20.04'
+      name: Upload build artifacts to Kaggle
+      uses: pculliton/push-kaggle-dataset@v1.0.0
+      env:
+        KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
+        KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
+      with:
+        id:  "phillipculliton/gemma-build-artifacts"
+        files: |
+          build/gemma
+          build/_deps/sentencepiece-build/src/libsentencepiece.so.0
+    - if: matrix.os == 'ubuntu-20.04'
+      name: Create code for new test notebook version
+      run: |
+        cat > runner.py << EOF
+        import subprocess
+        subprocess.run(["cp", "/kaggle/input/gemma-build-artifacts/gemma", "/kaggle/working"])
+        subprocess.run(["chmod", "700", "/kaggle/working/gemma"])
+        subprocess.run(["cp", "/kaggle/input/gemma-build-artifacts/_deps/sentencepiece-build/src/libsentencepiece.so.0", "/kaggle/working"])
+        output = subprocess.run(["/kaggle/working/gemma", "--tokenizer", "/kaggle/input/gemma/gemmacpp/2b-it-sfp/4/tokenizer.spm", "--compressed_weights", "/kaggle/input/gemma/gemmacpp/2b-it-sfp/4/2b-it-sfp.sbs", "--model", "2b-it", "--verbosity", "0", "--max_generated_tokens", "128"], stdout=subprocess.PIPE, input='Write an email to the moon.', encoding='ascii').stdout
+        assert("write an email to the moon." not in output.lower());
+        assert("moon" in output.lower());
+        EOF
+    - if: matrix.os == 'ubuntu-20.04'
+      name: Run kaggle test notebook
+      uses: pculliton/kaggle-action@v1.0.28
+      with:
+        username: ${{ secrets.KAGGLE_USERNAME }}
+        key: ${{ secrets.KAGGLE_KEY }}
+        title: GemmaCPP-CI-2
+        code_file: runner.py
+        dataset_sources: "phillipculliton/gemma-build-artifacts"
+        model_sources: "google/gemma/gemmaCpp/2b-it-sfp/4"
+        enable_gpu: False
+        kernel_type: script
+  bazel:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Harden Runner
+        uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0
+        with:
+          egress-policy: audit  # cannot be block - runner does git checkout
+      - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.0.0
+      - uses: bazelbuild/setup-bazelisk@b39c379c82683a5f25d34f0d062761f62693e0b2 # v3.0.0
+      - uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1
+        with:
+          path: ~/.cache/bazel
+          key: bazel-${{ runner.os }}
+      - run: bazel build --cxxopt=-std=c++20 //:all

gemma.cpp/.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.cache/
+bazel-*/
+build-*/
+python/*/__pycache__

gemma.cpp/.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "cmake.configureOnOpen": false,
+    "files.associations": {
+        "array": "cpp"
+    }
+}

gemma.cpp/BUILD.bazel ADDED Viewed

	@@ -0,0 +1,420 @@

+# gemma.cpp is a lightweight, standalone C++ inference engine for the Gemma
+# foundation models from Google.
+load("@rules_license//rules:license.bzl", "license")
+package(
+    default_applicable_licenses = [
+        "//:license",  # Placeholder comment, do not modify
+    ],
+    default_visibility = ["//visibility:public"],
+)
+license(
+    name = "license",
+    package_name = "gemma_cpp",
+)
+# Dual-licensed Apache 2 and 3-clause BSD.
+licenses(["notice"])
+exports_files(["LICENSE"])
+cc_library(
+    name = "ops",
+    hdrs = ["gemma/ops.h"],
+    deps = [
+        "//compression:compress",
+        "@hwy//:algo",
+        "@hwy//:dot",
+        "@hwy//:hwy",
+        "@hwy//:math",
+        "@hwy//:matvec",
+        "@hwy//:profiler",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_test(
+    name = "ops_test",
+    size = "small",
+    timeout = "long",
+    srcs = ["gemma/ops_test.cc"],
+    local_defines = ["HWY_IS_TEST"],
+    # for test_suite.
+    tags = ["hwy_ops_test"],
+    deps = [
+        ":ops",
+        "@googletest//:gtest_main",  # buildcleaner: keep
+        "//compression:compress",
+        "@hwy//:hwy",
+        "@hwy//:hwy_test_util",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_library(
+    name = "common",
+    srcs = ["gemma/common.cc"],
+    hdrs = [
+        "gemma/common.h",
+        "gemma/configs.h",
+    ],
+    deps = [
+        "//compression:compress",
+        "@hwy//:hwy",  # base.h
+        "@hwy//:thread_pool",
+    ],
+)
+cc_library(
+    name = "weights",
+    srcs = ["gemma/weights.cc"],
+    hdrs = ["gemma/weights.h"],
+    deps = [
+        ":common",
+        "//compression:compress",
+        "//compression:io",
+        "@hwy//:hwy",
+        "@hwy//:profiler",
+        "@hwy//:stats",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_library(
+    name = "weights_raw",
+    hdrs = ["gemma/weights_raw.h"],
+    deps = [
+        ":common",
+        ":weights",
+        "//compression:compress",
+        "@hwy//:hwy",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_library(
+    name = "gemma_lib",
+    srcs = [
+        "gemma/gemma.cc",
+    ],
+    hdrs = [
+        "gemma/activations.h",
+        "gemma/gemma.h",
+    ],
+    textual_hdrs = [
+        # Placeholder for internal file1, do not remove,
+        # Placeholder for internal file2, do not remove,
+    ],
+    deps = [
+        ":common",
+        ":ops",
+        ":weights",
+        "//compression:compress",
+        "//compression:io",
+        "@hwy//:hwy",
+        "@hwy//:matvec",
+        "@hwy//:nanobenchmark",  # timer
+        "@hwy//:profiler",
+        "@hwy//:thread_pool",
+        "@com_google_sentencepiece//:sentencepiece_processor",
+    ],
+)
+cc_library(
+    name = "cross_entropy",
+    srcs = ["gemma/cross_entropy.cc"],
+    hdrs = ["gemma/cross_entropy.h"],
+    deps = [
+        ":common",
+        ":gemma_lib",
+        "@hwy//:hwy",
+    ],
+)
+cc_library(
+    name = "args",
+    hdrs = ["util/args.h"],
+    deps = [
+        "//compression:io",
+        "@hwy//:hwy",
+    ],
+)
+cc_library(
+    name = "app",
+    hdrs = ["util/app.h"],
+    deps = [
+        ":args",
+        ":common",
+        ":gemma_lib",
+        "//compression:io",
+        "@hwy//:hwy",
+        "@hwy//:thread_pool",
+        "@hwy//:topology",
+    ],
+)
+cc_library(
+    name = "benchmark_helper",
+    srcs = ["gemma/benchmark_helper.cc"],
+    hdrs = ["gemma/benchmark_helper.h"],
+    deps = [
+        ":app",
+        ":args",
+        ":common",
+        ":cross_entropy",
+        ":gemma_lib",
+        # Placeholder for internal dep, do not remove.,
+        "@benchmark//:benchmark",
+        "//compression:compress",
+        "@hwy//:hwy",
+        "@hwy//:nanobenchmark",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_test(
+    name = "gemma_test",
+    srcs = ["gemma/gemma_test.cc"],
+    # Requires model files
+    tags = [
+        "local",
+        "manual",
+        "no_tap",
+    ],
+    deps = [
+        ":app",
+        ":args",
+        ":benchmark_helper",
+        ":common",
+        ":cross_entropy",
+        ":gemma_lib",
+        ":ops",
+        "@googletest//:gtest_main",
+        "//compression:io",
+        "@hwy//:hwy_test_util",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_binary(
+    name = "gemma",
+    srcs = ["gemma/run.cc"],
+    deps = [
+        ":app",
+        ":args",
+        ":benchmark_helper",
+        ":common",
+        ":gemma_lib",
+        # Placeholder for internal dep, do not remove.,
+        "//compression:compress",
+        "@hwy//:hwy",
+        "@hwy//:nanobenchmark",
+        "@hwy//:profiler",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_binary(
+    name = "compress_weights",
+    srcs = ["gemma/compress_weights.cc"],
+    deps = [
+        ":args",
+        ":common",
+        ":gemma_lib",
+        ":weights",
+        ":weights_raw",
+        # Placeholder for internal dep, do not remove.,
+        "//compression:compress",
+        "@hwy//:hwy",
+        "@hwy//:nanobenchmark",
+        "@hwy//:profiler",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_binary(
+    name = "single_benchmark",
+    srcs = ["gemma/benchmark.cc"],
+    deps = [
+        ":app",
+        ":args",
+        ":benchmark_helper",
+        ":common",
+        ":cross_entropy",
+        ":gemma_lib",
+        "//compression:io",
+        "@hwy//:hwy",
+        "@hwy//:nanobenchmark",
+        "@hwy//:thread_pool",
+        "@nlohmann_json//:json",
+    ],
+)
+cc_binary(
+    name = "benchmarks",
+    srcs = ["gemma/benchmarks.cc"],
+    deps = [
+        ":benchmark_helper",
+        "@benchmark//:benchmark",
+    ],
+)
+cc_binary(
+    name = "debug_prompt",
+    srcs = [
+        "debug_prompt.cc",
+    ],
+    deps = [
+        ":app",
+        ":args",
+        ":benchmark_helper",
+        ":gemma_lib",
+        "//compression:io",
+        "@hwy//:hwy",
+        "@hwy//:thread_pool",
+        "@nlohmann_json//:json",
+    ],
+)
+cc_binary(
+    name = "gemma_mmlu",
+    srcs = ["gemma/run_mmlu.cc"],
+    deps = [
+        ":app",
+        ":args",
+        ":benchmark_helper",
+        ":gemma_lib",
+        "//compression:io",
+        "@hwy//:hwy",
+        "@hwy//:profiler",
+        "@hwy//:thread_pool",
+        "@nlohmann_json//:json",
+    ],
+)
+cc_library(
+    name = "prompt",
+    hdrs = ["backprop/prompt.h"],
+    deps = [],
+)
+cc_library(
+    name = "sampler",
+    hdrs = ["backprop/sampler.h"],
+    deps = [
+        ":prompt",
+    ],
+)
+cc_library(
+    name = "backprop",
+    srcs = [
+        "backprop/backward.cc",
+        "backprop/forward.cc",
+    ],
+    hdrs = [
+        "backprop/backward.h",
+        "backprop/backward-inl.h",
+        "backprop/forward.h",
+        "backprop/forward-inl.h",
+    ],
+    deps = [
+        ":common",
+        ":gemma_lib",
+        ":ops",
+        ":prompt",
+        ":weights",
+        "@hwy//:hwy",  # base.h
+        "@hwy//:thread_pool",
+    ],
+)
+cc_library(
+    name = "backprop_scalar",
+    hdrs = [
+        "backprop/backward_scalar.h",
+        "backprop/common_scalar.h",
+        "backprop/forward_scalar.h",
+    ],
+    deps = [
+        ":common",
+        ":gemma_lib",
+        ":prompt",
+        ":weights_raw",
+    ],
+)
+cc_test(
+    name = "backward_scalar_test",
+    size = "large",
+    srcs = [
+        "backprop/backward_scalar_test.cc",
+        "backprop/test_util.h",
+    ],
+    deps = [
+        ":backprop_scalar",
+        ":prompt",
+        ":sampler",
+        ":weights_raw",
+        "@googletest//:gtest_main",
+    ],
+)
+cc_test(
+    name = "backward_test",
+    size = "large",
+    srcs = [
+        "backprop/backward_test.cc",
+        "backprop/test_util.h",
+    ],
+    deps = [
+        ":backprop",
+        ":backprop_scalar",
+        ":gemma_lib",
+        ":ops",
+        ":sampler",
+        ":weights_raw",
+        "@googletest//:gtest_main",
+        "@hwy//:hwy",
+        "@hwy//:hwy_test_util",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_library(
+    name = "optimizer",
+    srcs = [
+        "backprop/optimizer.cc",
+    ],
+    hdrs = [
+        "backprop/optimizer.h",
+    ],
+    deps = [
+        ":common",
+        ":weights",
+        "//compression:compress",
+        "@hwy//:hwy",
+        "@hwy//:thread_pool",
+    ],
+)
+cc_test(
+    name = "optimize_test",
+    srcs = [
+        "backprop/optimize_test.cc",
+    ],
+    deps = [
+        ":backprop",
+        ":common",
+        ":gemma_lib",
+        ":optimizer",
+        ":prompt",
+        ":sampler",
+        ":weights",
+        "@googletest//:gtest_main",
+        "@hwy//:thread_pool",
+    ],
+)

gemma.cpp/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,149 @@

+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Minimum CMake version: FetchContent_MakeAvailable(), used below for every
+# third-party dependency, was introduced in CMake 3.14. Declaring 3.11 lets
+# older CMake pass this gate and then fail later on an unknown command.
+cmake_minimum_required(VERSION 3.14)
+# FetchContent downloads and configures dependencies at configure time.
+include(FetchContent)
+project(gemma)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 457c891775a7397bdb0376bb1031e6e027af1c48 EXCLUDE_FROM_ALL)
+FetchContent_MakeAvailable(highway)
+## Note: absl needs to be installed by sentencepiece. This will only happen if
+## cmake is invoked with -DSPM_ENABLE_SHARED=OFF and -DSPM_ABSL_PROVIDER=module
+FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c EXCLUDE_FROM_ALL)
+FetchContent_MakeAvailable(sentencepiece)
+FetchContent_Declare(json GIT_REPOSITORY https://github.com/nlohmann/json.git GIT_TAG 9cca280a4d0ccf0c08f47a99aa71d1b0e52f8d03 EXCLUDE_FROM_ALL)
+FetchContent_MakeAvailable(json)
+FetchContent_Declare(benchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG v1.8.2 EXCLUDE_FROM_ALL)
+FetchContent_MakeAvailable(benchmark)
+# Sources and headers that make up the libgemma library target (passed to
+# add_library(libgemma ...)). Paths are relative to this CMakeLists.txt.
+# NOTE(review): compression/io.cc and compression/io_win.cc are both listed
+# unconditionally; presumably each file guards its contents with platform
+# checks - confirm.
+set(SOURCES
+  compression/blob_store.cc
+  compression/blob_store.h
+  compression/compress.h
+  compression/compress-inl.h
+  compression/io_win.cc
+  compression/io.cc
+  compression/io.h
+  compression/nuq.h
+  compression/nuq-inl.h
+  compression/sfp.h
+  compression/sfp-inl.h
+  compression/test_util.h
+  backprop/backward.cc
+  backprop/backward.h
+  backprop/backward-inl.h
+  backprop/backward_scalar.h
+  backprop/common_scalar.h
+  backprop/forward.cc
+  backprop/forward.h
+  backprop/forward-inl.h
+  backprop/forward_scalar.h
+  backprop/optimizer.cc
+  backprop/optimizer.h
+  gemma/configs.h
+  gemma/activations.h
+  gemma/benchmark_helper.cc
+  gemma/benchmark_helper.h
+  gemma/common.cc
+  gemma/common.h
+  gemma/cross_entropy.cc
+  gemma/cross_entropy.h
+  gemma/gemma.cc
+  gemma/gemma.h
+  gemma/ops.h
+  gemma/weights.cc
+  gemma/weights.h
+  gemma/weights_raw.h
+  util/app.h
+  util/args.h
+  )
+# Default to a Release build when the user did not choose a build type.
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release")
+endif()
+# Load sentencepiece's FetchContent properties; sentencepiece_SOURCE_DIR is
+# used below as an include directory.
+FetchContent_GetProperties(sentencepiece)
+## Library Target
+add_library(libgemma ${SOURCES})
+set_property(TARGET libgemma PROPERTY CXX_STANDARD 17)
+# The target name already starts with "lib", so clear the library file-name
+# prefix to avoid a doubled prefix in the output name.
+set_target_properties(libgemma PROPERTIES PREFIX "")
+set_property(TARGET libgemma PROPERTY POSITION_INDEPENDENT_CODE ON)
+# Expose the repository root so consumers can include e.g. "gemma/gemma.h".
+target_include_directories(libgemma PUBLIC ./)
+target_link_libraries(libgemma hwy hwy_contrib sentencepiece-static)
+target_include_directories(libgemma PUBLIC ${sentencepiece_SOURCE_DIR})
+# Windows-only definitions and warning suppression.
+target_compile_definitions(libgemma PRIVATE $<$<PLATFORM_ID:Windows>:_CRT_SECURE_NO_WARNINGS NOMINMAX>)
+target_compile_options(libgemma PRIVATE $<$<PLATFORM_ID:Windows>:-Wno-deprecated-declarations>)
+install(TARGETS libgemma DESTINATION lib)
+# Executable Target
+# `gemma` is built from gemma/run.cc and is the only executable in this
+# section with an install rule.
+add_executable(gemma gemma/run.cc)
+target_link_libraries(gemma libgemma hwy hwy_contrib)
+install(TARGETS gemma DESTINATION bin)
+# Benchmark and debugging binaries; these additionally link nlohmann_json
+# (and Google Benchmark for `benchmarks`).
+add_executable(single_benchmark gemma/benchmark.cc)
+target_link_libraries(single_benchmark libgemma hwy hwy_contrib nlohmann_json::nlohmann_json)
+add_executable(benchmarks gemma/benchmarks.cc)
+target_link_libraries(benchmarks libgemma hwy hwy_contrib nlohmann_json::nlohmann_json benchmark)
+add_executable(debug_prompt debug_prompt.cc)
+target_link_libraries(debug_prompt libgemma hwy hwy_contrib nlohmann_json::nlohmann_json)
+## Tests
+# Tests are opt-in: configure with -DGEMMA_ENABLE_TESTS=ON to build them.
+# NOTE(review): GTest::gtest_main is not fetched by this file; presumably it is
+# provided by a dependency or a parent project - confirm before enabling.
+set(GEMMA_ENABLE_TESTS OFF CACHE BOOL "Enable Gemma tests")
+if (GEMMA_ENABLE_TESTS)
+enable_testing()
+include(GoogleTest)
+set(GEMMA_TEST_FILES
+  backprop/backward_test.cc
+  backprop/backward_scalar_test.cc
+  backprop/optimize_test.cc
+  gemma/ops_test.cc
+  gemma/gemma_test.cc
+)
+# One test executable per file, registered with CTest via gtest_discover_tests.
+foreach (TESTFILE IN LISTS GEMMA_TEST_FILES)
+  # The TESTNAME is the name without the extension or directory.
+  get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
+  add_executable(${TESTNAME} ${TESTFILE})
+  # Test all targets, not just the best/baseline. This changes the default
+  # policy to all-attainable; note that setting -DHWY_COMPILE_* directly can
+  # cause compile errors because only one may be set, and other CMakeLists.txt
+  # that include us may set them.
+  target_compile_options(${TESTNAME} PRIVATE -DHWY_IS_TEST=1)
+  target_link_libraries(${TESTNAME} PRIVATE libgemma GTest::gtest_main hwy hwy_contrib hwy_test)
+  gtest_discover_tests(${TESTNAME})
+endforeach ()
+endif()  # GEMMA_ENABLE_TESTS
+## Tools
+# compress_weights is built from gemma/compress_weights.cc and linked against
+# libgemma; it has no install rule.
+add_executable(compress_weights gemma/compress_weights.cc)
+target_link_libraries(compress_weights libgemma hwy hwy_contrib)

gemma.cpp/CMakePresets.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+    "version": 3,
+    "cmakeMinimumRequired": {
+      "major": 3,
+      "minor": 11,
+      "patch": 0
+    },
+    "configurePresets": [
+      {
+        "name": "__defaults__",
+        "hidden": true,
+        "binaryDir": "${sourceDir}/build"
+      },
+      {
+        "name": "make",
+        "inherits": "__defaults__",
+        "displayName": "Make",
+        "description": "Unix Makefiles",
+        "generator": "Unix Makefiles",
+        "binaryDir": "${sourceDir}/build"
+      },
+      {
+        "name": "windows",
+        "inherits": "__defaults__",
+        "displayName": "Windows",
+        "description": "Visual Studio 2022 with Clang/LLVM frontend",
+        "generator": "Visual Studio 17 2022",
+        "toolset": "ClangCL",
+        "condition": {
+          "type": "equals",
+          "lhs": "${hostSystemName}",
+          "rhs": "Windows"
+        }
+      }
+    ],
+    "buildPresets": [
+      {
+        "name": "__defaults__",
+        "hidden": true,
+        "targets": [
+            "gemma",
+            "libgemma"
+        ]
+      },
+      {
+        "name": "make",
+        "inherits": "__defaults__",
+        "displayName": "Unix Makefiles",
+        "configurePreset": "make"
+      },
+      {
+        "name": "windows",
+        "inherits": "__defaults__",
+        "displayName": "Windows",
+        "configuration": "Release",
+        "configurePreset": "windows"
+      }
+    ]
+  }

gemma.cpp/DEVELOPERS.md ADDED Viewed

	@@ -0,0 +1,203 @@

+# Developer Notes
+## Motivation: A Minimalist C++ LLM Runtime for Research and Experimentation
+In the past, neural network inference has been similar to a simple, opaque,
+stateless function with a single input and output. By contrast,
+foundation model runtimes are better considered as systems with multiple forms
+of state, subsystems, and heterogeneous inputs and outputs. They are often
+integrated with a wide variety of other systems that have their own resources
+(e.g. RAG and tools) and potentially interact with an external environment. They
+have become compute engines to embed proximal tasks and goals within expansively
+broad, general-purpose world models.
+With this in mind, we believe that developing an experimental runtime that is
+flexible and approachable will allow us to explore the design space of co-design
+between high level model concerns and low-level runtime computation.
+## Design Priorities
+Given these motivations, we propose the following priorities for
+making decisions regarding the direction and design of the codebase.
+**Maximize Leverage with a Narrow Scope.** We focus on direct implementations of
+foundation models like Gemma. This allows us to focus effort on bottlenecks of
+specific models. We are willing to trade off generality to keep implementation
+code relatively simple and readable at all layers of the stack, achieve good
+performance, and maintain the velocity of a small team.
+**Data Oriented Design.** Follow data oriented design principles where possible
+to minimize unnecessary performance pessimization. It's best to apply these
+optimizations during the initial design, or when refactoring a subcomponent. The
+first step is to think in terms of batches or tuples of plain old data (POD)
+types: separate arrays, instead of an array of structs. The second is to
+de-emphasize control flow (if statements, virtual functions and class
+hierarchies). The third step is to know intrinsic properties of data and bake
+that into the layout and algorithm.
+**Prioritize Small Batch Latency.** Since production serving solutions are
+available for large-scale serving powered by accelerators and optimizing for
+throughput, this project focuses on the possibilities of local, interactive use
+of foundation models. Although throughput remains important, low latency and
+small batch sizes are prioritized, other things being equal.
+**Maintain a Portable Baseline.** Our starting point is portable CPU SIMD (via
+[highway](https://github.com/google/highway)). We expect to add accelerator and
+hybrid CPU/GPU support in the future, but the project should continue to allow
+builds using this portable baseline. This ensures that research-oriented and
+experimental runtimes and hardware platforms will have a minimum viable option
+to run Gemma even if specialized production-ready deployment paths are not
+available.
+## Code Organization
+The implementation code is roughly split into 4 layers, from high to low level:
+1.  Frontends (`run.cc`) - Either interactive interfaces or automation
+    orchestration that interacts. Frontend code implements a use case objective
+    in terms of invocations to model inference and generation (2). Projects that
+    use gemma.cpp as a library are considered alternative frontends to `run.cc`.
+    We will add examples of additional frontends in the future.
+2.  Models (`gemma.cc`, `gemma.h`, `configs.h`) - Implements the compute graph
+    of the model including supporting functions such as loading and compressing
+    weights using transformer operations provided by layer (3).
+3.  Operations (`ops.h`) - A minimal set of transformer and supporting
+    mathematical operations implementations using compute backends (4). This
+    code should be agnostic to the specifics of the compute graph of the model
+    implementation (2).
+4.  Backend (`highway`) - Low-level hardware interface (SIMD in the case of
+    highway) supporting the implementations in (3).
+Besides these layers, supporting utilities are:
+- `compression/` - model compression operations. The 8-bit switched floating
+  point model conversion is here.
+- `util/` - command line argument handling and any other utilities.
+## Style and Formatting
+A `.clang-format` configuration is provided with our defaults, please run source
+files through `clang-format` (or a formatter that produces equivalent behavior)
+before finalizing PR for submission.
+## Converting weights
+We use a stripped down binary blob (.sbs) artifact to accelerate weight loading
+in C++. These files can be downloaded directly from Kaggle and HuggingFace. You
+can also convert PyTorch or Keras checkpoints to .sbs, but most end users should
+not have to do this.
+If starting with Keras, first run this script to convert to PyTorch:
+https://github.com/keras-team/keras-nlp/blob/master/tools/gemma/export_gemma_to_torch_xla.py
+From PyTorch, use the following script to generate uncompressed weights:
+https://github.com/google/gemma.cpp/blob/dev/util/convert_weights.py
+Then run gemma/compress_weights.cc (Bazel target :compress_weights), specifying
+the resulting file as `--weights` and the desired .sbs name as the
+`--compressed_weights`.
+## Compile-Time Flags (Advanced)
+There are several compile-time flags to be aware of (note these may or may not
+be exposed to the build system):
+- `GEMMA_MAX_SEQ_LEN` : Sets maximum sequence length to preallocate for the KV
+  Cache. The default is 4096 tokens but can be overridden. This is not exposed
+  through `CMakeLists.txt` yet.
+In the medium term this will likely be deprecated in favor of handling options
+at runtime - dynamically resizing the KV cache as needed.
+## Using gemma.cpp as a Library (Advanced)
+Unless you are doing lower level implementations or research, from an
+application standpoint you can think of gemma.h and gemma.cc as the "core" of
+the library.
+You can regard `run.cc` as an example application that your own application is
+substituting for, so the invocations into gemma.h and gemma.cc you see in
+`run.cc` are probably the functions you'll be invoking. You can find examples of
+the invocations to tokenizer methods and `Generate()` in `run.cc`.
+Keep in mind gemma.cpp is oriented at more experimental / prototype / research
+applications. If you're targeting production, there are more standard paths via
+jax / pytorch / keras / XNNPACK for NN deployments.
+### Gemma struct contains all the state of the inference engine - tokenizer, weights, and activations
+`Gemma(...)` - constructor, creates a gemma model object.
+In a standard LLM chat app, you'll probably use a Gemma object directly; in
+more exotic data processing or research applications, you might decompose
+working with weights, kv cache and activations (e.g. you might have multiple kv
+caches and activations for a single set of weights) more directly rather than
+only using a Gemma object.
+### Use the tokenizer in the Gemma object (or interact with the Tokenizer object directly)
+The Gemma object contains a pointer to a Tokenizer object. The main
+operations performed on the tokenizer are to load the tokenizer model from a
+file (usually `tokenizer.spm`), call `Encode()` to go from string prompts to
+token id vectors, or `Decode()` to go from token id vector outputs from the
+model back to strings. `benchmark_helper.h` provides wrapper functions that make
+them easier to use.
+### `model.Generate()` is the entrypoint for token generation
+Calling into `model.Generate` with a tokenized prompt will
+1.  mutate the activation values in `model` and
+2.  invoke `StreamFunc` - a lambda callback for each generated token.
+Your application defines its own `StreamFunc` as a lambda callback to do
+something every time a token string is streamed from the engine (e.g., print to
+the screen, write data to the disk, send the string to a server, etc.). You can
+see in `run.cc` the `StreamFunc` lambda takes care of printing each token to the
+screen as it arrives.
+Optionally you can define `accept_token` as another lambda - this is mostly for
+constrained decoding type of use cases where you want to force the generation to
+fit a grammar. If you're not doing this, you can send an empty lambda or
+`std::function` as a no-op which is what `run.cc` does.
+### `Transformer()` implements the inference (i.e. `forward()` method in PyTorch or Jax) computation of the neural network
+For high-level applications, you might only call `model.Generate()` and never
+interact directly with the neural network, but if you're doing something a bit
+more custom you can call `Transformer()`, which performs a single inference operation
+on a single token and mutates the Activations and the KVCache through the neural
+network computation.
+Note that an experimental backward pass is available in backprop/, which may be
+useful for fine tuning.
+### For low level operations, defining new architectures, call `ops.h` functions directly
+You use `ops.h` if you're writing other NN architectures or modifying the
+inference path of the Gemma model.
+## Building with Bazel
+The sentencepiece library we depend on requires some additional work to build
+with the Bazel build system. First, it does not export its BUILD file, so we
+provide `bazel/sentencepiece.bazel`. Second, it ships with a vendored subset of
+the Abseil library. `bazel/sentencepiece.patch` changes the code to support
+Abseil as a standalone dependency without third_party/ prefixes, similar to the
+transforms we apply to Gemma via Copybara.
+## Debugging
+At the first sign of incorrect or unexpected results, we recommend running with
+ASan/MSan enabled. When using blaze/bazel, you can add `--config=asan` or
+`--config=msan-track-origins` to the build command. In addition to their checks
+for memory overruns or uninitialized memory, we also enable debug-only asserts
+in Gemma.cpp for those build configurations.
+## Discord
+We're also trying out a discord server for discussion here -
+https://discord.gg/H5jCBAWxAe

gemma.cpp/LICENSE ADDED Viewed

	@@ -0,0 +1,202 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

gemma.cpp/LICENSE-BSD3 ADDED Viewed

	@@ -0,0 +1,26 @@

+Copyright (c) The gemma.cpp Project Authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+1.  Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+2.  Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+3.  Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

gemma.cpp/MODULE.bazel ADDED Viewed

	@@ -0,0 +1,72 @@

+# Bzlmod module definition for gemma.cpp.
+module(
+    name = "gemma",
+    version = "0.1.0",
+)
+# Dependencies resolved as Bazel modules via bazel_dep.
+bazel_dep(name = "rules_license", version = "0.0.7")
+bazel_dep(name = "googletest", version = "1.14.0")
+# Copied from Highway because Bazel does not load them transitively
+bazel_dep(name = "bazel_skylib", version = "1.4.1")
+bazel_dep(name = "rules_cc", version = "0.0.9")
+bazel_dep(name = "platforms", version = "0.0.7")
+# The remaining dependencies are fetched as plain archives with http_archive.
+http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+# Highway release 1.2.0, fetched as a release archive and verified by its
+# integrity hash.
+http_archive(
+    name = "hwy",
+    urls = ["https://github.com/google/highway/archive/refs/tags/1.2.0.zip"],
+    integrity = "sha256-fbtKAGj5hhhBr5Bggtsrj4aIodC2OHb1njB8LGfom8A=",
+    strip_prefix = "highway-1.2.0",
+)
+# nlohmann/json release v3.11.3, verified by its integrity hash.
+http_archive(
+    name = "nlohmann_json",
+    urls = ["https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.zip"],
+    integrity = "sha256-BAIrBdgG61/3MCPCgLaGl9Erk+G3JnoLIqGjnsdXgGk=",
+    strip_prefix = "json-3.11.3",
+)
+# sentencepiece v0.1.96. The archive is built with the BUILD file supplied in
+# //bazel:sentencepiece.bazel, after applying //bazel:sentencepiece.patch with
+# `-p1`.
+http_archive(
+    name = "com_google_sentencepiece",
+    sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
+    strip_prefix = "sentencepiece-0.1.96",
+    urls = ["https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip"],
+    build_file = "@//bazel:sentencepiece.bazel",
+    patches = ["@//bazel:sentencepiece.patch"],
+    patch_args = ["-p1"],
+)
+# For sentencepiece
+# darts-clone pinned to a specific commit. The inline BUILD content exposes
+# include/darts.h as a public cc_library named darts_clone.
+http_archive(
+    name = "darts_clone",
+    build_file_content = """
+licenses(["notice"])
+exports_files(["LICENSE"])
+package(default_visibility = ["//visibility:public"])
+cc_library(
+    name = "darts_clone",
+    hdrs = [
+        "include/darts.h",
+    ],
+)
+""",
+    sha256 = "c97f55d05c98da6fcaf7f9ecc6a6dc6bc5b18b8564465f77abff8879d446491c",
+    strip_prefix = "darts-clone-e40ce4627526985a7767444b6ed6893ab6ff8983",
+    urls = [
+        "https://github.com/s-yata/darts-clone/archive/e40ce4627526985a7767444b6ed6893ab6ff8983.zip",
+    ],
+)
+# ABSL on 2023-10-18
+# Abseil pinned to a specific commit; the archive is verified by sha256.
+http_archive(
+    name = "com_google_absl",
+    sha256 = "f841f78243f179326f2a80b719f2887c38fe226d288ecdc46e2aa091e6aa43bc",
+    strip_prefix = "abseil-cpp-9687a8ea750bfcddf790372093245a1d041b21a3",
+    urls = ["https://github.com/abseil/abseil-cpp/archive/9687a8ea750bfcddf790372093245a1d041b21a3.tar.gz"],
+)
+# Benchmark
+# Google Benchmark release v1.8.2, verified by its integrity hash.
+http_archive(
+    name = "benchmark",
+    urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.8.2.tar.gz"],
+    integrity = "sha256-KqspgNA3YTf5adkoSPu2gharsHYzA0U0/IxlzE56DpM=",
+    strip_prefix = "benchmark-1.8.2",
+)

gemma.cpp/README.md ADDED Viewed

	@@ -0,0 +1,493 @@

+# gemma.cpp
+gemma.cpp is a lightweight, standalone C++ inference engine for the Gemma
+foundation models from Google.
+For additional information about Gemma, see
+[ai.google.dev/gemma](https://ai.google.dev/gemma). Model weights, including gemma.cpp
+specific artifacts, are [available on
+kaggle](https://www.kaggle.com/models/google/gemma).
+NOTE: 2024-04-04: if using 2B models, please re-download weights from Kaggle and
+ensure you have the latest version (-mqa or version 3). We are changing the code
+to match the new weights. If you wish to use old weights, change `ConfigGemma2B`
+in `configs.h` back to `kVocabSize = 256128` and `kKVHeads = 8`.
+## Who is this project for?
+Modern LLM inference engines are sophisticated systems, often with bespoke
+capabilities extending beyond traditional neural network runtimes. With this
+comes opportunities for research and innovation through co-design of high level
+algorithms and low-level computation. However, there is a gap between
+deployment-oriented C++ inference runtimes, which are not designed for
+experimentation, and Python-centric ML research frameworks, which abstract away
+low-level computation through compilation.
+gemma.cpp provides a minimalist implementation of Gemma 2B and 7B models,
+focusing on simplicity and directness rather than full generality. This is
+inspired by vertically-integrated model implementations such as
+[ggml](https://github.com/ggerganov/ggml),
+[llama2.c](https://github.com/karpathy/llama2.c), and
+[llama2.rs](https://github.com/srush/llama2.rs).
+gemma.cpp targets experimentation and research use cases. It is intended to be
+straightforward to embed in other projects with minimal dependencies and also
+easily modifiable with a small ~2K LoC core implementation (along with ~4K LoC
+of supporting utilities). We use the [Google
+Highway](https://github.com/google/highway) Library to take advantage of
+portable SIMD for CPU inference.
+For production-oriented edge deployments we recommend standard deployment
+pathways using Python frameworks like JAX, Keras, PyTorch, and Transformers
+([all model variations here](https://www.kaggle.com/models/google/gemma)).
+## Contributing
+Community contributions large and small are welcome. See
+[DEVELOPERS.md](https://github.com/google/gemma.cpp/blob/main/DEVELOPERS.md)
+for additional notes for contributing developers and [join the discord by following
+this invite link](https://discord.gg/H5jCBAWxAe). This project follows
+[Google's Open Source Community
+Guidelines](https://opensource.google.com/conduct/).
+*Active development is currently done on the `dev` branch. Please open pull
+requests targeting `dev` branch instead of `main`, which is intended to be more
+stable.*
+## Quick Start
+### System requirements
+Before starting, you should have installed:
+- [CMake](https://cmake.org/)
+- [Clang C++ compiler](https://clang.llvm.org/get_started.html), supporting at
+  least C++17.
+- `tar` for extracting archives from Kaggle.
+Building natively on Windows requires the Visual Studio 2022 Build Tools with the
+optional Clang/LLVM C++ frontend (`clang-cl`). This can be installed from the
+command line with
+[`winget`](https://learn.microsoft.com/en-us/windows/package-manager/winget/):
+```sh
+winget install --id Kitware.CMake
+winget install --id Microsoft.VisualStudio.2022.BuildTools --force --override "--passive --wait --add Microsoft.VisualStudio.Workload.VCTools;installRecommended --add Microsoft.VisualStudio.Component.VC.Llvm.Clang --add Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset"
+```
+### Step 1: Obtain model weights and tokenizer from Kaggle or Hugging Face Hub
+Visit [the Gemma model page on
+Kaggle](https://www.kaggle.com/models/google/gemma/frameworks/gemmaCpp) and select `Model Variations
+|> Gemma C++`. On this tab, the `Variation` dropdown includes the options below.
+Note bfloat16 weights are higher fidelity, while 8-bit switched floating point
+weights enable faster inference. In general, we recommend starting with the
+`-sfp` checkpoints.
+Alternatively, visit the [gemma.cpp](https://huggingface.co/models?other=gemma.cpp)
+models on the Hugging Face Hub. First go to the model repository of the model of interest
+(see recommendations below). Then, click the `Files and versions` tab and download the
+model and tokenizer files. For programmatic downloading, if you have `huggingface_hub`
+installed, you can also download by running:
+```
+huggingface-cli login # Just the first time
+huggingface-cli download google/gemma-2b-sfp-cpp --local-dir build/
+```
+2B instruction-tuned (`it`) and pre-trained (`pt`) models:
+| Model name  | Description |
+| ----------- | ----------- |
+| `2b-it`     | 2 billion parameter instruction-tuned model, bfloat16 |
+| `2b-it-sfp` | 2 billion parameter instruction-tuned model, 8-bit switched floating point |
+| `2b-pt`     | 2 billion parameter pre-trained model, bfloat16 |
+| `2b-pt-sfp` | 2 billion parameter pre-trained model, 8-bit switched floating point |
+7B instruction-tuned (`it`) and pre-trained (`pt`) models:
+| Model name  | Description |
+| ----------- | ----------- |
+| `7b-it`     | 7 billion parameter instruction-tuned model, bfloat16 |
+| `7b-it-sfp` | 7 billion parameter instruction-tuned model, 8-bit switched floating point |
+| `7b-pt`     | 7 billion parameter pre-trained model, bfloat16 |
+| `7b-pt-sfp` | 7 billion parameter pre-trained model, 8-bit switched floating point |
+> [!NOTE]
+> **Important**: We strongly recommend starting off with the `2b-it-sfp` model to
+> get up and running.
+### Step 2: Extract Files
+If you downloaded the models from Hugging Face, skip to step 3.
+After filling out the consent form, the download should proceed to retrieve a
+tar archive file `archive.tar.gz`. Extract files from `archive.tar.gz` (this can
+take a few minutes):
+```
+tar -xf archive.tar.gz
+```
+This should produce a file containing model weights such as `2b-it-sfp.sbs` and
+a tokenizer file (`tokenizer.spm`). You may want to move these files to a
+convenient directory location (e.g. the `build/` directory in this repo).
+### Step 3: Build
+The build system uses [CMake](https://cmake.org/). To build the gemma inference
+runtime, create a build directory and generate the build files using `cmake`
+from the top-level project directory. Note if you previously ran `cmake` and are
+re-running with a different setting, be sure to delete all files in the `build/`
+directory with `rm -rf build/*`.
+#### Unix-like Platforms
+```sh
+cmake -B build
+```
+After running `cmake`, you can enter the `build/` directory and run `make` to
+build the `./gemma` executable:
+```sh
+# Configure `build` directory
+cmake --preset make
+# Build project using make
+cmake --build --preset make -j [number of parallel threads to use]
+```
+Replace `[number of parallel threads to use]` with a number - the number of
+cores available on your system is a reasonable heuristic.  For example,
+`make -j4 gemma` will build using 4 threads. If the `nproc` command is
+available, you can use `make -j$(nproc) gemma` as a reasonable default
+for the number of threads.
+If you aren't sure of the right value for the `-j` flag, you can simply run
+`make gemma` instead and it should still build the `./gemma` executable.
+> [!NOTE]
+> Windows Subsystem for Linux (WSL) users should set the number of
+> parallel threads to 1. Using a larger number may result in errors.
+If the build is successful, you should now have a `gemma` executable in the `build/` directory.
+#### Windows
+```sh
+# Configure `build` directory
+cmake --preset windows
+# Build project using Visual Studio Build Tools
+cmake --build --preset windows -j [number of parallel threads to use]
+```
+If the build is successful, you should now have a `gemma.exe` executable in the `build/` directory.
+#### Bazel
+```sh
+bazel build -c opt --cxxopt=-std=c++20 :gemma
+```
+If the build is successful, you should now have a `gemma` executable in the `bazel-bin/` directory.
+#### Make
+If you prefer Makefiles, @jart has made one available here:
+https://github.com/jart/gemma3/blob/main/Makefile
+### Step 4: Run
+You can now run `gemma` from inside the `build/` directory.
+`gemma` has the following required arguments:
+Argument        | Description                  | Example value
+--------------- | ---------------------------- | -----------------------
+`--model`       | The model type.              | `2b-it` ... (see below)
+`--weights`     | The compressed weights file. | `2b-it-sfp.sbs`
+`--weight_type` | The compressed weight type.  | `sfp`
+`--tokenizer`   | The tokenizer file.          | `tokenizer.spm`
+`gemma` is invoked as:
+```sh
+./gemma \
+--tokenizer [tokenizer file] \
+--weights [compressed weights file] \
+--weight_type [f32 or bf16 or sfp] \
+--model [2b-it or 2b-pt or 7b-it or 7b-pt or ...]
+```
+Example invocation for the following configuration:
+- Compressed weights file `2b-it-sfp.sbs` (2B instruction-tuned model, 8-bit
+  switched floating point).
+- Tokenizer file `tokenizer.spm`.
+```sh
+./gemma \
+--tokenizer tokenizer.spm \
+--weights 2b-it-sfp.sbs --weight_type sfp --model 2b-it
+```
+### RecurrentGemma
+This repository includes a version of Gemma based on Griffin
+([paper](https://arxiv.org/abs/2402.19427),
+[code](https://github.com/google-deepmind/recurrentgemma)). Its architecture
+includes both recurrent layers and local attention, thus it is more efficient
+for longer sequences and has a smaller memory footprint than standard Gemma. We
+here provide a C++ implementation of this model based on the paper.
+To use the recurrent version of Gemma included in this repository, build the
+gemma binary as noted above in Step 3. Download the compressed weights and
+tokenizer from the RecurrentGemma
+[Kaggle](https://www.kaggle.com/models/google/recurrentgemma/gemmaCpp) as in
+Step 1, and run the binary as follows:
+`./gemma --tokenizer tokenizer.spm --model gr2b-it --weights 2b-it-sfp.sbs`
+### Troubleshooting and FAQs
+**Running `./gemma` fails with "Failed to read cache gating_ein_0 (error 294) ..."**
+The most common problem is that the `--weight_type` argument does not match that
+of the model file. Revisit Step 1 and check which weights you downloaded.
+Note that we have already moved weight type from a compile-time decision to a
+runtime argument. In a subsequent step, we plan to bake this information into
+the weights.
+**Problems building in Windows / Visual Studio**
+Currently if you're using Windows, we recommend building in WSL (Windows
+Subsystem for Linux). We are exploring options to enable other build
+configurations, see issues for active discussion.
+**Model does not respond to instructions and produces strange output**
+A common issue is that you are using a pre-trained model, which is not
+instruction-tuned and thus does not respond to instructions. Make sure you are
+using an instruction-tuned model (`2b-it-sfp`, `2b-it`, `7b-it-sfp`, `7b-it`)
+and not a pre-trained model (any model with a `-pt` suffix).
+**How do I convert my fine-tune to a `.sbs` compressed model file?**
+We're working on a python script to convert a standard model format to `.sbs`,
+and hope to have it available in the next week or so. Follow [this
+issue](https://github.com/google/gemma.cpp/issues/11) for updates.
+**What are some easy ways to make the model run faster?**
+1. Make sure you are using the 8-bit switched floating point `-sfp` models.
+2. If you're on a laptop, make sure power mode is set to maximize performance
+and saving mode is **off**. For most laptops, the power saving modes get
+activated automatically if the computer is not plugged in.
+3. Close other unused cpu-intensive applications.
+4. On macs, anecdotally we observe a "warm-up" ramp-up in speed as performance
+cores get engaged.
+5. Experiment with the `--num_threads` argument value. Depending on the device,
+larger numbers don't always mean better performance.
+We're also working on algorithmic and optimization approaches for faster
+inference, stay tuned.
+## Usage
+`gemma` has different usage modes, controlled by the verbosity flag.
+All usage modes are currently interactive, triggering text generation upon
+newline input.
+| Verbosity       | Usage mode | Details                                       |
+| --------------- | ---------- | --------------------------------------------- |
+| `--verbosity 0` | Minimal | Only prints generation output. Suitable as a CLI tool. |
+| `--verbosity 1` | Default | Standard user-facing terminal UI. |
+| `--verbosity 2` | Detailed | Shows additional developer and debug info. |
+### Interactive Terminal App
+By default, verbosity is set to 1, bringing up a terminal-based interactive
+interface when `gemma` is invoked:
+```console
+$ ./gemma [...]
+  __ _  ___ _ __ ___  _ __ ___   __ _   ___ _ __  _ __
+ / _` |/ _ \ '_ ` _ \| '_ ` _ \ / _` | / __| '_ \| '_ \
+| (_| |  __/ | | | | | | | | | | (_| || (__| |_) | |_) |
+ \__, |\___|_| |_| |_|_| |_| |_|\__,_(_)___| .__/| .__/
+  __/ |                                    | |   | |
+ |___/                                     |_|   |_|
+tokenizer                     : tokenizer.spm
+compressed_weights            : 2b-it-sfp.sbs
+model                         : 2b-it
+weights                       : [no path specified]
+max_tokens                    : 3072
+max_generated_tokens          : 2048
+*Usage*
+  Enter an instruction and press enter (%C reset conversation, %Q quits).
+*Examples*
+  - Write an email to grandma thanking her for the cookies.
+  - What are some historical attractions to visit around Massachusetts?
+  - Compute the nth fibonacci number in javascript.
+  - Write a standup comedy bit about WebGPU programming.
+> What are some outdoorsy places to visit around Boston?
+[ Reading prompt ] .....................
+**Boston Harbor and Islands:**
+* **Boston Harbor Islands National and State Park:** Explore pristine beaches, wildlife, and maritime history.
+* **Charles River Esplanade:** Enjoy scenic views of the harbor and city skyline.
+* **Boston Harbor Cruise Company:** Take a relaxing harbor cruise and admire the city from a different perspective.
+* **Seaport Village:** Visit a charming waterfront area with shops, restaurants, and a seaport museum.
+**Forest and Nature:**
+* **Forest Park:** Hike through a scenic forest with diverse wildlife.
+* **Quabbin Reservoir:** Enjoy boating, fishing, and hiking in a scenic setting.
+* **Mount Forest:** Explore a mountain with breathtaking views of the city and surrounding landscape.
+...
+```
+### Usage as a Command Line Tool
+For using the `gemma` executable as a command line tool, it may be useful to
+create an alias for gemma.cpp with arguments fully specified:
+```sh
+alias gemma2b="~/gemma.cpp/build/gemma -- --tokenizer ~/gemma.cpp/build/tokenizer.spm --weights ~/gemma.cpp/build/2b-it-sfp.sbs --model 2b-it --verbosity 0"
+```
+Replace the above paths with your own paths to the model and tokenizer files
+from the download.
+Here is an example of prompting `gemma` with a truncated input
+file (using a `gemma2b` alias as defined above):
+```sh
+cat configs.h | tail -35 | tr '\n' ' ' | xargs -0 echo "What does this C++ code do: " | gemma2b
+```
+> [!NOTE]
+> CLI usage of gemma.cpp is experimental and should take context length
+> limitations into account.
+The output of the above command should look like:
+```console
+$ cat configs.h | tail -35 | tr '\n' ' ' | xargs -0 echo "What does this C++ code do: " | gemma2b
+[ Reading prompt ] ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
+The code defines two C++ structs, `ConfigGemma7B` and `ConfigGemma2B`, which are used for configuring a deep learning model.
+**ConfigGemma7B**:
+* `kSeqLen`: Stores the length of the sequence to be processed. It's set to 7168.
+* `kVocabSize`: Stores the size of the vocabulary, which is 256128.
+* `kLayers`: Number of layers in the deep learning model. It's set to 28.
+* `kModelDim`: Dimension of the model's internal representation. It's set to 3072.
+* `kFFHiddenDim`: Dimension of the feedforward and recurrent layers' hidden representations. It's set to 16 * 3072 / 2.
+**ConfigGemma2B**:
+* `kSeqLen`: Stores the length of the sequence to be processed. It's also set to 7168.
+* `kVocabSize`: Size of the vocabulary, which is 256128.
+* `kLayers`: Number of layers in the deep learning model. It's set to 18.
+* `kModelDim`: Dimension of the model's internal representation. It's set to 2048.
+* `kFFHiddenDim`: Dimension of the feedforward and recurrent layers' hidden representations. It's set to 16 * 2048 / 2.
+These structs are used to configure a deep learning model with specific parameters for either Gemma7B or Gemma2B architecture.
+```
+### Incorporating gemma.cpp as a Library in your Project
+The easiest way to incorporate gemma.cpp in your own project is to pull in
+gemma.cpp and dependencies using `FetchContent`. You can add the following to your
+CMakeLists.txt:
+```
+include(FetchContent)
+FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c)
+FetchContent_MakeAvailable(sentencepiece)
+FetchContent_Declare(gemma GIT_REPOSITORY https://github.com/google/gemma.cpp GIT_TAG origin/main)
+FetchContent_MakeAvailable(gemma)
+FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG da250571a45826b21eebbddc1e50d0c1137dee5f)
+FetchContent_MakeAvailable(highway)
+```
+Note for the gemma.cpp `GIT_TAG`, you may replace `origin/main` with a specific
+commit hash if you would like to pin the library version.
+After your executable is defined (substitute your executable name for
+`[Executable Name]` below):
+```
+target_link_libraries([Executable Name] libgemma hwy hwy_contrib sentencepiece)
+FetchContent_GetProperties(gemma)
+FetchContent_GetProperties(sentencepiece)
+target_include_directories([Executable Name] PRIVATE ${gemma_SOURCE_DIR})
+target_include_directories([Executable Name] PRIVATE ${sentencepiece_SOURCE_DIR})
+```
+### Building gemma.cpp as a Library
+gemma.cpp can also be used as a library dependency in your own project. The
+shared library artifact can be built by modifying the make invocation to build
+the `libgemma` target instead of `gemma`.
+> [!NOTE]
+> If you are using gemma.cpp in your own project with the `FetchContent` steps
+> in the previous section, building the library is done automatically by `cmake`
+> and this section can be skipped.
+First, run `cmake`:
+```sh
+cmake -B build
+```
+Then, run `make` with the `libgemma` target:
+```sh
+cd build
+make -j [number of parallel threads to use] libgemma
+```
+If this is successful, you should now have a `libgemma` library file in the
+`build/` directory. On Unix platforms, the filename is `libgemma.a`.
+## Independent Projects Using gemma.cpp
+Some independent projects using gemma.cpp:
+- [gemma-cpp-python - Python bindings](https://github.com/namtranase/gemma-cpp-python)
+- [lua-cgemma - Lua bindings](https://github.com/ufownl/lua-cgemma)
+- [Godot engine demo project](https://github.com/Rliop913/Gemma-godot-demo-project)
+If you would like to have your project included, feel free to get in touch or
+submit a PR with a `README.md` edit.
+## Acknowledgements and Contacts
+gemma.cpp was started in fall 2023 by [Austin Huang](mailto:austinvhuang@google.com)
+and [Jan Wassenberg](mailto:janwas@google.com), and subsequently released February 2024
+thanks to contributions from Phil Culliton, Paul Chang, and Dan Zheng.
+Griffin support was implemented in April 2024 thanks to contributions by Andrey
+Mikhaylov, Eugene Kliuchnikov, Jan Wassenberg, Jyrki Alakuijala, Lode
+Vandevenne, Luca Versari, Martin Bruse, Phil Culliton, Sami Boukortt, Thomas
+Fischbacher and Zoltan Szabadka.
+This is not an officially supported Google product.

gemma.cpp/WORKSPACE ADDED Viewed

	@@ -0,0 +1,4 @@

+workspace(name = "gemma")
+# This file marks the root of the Bazel workspace.
+# See MODULE.bazel for external dependencies setup.

gemma.cpp/backprop/backward-inl.h ADDED Viewed

	@@ -0,0 +1,428 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Implementation of the Vector-Jacobian Products (VJP) of the individual
+// operations of the forward pass.
+// Include guard for non-SIMD code.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_INL_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_INL_H_
+#include <stddef.h>
+#include <algorithm>
+#include <cmath>
+#include "backprop/prompt.h"
+#include "gemma/activations.h"
+#include "gemma/common.h"
+#include "hwy/base.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_INL_H_
+// Include guard for (potentially) SIMD code.
+#if defined(THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE) == defined(HWY_TARGET_TOGGLE)
+#ifdef THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE
+#undef THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE
+#else
+#define THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE
+#endif
+#include "gemma/ops.h"
+#include "hwy/highway.h"
+HWY_BEFORE_NAMESPACE();
+namespace gcpp {
+namespace HWY_NAMESPACE {
+namespace hn = hwy::HWY_NAMESPACE;
+// Vector-Jacobian product of a dense matrix multiplication.
+//
+// For every token row and every output row j, the scalar v[token][j] scales
+// x[token] into row j of `grad_w` and scales weights[j] into grad_x[token]
+// (MulByConstAndAdd is taken to compute out += c * in). `grad_x` is zeroed
+// here first; `grad_w` is only added to, so its initial contents are kept.
+// `pool` is not used by this implementation.
+template <size_t kCols, size_t kRows>
+void MatMulVJP(const float* HWY_RESTRICT weights,  // kRows * kCols
+               const float* HWY_RESTRICT x,        // num_tokens * kCols
+               const float* HWY_RESTRICT v,        // num_tokens * kRows
+               size_t num_tokens,
+               float* HWY_RESTRICT grad_w,         // kRows * kCols
+               float* HWY_RESTRICT grad_x,         // num_tokens * kCols
+               hwy::ThreadPool& pool) {
+  hwy::ZeroBytes(grad_x, num_tokens * kCols * sizeof(grad_x[0]));
+  for (size_t token = 0; token < num_tokens; ++token) {
+    const float* v_row = v + token * kRows;
+    const float* x_row = x + token * kCols;
+    float* grad_x_row = grad_x + token * kCols;
+    for (size_t row = 0; row < kRows; ++row) {
+      const float scale = v_row[row];
+      MulByConstAndAdd(scale, x_row, grad_w + row * kCols, kCols);
+      MulByConstAndAdd(scale, weights + row * kCols, grad_x_row, kCols);
+    }
+  }
+}
+// Vector-Jacobian product of a matrix multiplication whose input is split
+// into kHeads slices, each with its own kRows x kCols weight matrix.
+//
+// The same upstream value v[token][row] is applied to every head: it scales
+// that head's slice of x into the head's row of `grad_w`, and the head's
+// weight row into the matching slice of `grad_x` (MulByConstAndAdd is taken
+// to compute out += c * in). `grad_x` is zeroed first; `grad_w` is only added
+// to. `pool` is not used by this implementation.
+template <size_t kHeads, size_t kCols, size_t kRows>
+void MultiHeadMatMulVJP(
+    const float* HWY_RESTRICT weights,  // kHeads * kRows * kCols
+    const float* HWY_RESTRICT x,        // num_tokens * kHeads * kCols
+    const float* HWY_RESTRICT v,        // num_tokens * kRows
+    size_t num_tokens,
+    float* HWY_RESTRICT grad_w,         // kHeads * kRows * kCols
+    float* HWY_RESTRICT grad_x,         // num_tokens * kHeads * kCols
+    hwy::ThreadPool& pool) {
+  constexpr size_t kHeadStride = kRows * kCols;    // one head's weights
+  constexpr size_t kTokenStride = kHeads * kCols;  // one token's slice of x
+  hwy::ZeroBytes(grad_x, num_tokens * kHeads * kCols * sizeof(grad_x[0]));
+  for (size_t token = 0; token < num_tokens; ++token) {
+    for (size_t row = 0; row < kRows; ++row) {
+      const float scale = v[token * kRows + row];
+      for (size_t head = 0; head < kHeads; ++head) {
+        const size_t w_offset = head * kHeadStride + row * kCols;
+        const size_t x_offset = token * kTokenStride + head * kCols;
+        MulByConstAndAdd(scale, x + x_offset, grad_w + w_offset, kCols);
+        MulByConstAndAdd(scale, weights + w_offset, grad_x + x_offset, kCols);
+      }
+    }
+  }
+}
+// Lane-wise derivative of the tanh-based GELU approximation.
+//
+// With arg(v) = sqrt(2/pi) * (v + 0.044715 * v^3) and
+// cdf(v) = 0.5 * (1 + tanh(arg(v))), the value returned is d/dv [v * cdf(v)]:
+//   cdf(v) + 0.5 * v * (1 - tanh(arg)^2) * arg'(v),
+// where arg'(v) = sqrt(2/pi) + 3 * 0.044715 * sqrt(2/pi) * v^2.
+template <class D, HWY_IF_F32_D(D)>
+static HWY_INLINE hn::Vec<D> DGelu(D d, hn::Vec<D> v) {
+  const hn::Vec<D> kCubicCoeff = hn::Set(d, 0.044715f);
+  const hn::Vec<D> kSqrt2OverPi = hn::Set(d, 0.797884560804236f);
+  const hn::Vec<D> kHalf = hn::Set(d, 0.5f);
+  const hn::Vec<D> kOne = hn::Set(d, 1.0f);
+  // kSqrt2OverPi * 3 * kCubicCoeff: coefficient of v^2 in arg'(v).
+  const hn::Vec<D> kDArgCoeff = hn::Set(d, 0.1070322244f);
+
+  const hn::Vec<D> v_sq = hn::Mul(v, v);
+  const hn::Vec<D> v_cube = hn::Mul(v_sq, v);
+  const hn::Vec<D> arg =
+      hn::Mul(kSqrt2OverPi, hn::MulAdd(kCubicCoeff, v_cube, v));
+  const hn::Vec<D> tanh_arg = hn::Tanh(d, arg);
+
+  // cdf = 0.5 * tanh(arg) + 0.5
+  const hn::Vec<D> cdf = hn::MulAdd(kHalf, tanh_arg, kHalf);
+  // d tanh(arg) / d arg = 1 - tanh(arg)^2
+  const hn::Vec<D> dtanh = hn::Sub(kOne, hn::Mul(tanh_arg, tanh_arg));
+  // d arg / d v
+  const hn::Vec<D> darg = hn::MulAdd(kDArgCoeff, v_sq, kSqrt2OverPi);
+
+  return hn::MulAdd(kHalf, hn::Mul(v, hn::Mul(dtanh, darg)), cdf);
+}
+// In-place vector-Jacobian product for softmax.
+//
+// `forward` is read as y (presumably the softmax output from the forward
+// pass) and `backward` as the incoming gradient g. On return
+//   backward[i] = y[i] * (g[i] - dot(y, g))   for i in [0, size).
+static HWY_NOINLINE void SoftmaxVJP(const float* HWY_RESTRICT forward,
+                                    float* HWY_RESTRICT backward,
+                                    const size_t size) {
+  namespace hn = hwy::HWY_NAMESPACE;
+  using D = hn::ScalableTag<float>;
+  const D d;
+  // The dot product must be taken before `backward` is overwritten below.
+  const auto dot_yg = hn::Dot::Compute<0>(d, forward, backward, size);
+  const auto vdot = hn::Set(d, dot_yg);
+  hn::Transform1(
+      d, backward, size, forward,
+      [&vdot](const auto d, const auto g, const auto y)
+      HWY_ATTR { return hn::Mul(y, hn::Sub(g, vdot)); });
+}
+// Vector-Jacobian product of RMS normalization with a (1 + weight) scale.
+//
+// For each of the `num_tokens` rows of length `model_dim`:
+//   inv_rms    = 1 / sqrt(mean(x^2) + 1e-6)
+//   grad_w[i] += v[i] * x[i] * inv_rms                      (accumulated)
+//   grad_x[i]  = inv_rms * (1 + w[i]) * v[i]
+//                - x[i] * inv_rms^3 / model_dim * sum_j (1 + w[j]) v[j] x[j]
+// `grad_x` is overwritten, `grad_w` is added to. `pool` is not used here.
+static HWY_NOINLINE void RMSNormVJP(
+    const float* HWY_RESTRICT weights, const float* HWY_RESTRICT x,
+    const float* HWY_RESTRICT v, size_t model_dim, size_t num_tokens,
+    float* HWY_RESTRICT grad_w, float* HWY_RESTRICT grad_x,
+    hwy::ThreadPool& pool) {
+  constexpr float kEps = 1e-6f;
+  for (size_t token = 0; token < num_tokens; ++token) {
+    const size_t row = token * model_dim;
+    const float* x_row = x + row;
+    const float* v_row = v + row;
+    float* grad_x_row = grad_x + row;
+
+    const float sum_sq = SquaredL2(x_row, model_dim);
+    const float inv_rms =
+        1.0f / sqrtf(sum_sq / StaticCast<float>(model_dim) + kEps);
+
+    // Gradient of the per-channel scale.
+    for (size_t i = 0; i < model_dim; ++i) {
+      grad_w[i] += v_row[i] * x_row[i] * inv_rms;
+    }
+
+    // Shared term coming from the dependence of inv_rms on every x[j].
+    const float inv_rms3 =
+        inv_rms * inv_rms * inv_rms / StaticCast<float>(model_dim);
+    float weighted_dot = 0.0f;
+    for (size_t i = 0; i < model_dim; ++i) {
+      weighted_dot += (1.0f + weights[i]) * v_row[i] * x_row[i];
+    }
+    weighted_dot *= inv_rms3;
+
+    for (size_t i = 0; i < model_dim; ++i) {
+      grad_x_row[i] = inv_rms * (1.0f + weights[i]) * v_row[i] -
+                      weighted_dot * x_row[i];
+    }
+  }
+}
+// Vector-Jacobian product of the input embedding lookup.
+//
+// For every prompt position except the last, the `model_dim`-long row of `v`
+// at that position is scaled by `scaling` and added into the row of `grad`
+// indexed by the token id at that position (MulByConstAndAdd is taken to
+// compute out += c * in). `weights` is not read. `prompt` must be non-empty.
+static HWY_NOINLINE void InputEmbeddingVJP(
+    const float* weights, const std::vector<int>& prompt,
+    const float scaling, const float* HWY_RESTRICT v,
+    float* HWY_RESTRICT grad, size_t model_dim) {
+  HWY_ASSERT(!prompt.empty());
+  const size_t num_inputs = prompt.size() - 1;
+  for (size_t pos = 0; pos < num_inputs; ++pos) {
+    const int token = prompt[pos];
+    const float* v_row = v + pos * model_dim;
+    float* grad_row = grad + token * model_dim;
+    MulByConstAndAdd(scaling, v_row, grad_row, model_dim);
+  }
+}
+// Backward pass (vector-Jacobian product) through one transformer layer.
+//
+// Inputs:
+//   weights          parameters of this layer.
+//   forward          activations recorded for this layer in the forward pass.
+//   next_layer_grad  num_tokens * kModelDim gradient arriving from above.
+//   num_tokens       number of positions; must not exceed TConfig::kSeqLen.
+// Outputs:
+//   grad             this layer's weight gradients are added into it.
+//   backward         receives the intermediate gradients; on return
+//                    backward.input holds the gradient w.r.t. the layer input.
+//
+// Per-token layout of the qkv buffers, as implied by the offsets used below:
+// kHeads query vectors, then one key vector (index kHeads), then one value
+// vector (index kHeads + 1), each of length kQKVDim.
+template <typename TConfig, template<typename> typename LayerT>
+void LayerVJP(const LayerT<TConfig>& weights,
+              const ForwardLayer<float, TConfig>& forward,
+              const float* HWY_RESTRICT next_layer_grad,
+              size_t num_tokens,
+              LayerT<TConfig>& grad,
+              ForwardLayer<float, TConfig>& backward,
+              hwy::ThreadPool& pool) {
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kQKVDim = TConfig::kQKVDim;
+  static constexpr size_t kHeads = TConfig::kHeads;
+  static constexpr size_t kSeqLen = TConfig::kSeqLen;
+  static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
+  static const float kQueryScale =
+      static_cast<float>(1.0 / sqrt(static_cast<double>(kQKVDim)));
+  HWY_ASSERT(num_tokens <= kSeqLen);
+
+  // --- Feed-forward block, traversed output to input. ---
+  // Output projection (linear_w).
+  MatMulVJP<kFFHiddenDim, kModelDim>(
+      weights.linear_w.data(), forward.ffw_hidden_gated.data(), next_layer_grad,
+      num_tokens, grad.linear_w.data(), backward.ffw_hidden_gated.data(),
+      pool);
+
+  // Gating: each token row of ffw_hidden holds 2 * kFFHiddenDim values. The
+  // first half (y) goes through Gelu, the second half (x) multiplies it.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    const size_t hidden_offset = pos * kFFHiddenDim * 2;
+    const float* HWY_RESTRICT f_out = forward.ffw_hidden.data() + hidden_offset;
+    const float* HWY_RESTRICT f_out_mul = f_out + kFFHiddenDim;
+    const float* HWY_RESTRICT b_out_gated =
+        backward.ffw_hidden_gated.data() + pos * kFFHiddenDim;
+    float* HWY_RESTRICT b_out = backward.ffw_hidden.data() + hidden_offset;
+    float* HWY_RESTRICT b_out_mul = b_out + kFFHiddenDim;
+    namespace hn = hwy::HWY_NAMESPACE;
+    using DF = hn::ScalableTag<float>;
+    DF df;
+    // NOTE(review): whole-vector aligned Load/Store with no remainder
+    // handling; assumes kFFHiddenDim is a multiple of Lanes(df) and the
+    // buffers are suitably aligned - confirm against the buffer allocation.
+    for (size_t i = 0; i < kFFHiddenDim; i += Lanes(df)) {
+      const auto y = Load(df, f_out + i);
+      const auto x = Load(df, f_out_mul + i);
+      const auto v = Load(df, b_out_gated + i);
+      // d(Gelu(y) * x) / dx = Gelu(y)
+      hn::Store(hn::Mul(v, Gelu(df, y)), df, b_out_mul + i);
+      // d(Gelu(y) * x) / dy = x * Gelu'(y)
+      hn::Store(hn::Mul(v, hn::Mul(x, DGelu(df, y))), df, b_out + i);
+    }
+  }
+
+  // Gating projection (gating_einsum_w) and the norm preceding the FFW block.
+  MatMulVJP<kModelDim, kFFHiddenDim * 2>(
+      weights.gating_einsum_w.data(),
+      forward.bf_pre_ffw_rms_out.data(), backward.ffw_hidden.data(),
+      num_tokens, grad.gating_einsum_w.data(),
+      backward.bf_pre_ffw_rms_out.data(), pool);
+  RMSNormVJP(weights.pre_ffw_norm_scale.data(),
+             forward.attention_out.data(),
+             backward.bf_pre_ffw_rms_out.data(),
+             kModelDim, num_tokens,
+             grad.pre_ffw_norm_scale.data(),
+             backward.attention_out.data(), pool);
+
+  // Residual connection around the FFW block: add the incoming gradient.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    AddFrom(next_layer_grad + pos * kModelDim,
+            backward.attention_out.data() + pos * kModelDim, kModelDim);
+  }
+
+  // --- Attention block. ---
+  // backward.qkv is accumulated into below, so it must start from zero.
+  hwy::ZeroBytes(backward.qkv.data(),
+                 num_tokens * (kHeads + 2) * kQKVDim * sizeof(backward.qkv[0]));
+
+  // Attention output projection (attn_vec_einsum_w).
+  MultiHeadMatMulVJP<kHeads, kQKVDim, kModelDim>(
+      weights.attn_vec_einsum_w.data(), forward.att_out.data(),
+      backward.attention_out.data(), num_tokens,
+      grad.attn_vec_einsum_w.data(), backward.att_out.data(), pool);
+
+  // Weighted sum over values: gradient w.r.t. the attention weights
+  // (backward.att) and w.r.t. the value vectors, for positions pos2 <= pos.
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      const size_t aoffset = head * kSeqLen + pos * kHeads * kSeqLen;
+      const float* HWY_RESTRICT f_head_att = forward.att.data() + aoffset;
+      const float* HWY_RESTRICT b_att_out =
+          backward.att_out.data() + (pos * kHeads + head) * kQKVDim;
+      float* HWY_RESTRICT b_head_att = backward.att.data() + aoffset;
+      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+        const size_t v2offs = (pos2 * (kHeads + 2) + kHeads + 1) * kQKVDim;
+        const float* HWY_RESTRICT f_v2 = forward.qkv.data() + v2offs;
+        float* HWY_RESTRICT b_v2 = backward.qkv.data() + v2offs;
+        b_head_att[pos2] = Dot(b_att_out, f_v2, kQKVDim);
+        MulByConstAndAdd(f_head_att[pos2], b_att_out, b_v2, kQKVDim);
+      }
+    }
+  }
+
+  // Softmax over the first pos + 1 attention weights of each (head, pos).
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      const size_t aoffset = head * kSeqLen + pos * kHeads * kSeqLen;
+      const float* HWY_RESTRICT f_head_att = forward.att.data() + aoffset;
+      float* HWY_RESTRICT b_head_att = backward.att.data() + aoffset;
+      SoftmaxVJP(f_head_att, b_head_att, pos + 1);
+    }
+  }
+
+  // Query-key dot products: accumulate gradients w.r.t. queries and keys.
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      const size_t qoffs = (pos * (kHeads + 2) + head) * kQKVDim;
+      const size_t aoffs = head * kSeqLen + pos * kHeads * kSeqLen;
+      const float* HWY_RESTRICT f_q = forward.qkv.data() + qoffs;
+      const float* HWY_RESTRICT b_head_att = backward.att.data() + aoffs;
+      float* HWY_RESTRICT b_q = backward.qkv.data() + qoffs;
+      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+        const size_t k2offs = (pos2 * (kHeads + 2) + kHeads) * kQKVDim;
+        const float* HWY_RESTRICT f_k2 = forward.qkv.data() + k2offs;
+        float* HWY_RESTRICT b_k2 = backward.qkv.data() + k2offs;
+        MulByConstAndAdd(b_head_att[pos2], f_k2, b_q, kQKVDim);
+        MulByConstAndAdd(b_head_att[pos2], f_q, b_k2, kQKVDim);
+      }
+    }
+  }
+
+  // Apply Rope with the negated position to the key gradients.
+  for (int pos = 0; pos < static_cast<int>(num_tokens); ++pos) {
+    float* HWY_RESTRICT b_kv =
+        backward.qkv.data() + (pos * (kHeads + 2) + kHeads) * kQKVDim;
+    Rope(b_kv, kQKVDim, -pos);
+  }
+
+  // Query gradients: apply the query scale, then Rope with the negated
+  // position.
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      float* HWY_RESTRICT b_q =
+          backward.qkv.data() + (pos * (kHeads + 2) + head) * kQKVDim;
+      MulByConst(kQueryScale, b_q, kQKVDim);
+      // Convert to a signed value before negating: `-pos` on a size_t wraps
+      // to a huge unsigned number. pos < num_tokens <= kSeqLen, the same
+      // range the key loop above already casts to int.
+      Rope(b_q, kQKVDim, -static_cast<int>(pos));
+    }
+  }
+
+  // QKV projection (qkv_einsum_w) and the norm preceding attention.
+  MatMulVJP<kModelDim, (kHeads + 2) * kQKVDim>(
+      weights.qkv_einsum_w.data(), forward.pre_att_rms_out.data(),
+      backward.qkv.data(), num_tokens,
+      grad.qkv_einsum_w.data(), backward.pre_att_rms_out.data(), pool);
+  RMSNormVJP(weights.pre_attention_norm_scale.data(),
+             forward.input.data(),
+             backward.pre_att_rms_out.data(),
+             kModelDim, num_tokens,
+             grad.pre_attention_norm_scale.data(),
+             backward.input.data(), pool);
+
+  // Residual connection around the attention block.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    AddFrom(backward.attention_out.data() + pos * kModelDim,
+            backward.input.data() + pos * kModelDim, kModelDim);
+  }
+}
+// In-place vector-Jacobian product for the logit soft-cap.
+//
+// `forward` is read as y (presumably the capped values from the forward
+// pass) and `backward` as the incoming gradient g. Each entry becomes
+//   g[i] * (1 - (y[i] / cap)^2),
+// after which the entry at the position of the largest forward value is
+// replaced by minus the sum of all other entries, so the result sums to zero.
+static HWY_NOINLINE void SoftcapVJP(const float* HWY_RESTRICT forward,
+                                    float* HWY_RESTRICT backward,
+                                    const float cap,
+                                    const size_t size) {
+  namespace hn = hwy::HWY_NAMESPACE;
+  using D = hn::ScalableTag<float>;
+  const D d;
+  const auto vone = hn::Set(d, 1.0f);
+  const auto vinv_cap = hn::Div(hn::Set(d, 1.0f), hn::Set(d, cap));
+
+  // TODO(szabadka): Investigate what to do when the argmax is not unique.
+  // TODO(szabadka): Use IndexOfMax from hwy when it is available.
+  const size_t argmax = std::max_element(forward, forward + size) - forward;
+
+  hn::Transform1(
+      d, backward, size, forward,
+      [&](const auto d, const auto g, const auto y) HWY_ATTR {
+        const auto ratio = hn::Mul(vinv_cap, y);
+        return hn::Mul(g, hn::Sub(vone, hn::Mul(ratio, ratio)));
+      });
+
+  // Zero the argmax slot so that it does not contribute to the sum below.
+  backward[argmax] = 0;
+  auto total = hn::Zero(d);
+  Foreach(d, backward, size, total,
+          [&total](const auto d, const auto value) HWY_ATTR {
+            total = hn::Add(total, value);
+          });
+  backward[argmax] = -hn::ReduceSum(d, total);
+}
+// Gradient of the cross-entropy loss with respect to the probabilities `x`.
+//
+// `x` and `grad` are indexed as [pos * vocab_size + token] for the first
+// prompt.tokens.size() - 1 positions. `grad` is zeroed, then for every
+// position whose next token is at or past prompt.context_size the entry of
+// that next token is set to -1 / (ln(2) * x[entry]).
+static HWY_NOINLINE void CrossEntropyLossGrad(
+    const float* HWY_RESTRICT x, float* HWY_RESTRICT grad,
+    const Prompt& prompt, size_t vocab_size) {
+  HWY_ASSERT(!prompt.tokens.empty());
+  const size_t num_predictions = prompt.tokens.size() - 1;
+  const float neg_inv_ln2 = -1.0 / std::log(2.0);
+  hwy::ZeroBytes(grad, num_predictions * vocab_size * sizeof(grad[0]));
+  for (size_t pos = 0; pos < num_predictions; ++pos) {
+    // Targets that still lie inside the context get no gradient.
+    if (pos + 1 >= prompt.context_size) {
+      const size_t target = pos * vocab_size + prompt.tokens[pos + 1];
+      grad[target] = neg_inv_ln2 / x[target];
+    }
+  }
+}
+// Backward pass of the cross-entropy loss through the whole model.
+//
+// Runs the VJP steps in this order: loss gradient, softmax, softcap, the
+// matmul with embedder_input_embedding, the final RMSNorm, every layer from
+// last to first, and finally the input embedding. Weight gradients go into
+// `grad`; activation gradients go into `backward`, which has the same type as
+// `forward`. Only the first prompt.tokens.size() - 1 positions are processed.
+// Only LayerAttentionType::kGemma layers are supported (asserted below).
+template <typename TConfig, template<typename...> typename WeightsT,
+          template<typename> typename LayerT>
+void CrossEntropyLossBackwardPass(const Prompt& prompt,
+                                  const WeightsT<TConfig>& weights,
+                                  const ForwardPass<float, TConfig>& forward,
+                                  WeightsT<TConfig>& grad,
+                                  ForwardPass<float, TConfig>& backward,
+                                  hwy::ThreadPool& pool) {
+  static constexpr size_t kVocabSize = TConfig::kVocabSize;
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kLayers = TConfig::kLayers;
+  const float kEmbScaling = EmbeddingScaling<TConfig>();
+  static_assert(!TConfig::kAbsolutePE);
+  static_assert(!TConfig::kPostNormScale);
+  static_assert(TConfig::kKVHeads == 1);
+  HWY_DASSERT(prompt.context_size > 0);
+  HWY_DASSERT(prompt.context_size < prompt.tokens.size());
+  const size_t num_tokens = prompt.tokens.size() - 1;
+  // Seed backward.logits with the loss gradient w.r.t. the probabilities.
+  CrossEntropyLossGrad(forward.probs.data(), backward.logits.data(), prompt,
+                       kVocabSize);
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    SoftmaxVJP(forward.probs.data() + pos * kVocabSize,
+               backward.logits.data() + pos * kVocabSize,
+               kVocabSize);
+  }
+  // 30.0f is the cap; presumably it must match the forward pass -- confirm.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    SoftcapVJP(forward.logits.data() + pos * kVocabSize,
+               backward.logits.data() + pos * kVocabSize, 30.0f, kVocabSize);
+  }
+  MatMulVJP<kModelDim, kVocabSize>(
+      weights.embedder_input_embedding.data(), forward.final_norm_output.data(),
+      backward.logits.data(), num_tokens,
+      grad.embedder_input_embedding.data(), backward.final_norm_output.data(),
+      pool);
+  RMSNormVJP(weights.final_norm_scale.data(),
+             forward.final_layer_output.data(),
+             backward.final_norm_output.data(),
+             kModelDim, num_tokens,
+             grad.final_norm_scale.data(),
+             backward.final_layer_output.data(), pool);
+  for (int layer = static_cast<int>(kLayers) - 1; layer >= 0; --layer) {
+    auto type = TConfig::kLayerConfig[layer];
+    // TODO(szabadka) Implement Griffin layer vjp.
+    HWY_ASSERT(type == LayerAttentionType::kGemma);
+    // `layer` is non-negative here, so the cast is lossless; comparing in
+    // size_t avoids a signed/unsigned mismatch against kLayers.
+    float* next_layer_grad = static_cast<size_t>(layer) + 1 < kLayers
+                             ? backward.layers[layer + 1].input.data()
+                             : backward.final_layer_output.data();
+    LayerVJP<TConfig, LayerT>(
+        *weights.GetLayer(layer), forward.layers[layer], next_layer_grad,
+        num_tokens, *grad.GetLayer(layer), backward.layers[layer], pool);
+  }
+  InputEmbeddingVJP(weights.embedder_input_embedding.data(), prompt.tokens,
+                    kEmbScaling, backward.layers[0].input.data(),
+                    grad.embedder_input_embedding.data(), kModelDim);
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace gcpp
+HWY_AFTER_NAMESPACE();
+#endif  // NOLINT

gemma.cpp/backprop/backward.cc ADDED Viewed

	@@ -0,0 +1,95 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "backprop/backward.h"
+#include "backprop/prompt.h"
+#include "gemma/activations.h"
+#include "gemma/common.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+// Compiles this file for multiple architectures via "foreach_target.h", to
+// which we pass the filename via macro 'argument'.
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "backprop/backward.cc"  // NOLINT
+#include "hwy/foreach_target.h"        // IWYU pragma: keep
+#include "hwy/highway.h"
+// After highway.h
+#include "backprop/backward-inl.h"
+#include "gemma/weights.h"
+HWY_BEFORE_NAMESPACE();
+namespace gcpp {
+namespace HWY_NAMESPACE {
+// Byte-storage entry point for one model configuration: reinterprets the four
+// storage objects as CompressedWeights<TConfig> / ForwardPass<float, TConfig>
+// and forwards to the typed CrossEntropyLossBackwardPass overload.
+template <typename TConfig>
+void CrossEntropyLossBackwardPass(const Prompt& prompt,
+                                  const ByteStorageT& weights_u8,
+                                  const ByteStorageT& forward_u8,
+                                  ByteStorageT& grad_u8,
+                                  ByteStorageT& backward_u8,
+                                  hwy::ThreadPool& pool) {
+  using WeightsType = CompressedWeights<TConfig>;
+  using ActivationsType = ForwardPass<float, TConfig>;
+  // Read-only inputs.
+  const WeightsType& weights =
+      *reinterpret_cast<const WeightsType*>(weights_u8.get());
+  const ActivationsType& forward =
+      *reinterpret_cast<const ActivationsType*>(forward_u8.get());
+  // Mutable outputs.
+  WeightsType& grad = *reinterpret_cast<WeightsType*>(grad_u8.get());
+  ActivationsType& backward =
+      *reinterpret_cast<ActivationsType*>(backward_u8.get());
+  CrossEntropyLossBackwardPass<TConfig, CompressedWeights, CompressedLayer>(
+      prompt, weights, forward, grad, backward, pool);
+}
+// Selects the configuration type that corresponds to `model` and runs the
+// backward pass for it. Aborts for any model other than GEMMA_2B and
+// GEMMA_TINY.
+void CrossEntropyLossBackwardPassT(Model model,
+                                   const Prompt& prompt,
+                                   const ByteStorageT& weights,
+                                   const ByteStorageT& forward,
+                                   ByteStorageT& grad,
+                                   ByteStorageT& backward,
+                                   hwy::ThreadPool& pool) {
+  // TODO(janwas): use CallFunctorForModel
+  if (model == Model::GEMMA_2B) {
+    CrossEntropyLossBackwardPass<ConfigGemma2B<float>>(
+        prompt, weights, forward, grad, backward, pool);
+  } else if (model == Model::GEMMA_TINY) {
+    CrossEntropyLossBackwardPass<ConfigGemmaTiny<float>>(
+        prompt, weights, forward, grad, backward, pool);
+  } else {
+    HWY_ABORT("Model type %d unknown.", static_cast<int>(model));
+  }
+}
+}  // namespace HWY_NAMESPACE
+}  // namespace gcpp
+HWY_AFTER_NAMESPACE();
+#if HWY_ONCE
+namespace gcpp {
+HWY_EXPORT(CrossEntropyLossBackwardPassT);
+// Public entry point: forwards all arguments to the
+// CrossEntropyLossBackwardPassT instance chosen by HWY_DYNAMIC_DISPATCH.
+void CrossEntropyLossBackwardPass(
+    const Model& model, const Prompt& prompt,
+    const ByteStorageT& weights, const ByteStorageT& forward,
+    ByteStorageT& grad, ByteStorageT& backward, hwy::ThreadPool& pool) {
+  HWY_DYNAMIC_DISPATCH(CrossEntropyLossBackwardPassT)(
+      model, prompt, weights, forward, grad, backward, pool);
+}
+}  // namespace gcpp
+#endif  // HWY_ONCE

gemma.cpp/backprop/backward.h ADDED Viewed

	@@ -0,0 +1,32 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_H_
+#include "backprop/prompt.h"
+#include "gemma/common.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+namespace gcpp {
+// Runs the cross-entropy backward pass for `model` on `prompt`.
+// `weights` and `forward` are read-only inputs; `grad` and `backward` are
+// written. All four are passed as untyped byte storage; the implementation in
+// backward.cc reinterprets them according to the model's configuration.
+void CrossEntropyLossBackwardPass(
+    const Model& model, const Prompt& prompt,
+    const ByteStorageT& weights, const ByteStorageT& forward,
+    ByteStorageT& grad, ByteStorageT& backward, hwy::ThreadPool& pool);
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_H_

gemma.cpp/backprop/backward_scalar.h ADDED Viewed

	@@ -0,0 +1,362 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_SCALAR_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_SCALAR_H_
+#include <stddef.h>
+#include <string.h>
+#include <algorithm>  // std::max_element
+#include <cmath>
+#include <vector>
+#include "backprop/common_scalar.h"
+#include "backprop/prompt.h"
+#include "gemma/activations.h"
+#include "gemma/common.h"  // EmbeddingScaling
+#include "gemma/weights_raw.h"
+namespace gcpp {
+// VJP of a matrix multiplication with row-major w (N x M), x (K x M) and
+// dy (K x N). dx (K x M) is zeroed and then filled; dw (N x M) is only added
+// to, so the caller decides its initial contents. Assumes
+// MulByConstAndAddT(c, a, out, n) performs out[i] += c * a[i] -- confirm.
+template<typename T>
+void MatMulVJPT(const T* w, const T* x, const T* dy, T* dw, T* dx,
+                size_t N, size_t M, size_t K) {
+  memset(dx, 0, M * K * sizeof(dx[0]));
+  for (size_t row = 0; row < K; ++row) {
+    const T* x_row = x + row * M;
+    const T* dy_row = dy + row * N;
+    T* dx_row = dx + row * M;
+    for (size_t col = 0; col < N; ++col) {
+      MulByConstAndAddT(dy_row[col], x_row, dw + col * M, M);
+      MulByConstAndAddT(dy_row[col], w + col * M, dx_row, M);
+    }
+  }
+}
+// VJP of a matrix multiplication summed over H heads. w is indexed as
+// [h][N][M], x and dx as [K][h][M], dy as [K][N]. dx is zeroed and then
+// filled; dw is only added to. Assumes MulByConstAndAddT(c, a, out, n)
+// performs out[i] += c * a[i] -- confirm.
+template<typename T>
+void MultiHeadMatMulVJPT(const T* w, const T* x, const T* dy, T* dw, T* dx,
+                         size_t H, size_t N, size_t M, size_t K) {
+  memset(dx, 0, H * M * K * sizeof(dx[0]));
+  for (size_t row = 0; row < K; ++row) {
+    for (size_t col = 0; col < N; ++col) {
+      const size_t dy_idx = row * N + col;
+      for (size_t head = 0; head < H; ++head) {
+        const size_t x_offs = row * H * M + head * M;
+        const size_t w_offs = head * N * M + col * M;
+        MulByConstAndAddT(dy[dy_idx], &x[x_offs], &dw[w_offs], M);
+        MulByConstAndAddT(dy[dy_idx], &w[w_offs], &dx[x_offs], M);
+      }
+    }
+  }
+}
+// VJP of RMSNorm with scale (1 + w) over K rows of N elements each.
+// dw (N elements) is added to across all rows; dx (K x N) is overwritten.
+// The order of floating-point operations matters for the tight test
+// tolerances, so keep the expressions as written.
+template<typename T>
+void RMSNormVJPT(const T* w, const T* x, const T* dy, T* dw, T* dx,
+                 size_t N, size_t K) {
+  for (size_t row = 0; row < K; ++row) {
+    const T* x_row = x + row * N;
+    const T* dy_row = dy + row * N;
+    T* dx_row = dx + row * N;
+    constexpr T eps(1e-6);
+    const T sum_sq = SquaredL2(x_row, N);
+    // 1 / sqrt(mean(x^2) + eps)
+    const T inv_rms = T(1.0) / std::sqrt(sum_sq / T(N) + eps);
+    for (size_t col = 0; col < N; ++col) {
+      dw[col] += dy_row[col] * x_row[col] * inv_rms;
+    }
+    const T inv_rms3_over_n = inv_rms * inv_rms * inv_rms / T(N);
+    T proj = 0.0;
+    for (size_t col = 0; col < N; ++col) {
+      proj += (T(1.0) + w[col]) * dy_row[col] * x_row[col];
+    }
+    proj *= inv_rms3_over_n;
+    for (size_t col = 0; col < N; ++col) {
+      dx_row[col] =
+          inv_rms * (T(1.0) + w[col]) * dy_row[col] - proj * x_row[col];
+    }
+  }
+}
+// In-place VJP of softmax for one row of N elements: given the softmax
+// outputs y and the incoming gradient dy, replaces dy[i] with
+// y[i] * (dy[i] - sum_k y[k] * dy[k]).
+template<typename T>
+void SoftmaxVJPT(const T* y, T* dy, size_t N) {
+  T weighted_sum = {};
+  for (size_t k = 0; k < N; ++k) {
+    weighted_sum += y[k] * dy[k];
+  }
+  for (size_t k = 0; k < N; ++k) {
+    const T centered = dy[k] - weighted_sum;
+    dy[k] = y[k] * centered;
+  }
+}
+// Applies the single-row SoftmaxVJPT to each of K consecutive rows of N
+// elements.
+template<typename T>
+void SoftmaxVJPT(const T* y, T* dy, size_t N, size_t K) {
+  const T* y_row = y;
+  T* dy_row = dy;
+  for (size_t row = 0; row < K; ++row, y_row += N, dy_row += N) {
+    SoftmaxVJPT(y_row, dy_row, N);
+  }
+}
+// Derivative of the tanh-based GELU approximation
+//   0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
+// with respect to x.
+template<typename T>
+T GeluDerivative(T x) {
+  static const T kCubicCoeff = 0.044715;
+  static const T kSqrt2OverPi = 0.797884560804236;
+  static const T kThreeCubic = kSqrt2OverPi * T(3.0) * kCubicCoeff;
+  const T x_sq = x * x;
+  const T x_cu = x_sq * x;
+  const T inner = kSqrt2OverPi * (kCubicCoeff * x_cu + x);
+  const T th = std::tanh(inner);
+  const T cdf = T(0.5) * (T(1.0) + th);
+  // d/dx tanh(inner) = (1 - tanh^2) * d(inner)/dx
+  const T dth = T(1.0) - th * th;
+  const T dinner = kThreeCubic * x_sq + kSqrt2OverPi;
+  return T(0.5) * x * dth * dinner + cdf;
+}
+// VJP of the gated GELU over K rows. Each row of `in` and `d_in` holds 2 * N
+// values: the first N are passed through Gelu, the second N are the
+// multiplicative gate. `d_out` holds N incoming gradients per row. `d_in` is
+// overwritten.
+template<typename T>
+void GatedGeluVJP(const T* in, const T* d_out, T* d_in, size_t N, size_t K) {
+  for (size_t row = 0; row < K; ++row) {
+    const size_t in_offs = row * 2 * N;
+    const T* act = in + in_offs;
+    const T* gate = act + N;
+    const T* grad_out = d_out + row * N;
+    T* d_act = d_in + in_offs;
+    T* d_gate = d_act + N;
+    for (size_t col = 0; col < N; ++col) {
+      d_act[col] = grad_out[col] * gate[col] * GeluDerivative(act[col]);
+      d_gate[col] = grad_out[col] * Gelu(act[col]);
+    }
+  }
+}
+// VJP of the causal query-key dot products. Each position of qkv / dqkv holds
+// (kHeads + 2) vectors of kQKVDim values: kHeads queries, then the key (at
+// index kHeads), then one more vector that this function does not touch.
+// doutput is indexed as [pos][head][pos2] with row stride kSeqLen. The query
+// and key parts of dqkv are zeroed and then accumulated. Loop order is kept
+// as head -> pos -> pos2 so floating-point accumulation order is unchanged.
+template<typename T>
+void MaskedAttentionVJP(const T* qkv, const T* doutput, T* dqkv,
+                        size_t num_tokens, size_t kHeads, size_t kQKVDim,
+                        size_t kSeqLen) {
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    T* dqk = dqkv + pos * (kHeads + 2) * kQKVDim;
+    memset(dqk, 0, (kHeads + 1) * kQKVDim * sizeof(qkv[0]));
+  }
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      const size_t q_offs = (pos * (kHeads + 2) + head) * kQKVDim;
+      const T* d_scores = doutput + head * kSeqLen + pos * kHeads * kSeqLen;
+      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+        const size_t k_offs = (pos2 * (kHeads + 2) + kHeads) * kQKVDim;
+        MulByConstAndAddT(d_scores[pos2], qkv + k_offs, dqkv + q_offs,
+                          kQKVDim);
+        MulByConstAndAddT(d_scores[pos2], qkv + q_offs, dqkv + k_offs,
+                          kQKVDim);
+      }
+    }
+  }
+}
+// VJP of the causal softmax. For each (pos, head) row of kSeqLen entries, the
+// first pos + 1 entries get the single-row SoftmaxVJPT and the remaining
+// entries of dy are set to zero.
+template<typename T>
+void MaskedSoftmaxVJPT(const T* y, T* dy, size_t num_tokens,
+                       size_t kHeads, size_t kSeqLen) {
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      const size_t valid = pos + 1;
+      const size_t row_offs = pos * kHeads * kSeqLen + head * kSeqLen;
+      T* dy_row = dy + row_offs;
+      SoftmaxVJPT(y + row_offs, dy_row, valid);
+      memset(dy_row + valid, 0, (kSeqLen - pos - 1) * sizeof(T));
+    }
+  }
+}
+// VJP of the attention-weighted mix of value vectors. The value vector of a
+// position is the one at index kHeads + 1 within its (kHeads + 2) vectors of
+// kQKVDim values. The value part of dqkv is zeroed and then accumulated;
+// dattention[pos][head][pos2] is overwritten for pos2 <= pos. Loop order is
+// kept so floating-point accumulation order is unchanged.
+template<typename T>
+void MixByAttentionVJP(const T* qkv, const T* attention, const T* doutput,
+                       T* dqkv, T* dattention, size_t num_tokens,
+                       size_t kHeads, size_t kQKVDim, size_t kSeqLen) {
+  const auto value_offset = [&](size_t p) {
+    return (p * (kHeads + 2) + kHeads + 1) * kQKVDim;
+  };
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    memset(&dqkv[value_offset(pos)], 0, kQKVDim * sizeof(qkv[0]));
+  }
+  for (size_t head = 0; head < kHeads; ++head) {
+    for (size_t pos = 0; pos < num_tokens; ++pos) {
+      const size_t att_offs = head * kSeqLen + pos * kHeads * kSeqLen;
+      const T* weights_row = &attention[att_offs];
+      T* d_weights_row = &dattention[att_offs];
+      const T* grad_out = &doutput[head * kQKVDim + pos * kHeads * kQKVDim];
+      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+        const size_t v_offs = value_offset(pos2);
+        d_weights_row[pos2] = DotT(grad_out, &qkv[v_offs], kQKVDim);
+        MulByConstAndAddT(weights_row[pos2], grad_out, &dqkv[v_offs],
+                          kQKVDim);
+      }
+    }
+  }
+}
+// VJP of the scaled embedding lookup. For each of the first tokens.size() - 1
+// tokens, passes row i of dy (N values) and row tokens[i] of dw to
+// MulByConstAndAddT together with `scaling`. `w` is not read.
+template<typename T>
+void InputEmbeddingVJPT(const T* w, const std::vector<int>& tokens, T scaling,
+                        const T* dy, T* dw, size_t N) {
+  if (tokens.empty()) return;
+  const size_t num_inputs = tokens.size() - 1;
+  for (size_t pos = 0; pos < num_inputs; ++pos) {
+    const int token = tokens[pos];
+    MulByConstAndAddT(scaling, dy + pos * N, dw + token * N, N);
+  }
+}
+// Backward pass through one transformer layer (scalar reference version).
+//
+// weights:    the layer's parameters.
+// forward:    activations saved for this layer by the forward pass.
+// dy:         gradient w.r.t. the layer output, num_tokens * kModelDim values.
+// grad:       receives the parameter gradients.
+// backward:   receives the activation gradients; backward.input holds the
+//             gradient w.r.t. the layer input on return.
+// num_tokens: number of positions to process.
+//
+// The steps run in this order: feed-forward output matmul, gated GELU, gating
+// matmul, pre-FFW RMSNorm, residual add, attention output matmul, value mix,
+// masked softmax, query-key products, query scaling, Rope with negated
+// position, QKV matmul, pre-attention RMSNorm, residual add.
+template<typename T, typename TConfig>
+void LayerVJP(const Layer<T, TConfig>& weights,
+              const ForwardLayer<T, TConfig>& forward,
+              const T* dy,
+              Layer<T, TConfig>& grad,
+              ForwardLayer<T, TConfig>& backward,
+              size_t num_tokens) {
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kSeqLen = TConfig::kSeqLen;
+  static constexpr size_t kQKVDim = TConfig::kQKVDim;
+  static constexpr size_t kHeads = TConfig::kHeads;
+  static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
+  static const T kQueryScale = 1.0 / std::sqrt(T(kQKVDim));
+  MatMulVJPT(weights.linear_w.data(), forward.ffw_hidden_gated.data(),
+             dy, grad.linear_w.data(), backward.ffw_hidden_gated.data(),
+             kModelDim, kFFHiddenDim, num_tokens);
+  GatedGeluVJP(forward.ffw_hidden.data(), backward.ffw_hidden_gated.data(),
+               backward.ffw_hidden.data(), kFFHiddenDim, num_tokens);
+  MatMulVJPT(weights.gating_einsum_w.data(), forward.bf_pre_ffw_rms_out.data(),
+             backward.ffw_hidden.data(), grad.gating_einsum_w.data(),
+             backward.bf_pre_ffw_rms_out.data(), kFFHiddenDim * 2, kModelDim,
+             num_tokens);
+  RMSNormVJPT(weights.pre_ffw_norm_scale.data(), forward.attention_out.data(),
+              backward.bf_pre_ffw_rms_out.data(),
+              grad.pre_ffw_norm_scale.data(), backward.attention_out.data(),
+              kModelDim, num_tokens);
+  // Residual connection around the feed-forward block.
+  AddFromT(dy, backward.attention_out.data(), num_tokens * kModelDim);
+  MultiHeadMatMulVJPT(weights.attn_vec_einsum_w.data(), forward.att_out.data(),
+                      backward.attention_out.data(),
+                      grad.attn_vec_einsum_w.data(),
+                      backward.att_out.data(),
+                      kHeads, kModelDim, kQKVDim, num_tokens);
+  MixByAttentionVJP(forward.qkv.data(), forward.att.data(),
+                    backward.att_out.data(), backward.qkv.data(),
+                    backward.att.data(), num_tokens, kHeads, kQKVDim,
+                    kSeqLen);
+  MaskedSoftmaxVJPT(forward.att.data(), backward.att.data(),
+                    num_tokens, kHeads, kSeqLen);
+  MaskedAttentionVJP(forward.qkv.data(), backward.att.data(),
+                     backward.qkv.data(), num_tokens, kHeads, kQKVDim, kSeqLen);
+  // Only the kHeads query vectors of each position are scaled.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    T* qkv = backward.qkv.data() + pos * (kHeads + 2) * kQKVDim;
+    MulByConstT(kQueryScale, qkv, kHeads * kQKVDim);
+  }
+  // `pos` stays signed because Rope is called with -pos; the bound is cast so
+  // the comparison is not between signed and unsigned operands.
+  for (int pos = 0; pos < static_cast<int>(num_tokens); ++pos) {
+    T* qkv = backward.qkv.data() + pos * (kHeads + 2) * kQKVDim;
+    // h == kHeads is the key vector, so queries and key are both processed.
+    for (size_t h = 0; h <= kHeads; ++h) {
+      Rope(qkv + h * kQKVDim, kQKVDim, -pos);
+    }
+  }
+  MatMulVJPT(weights.qkv_einsum_w.data(), forward.pre_att_rms_out.data(),
+             backward.qkv.data(), grad.qkv_einsum_w.data(),
+             backward.pre_att_rms_out.data(),
+             (kHeads + 2) * kQKVDim, kModelDim, num_tokens);
+  RMSNormVJPT(weights.pre_attention_norm_scale.data(), forward.input.data(),
+              backward.pre_att_rms_out.data(),
+              grad.pre_attention_norm_scale.data(),
+              backward.input.data(), kModelDim, num_tokens);
+  // Residual connection around the attention block.
+  AddFromT(backward.attention_out.data(), backward.input.data(),
+           num_tokens * kModelDim);
+}
+// In-place VJP of the logit soft-cap (cap fixed at 30) for one row of N
+// values. `y` holds the values that are divided by the cap and squared, i.e.
+// the capped outputs; `dy` holds the incoming gradient on entry and the
+// outgoing gradient on return.
+//
+// Every dy[i] is scaled by 1 - (y[i] / cap)^2, then the entry at the argmax
+// of `y` is replaced by minus the sum of all other entries.
+// Does nothing when N is 0.
+template<typename T>
+void SoftcapVJPT(const T* y, T* dy, size_t N) {
+  // An empty row has no argmax; storing to dy[0] would be out of bounds.
+  if (N == 0) return;
+  size_t imax = std::max_element(y, y + N) - y;
+  T cap = 30.0;
+  T inv_cap = T(1.0) / cap;
+  for (size_t i = 0; i < N; ++i) {
+    T scaled = y[i] * inv_cap;
+    dy[i] *= (T(1.0) - scaled * scaled);
+  }
+  dy[imax] = T(0.0);
+  for (size_t i = 0; i < N; ++i) {
+    if (i != imax) {
+      dy[imax] -= dy[i];
+    }
+  }
+}
+// Gradient of the cross-entropy loss with respect to the probabilities `x`.
+//
+// `x` and `dx` are indexed as [i * V + token] for the first
+// prompt.tokens.size() - 1 positions (none if the prompt is empty). `dx` is
+// zeroed, then for every position whose next token is at or past
+// prompt.context_size the entry of that next token is set to
+// -1 / (ln(2) * x[entry]).
+template<typename T>
+void CrossEntropyLossGrad(const T* x, T* dx, const Prompt& prompt, size_t V) {
+  T scaling = -1.0 / std::log(2.0);
+  // Bind by reference: the tokens are only read, so no copy is needed.
+  const std::vector<int>& tokens = prompt.tokens;
+  const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
+  memset(dx, 0, V * num_tokens * sizeof(dx[0]));
+  for (size_t i = 0; i < num_tokens; ++i) {
+    // Targets that still lie inside the context get no gradient.
+    if (i + 1 < prompt.context_size) {
+      continue;
+    }
+    const int next_token = tokens[i + 1];
+    dx[i * V + next_token] = scaling / x[i * V + next_token];
+  }
+}
+// Backward pass of the cross-entropy loss through the whole model (scalar
+// reference version).
+//
+// Runs the VJP steps in this order: loss gradient, softmax, softcap, the
+// matmul with embedder_input_embedding, the final RMSNorm, every layer from
+// last to first, and finally the input embedding. Parameter gradients go into
+// `grad`; activation gradients go into `backward`. Only the first
+// prompt.tokens.size() - 1 positions are processed (none for an empty
+// prompt).
+template<typename T, typename TConfig>
+void CrossEntropyLossBackwardPass(const Prompt& prompt,
+                                  const Weights<T, TConfig>& weights,
+                                  const ForwardPass<T, TConfig>& forward,
+                                  Weights<T, TConfig>& grad,
+                                  ForwardPass<T, TConfig>& backward) {
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kVocabSize = TConfig::kVocabSize;
+  static constexpr size_t kLayers = TConfig::kLayers;
+  // Bind by reference: the tokens are only read, so no copy is needed.
+  const std::vector<int>& tokens = prompt.tokens;
+  const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
+  // Seed backward.logits with the loss gradient w.r.t. the probabilities.
+  CrossEntropyLossGrad(forward.probs.data(), backward.logits.data(), prompt,
+                       kVocabSize);
+  SoftmaxVJPT(forward.probs.data(), backward.logits.data(),
+              kVocabSize, num_tokens);
+  for (size_t i = 0; i < num_tokens; ++i) {
+    SoftcapVJPT(forward.logits.data() + i * kVocabSize,
+                backward.logits.data() + i * kVocabSize,
+                kVocabSize);
+  }
+  MatMulVJPT(weights.embedder_input_embedding.data(),
+             forward.final_norm_output.data(),
+             backward.logits.data(),
+             grad.embedder_input_embedding.data(),
+             backward.final_norm_output.data(),
+             kVocabSize, kModelDim, num_tokens);
+  RMSNormVJPT(weights.final_norm_scale.data(),
+              forward.final_layer_output.data(),
+              backward.final_norm_output.data(),
+              grad.final_norm_scale.data(),
+              backward.final_layer_output.data(), kModelDim, num_tokens);
+  for (int layer = static_cast<int>(kLayers) - 1; layer >= 0; --layer) {
+    // `layer` is non-negative here, so the cast is lossless; comparing in
+    // size_t avoids a signed/unsigned mismatch against kLayers.
+    T* next_layer_grad = static_cast<size_t>(layer) + 1 < kLayers
+                         ? backward.layers[layer + 1].input.data()
+                         : backward.final_layer_output.data();
+    LayerVJP(*weights.GetLayer(layer), forward.layers[layer], next_layer_grad,
+             *grad.GetLayer(layer), backward.layers[layer], num_tokens);
+  }
+  const T kEmbScaling = EmbeddingScaling(kModelDim);
+  InputEmbeddingVJPT(weights.embedder_input_embedding.data(),
+                     tokens, kEmbScaling, backward.layers[0].input.data(),
+                     grad.embedder_input_embedding.data(), kModelDim);
+}
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_SCALAR_H_

gemma.cpp/backprop/backward_scalar_test.cc ADDED Viewed

	@@ -0,0 +1,614 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "backprop/backward_scalar.h"
+#include <stddef.h>
+#include <string.h>  // memset
+#include <array>
+#include <complex>
+#include <random>
+#include "gtest/gtest.h"
+#include "backprop/forward_scalar.h"
+#include "backprop/sampler.h"
+#include "backprop/test_util.h"
+#include "gemma/weights_raw.h"
+namespace gcpp {
+// Checks MatMulVJPT with TestGradient (declared in backprop/test_util.h).
+// The scalar function under test is dot(dy, MatMulT(c_weights, c_x)),
+// evaluated on std::complex copies of the inputs (presumably so TestGradient
+// can differentiate it numerically -- confirm in test_util.h). The scale
+// argument given to RandInit for weights and x doubles every iteration.
+TEST(BackPropTest, MatMulVJP) {
+  static const size_t kRows = 8;
+  static const size_t kCols = 64;
+  static const size_t kTokens = 5;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, kRows * kCols> weights;
+  std::array<T, kTokens * kCols> x;
+  std::array<T, kRows * kCols> grad;
+  std::array<T, kTokens * kCols> dx;
+  std::array<TC, kRows * kCols> c_weights;
+  std::array<TC, kTokens * kCols> c_x;
+  std::array<TC, kTokens * kRows> c_y;
+  std::array<T, kTokens * kRows> dy;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(weights, 1.0 * (1 << iter), gen);
+    RandInit(x, 1.0 * (1 << iter), gen);
+    RandInit(dy, 1.0, gen);
+    Complexify(weights, c_weights);
+    Complexify(x, c_x);
+    auto func = [&]() {
+      MatMulT(c_weights.data(), c_x.data(), c_y.data(), kRows, kCols, kTokens);
+      return DotT(dy.data(), c_y.data(), kTokens * kRows);
+    };
+    // grad is cleared every iteration; MatMulVJPT zeroes dx itself but not dw.
+    memset(&grad, 0, sizeof(grad));
+    MatMulVJPT(weights.data(), x.data(), dy.data(), grad.data(), dx.data(),
+               kRows, kCols, kTokens);
+    TestGradient(dx, c_x, func, 1e-11, 1e-12, __LINE__);
+    TestGradient(grad, c_weights, func, 1e-14, 1e-12, __LINE__);
+  }
+}
+// Checks MultiHeadMatMulVJPT with TestGradient. The scalar function under
+// test is dot(dy, MultiHeadMatMul(c_weights, c_x)) on complex copies of the
+// inputs. The RandInit scale for weights and x doubles every iteration.
+TEST(BackPropTest, MultiHeadMatMulVJP) {
+  static const size_t kRows = 2;
+  static const size_t kCols = 16;
+  static const size_t kHeads = 4;
+  static const size_t kTokens = 3;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, kRows * kCols * kHeads> weights;
+  std::array<T, kTokens * kCols * kHeads> x;
+  std::array<T, kRows * kCols * kHeads> grad;
+  std::array<T, kTokens * kCols * kHeads> dx;
+  std::array<TC, kRows * kCols * kHeads> c_weights;
+  std::array<TC, kTokens * kCols * kHeads> c_x;
+  std::array<TC, kTokens * kRows> c_y;
+  std::array<T, kTokens * kRows> dy;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(weights, 1.0 * (1 << iter), gen);
+    RandInit(x, 1.0 * (1 << iter), gen);
+    RandInit(dy, 1.0, gen);
+    Complexify(weights, c_weights);
+    Complexify(x, c_x);
+    auto func = [&]() {
+      MultiHeadMatMul(c_weights.data(), c_x.data(), c_y.data(), kHeads, kRows,
+                      kCols, kTokens);
+      return DotT(dy.data(), c_y.data(), kTokens * kRows);
+    };
+    // grad is cleared every iteration; the VJP zeroes dx itself but not dw.
+    memset(&grad, 0, sizeof(grad));
+    MultiHeadMatMulVJPT(weights.data(), x.data(), dy.data(), grad.data(),
+                        dx.data(), kHeads, kRows, kCols, kTokens);
+    TestGradient(dx, c_x, func, 1e-15, 1e-13, __LINE__);
+    TestGradient(grad, c_weights, func, 1e-15, 1e-13, __LINE__);
+  }
+}
+// Checks RMSNormVJPT with TestGradient for K rows of N elements. The scalar
+// function under test is dot(dy, RMSNormT(c_weights, c_x)) on complex copies
+// of the inputs. The RandInit scale for weights and x doubles every
+// iteration.
+TEST(BackPropTest, RMSNormVJP) {
+  static const size_t K = 2;
+  static const size_t N = 64;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, N> weights;
+  std::array<T, N> grad;
+  std::array<T, K * N> x;
+  std::array<T, K * N> dx;
+  std::array<T, K * N> dy;
+  std::array<TC, N> c_weights;
+  std::array<TC, K * N> c_x;
+  std::array<TC, K * N> c_y;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(weights, 1.0 * (1 << iter), gen);
+    RandInit(x, 1.0 * (1 << iter), gen);
+    Complexify(weights, c_weights);
+    Complexify(x, c_x);
+    RandInit(dy, 1.0, gen);
+    auto func = [&]() {
+      RMSNormT(c_weights.data(), c_x.data(), c_y.data(), N, K);
+      return DotT(dy.data(), c_y.data(), K * N);
+    };
+    // RMSNormVJPT adds into dw (dw[j] += ...), so grad must start at zero.
+    memset(&grad, 0, sizeof(grad));
+    RMSNormVJPT(weights.data(), x.data(), dy.data(), grad.data(), dx.data(),
+                N, K);
+    TestGradient(dx, c_x, func, 1e-15, 1e-14, __LINE__);
+    TestGradient(grad, c_weights, func, 1e-15, 1e-14, __LINE__);
+  }
+}
+// Checks the single-row SoftmaxVJPT with TestGradient. The scalar function
+// under test is dot(dy, Softmax(c_x)), evaluated on a copy of c_x because
+// Softmax is applied in place.
+TEST(BackPropTest, SoftmaxVJP) {
+  static const size_t N = 64;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, N> x;
+  std::array<T, N> dx;
+  std::array<T, N> dy;
+  std::array<TC, N> c_x;
+  std::array<TC, N> c_y;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(x, 1.0 * (1 << iter), gen);
+    Complexify(x, c_x);
+    RandInit(dy, 1.0, gen);
+    auto func = [&]() {
+      memcpy(c_y.data(), c_x.data(), sizeof(c_x));
+      Softmax(c_y.data(), N);
+      return DotT(dy.data(), c_y.data(), N);
+    };
+    // SoftmaxVJPT takes the softmax outputs, so x is transformed first; dx
+    // starts as a copy of dy because the VJP works in place.
+    Softmax(x.data(), N);
+    memcpy(dx.data(), dy.data(), N * sizeof(dx[0]));
+    SoftmaxVJPT(x.data(), dx.data(), N);
+    TestGradient(dx, c_x, func, 1e-15, 1e-15, __LINE__);
+  }
+}
+// Checks MaskedSoftmaxVJPT with TestGradient. Buffers are sized for kSeqLen
+// positions but only kTokens of them are processed; dx is value-initialized
+// so the entries past the processed rows are zero.
+TEST(BackPropTest, MaskedSoftmaxVJP) {
+  static const size_t kSeqLen = 16;
+  static const size_t kHeads = 2;
+  static const size_t kTokens = 14;
+  static const size_t N = kHeads * kSeqLen * kSeqLen;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, N> x;
+  std::array<T, N> dy;
+  std::array<T, N> dx = {};
+  std::array<TC, N> c_x;
+  std::array<TC, N> c_y;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(x, 1.0 * (1 << iter), gen);
+    Complexify(x, c_x);
+    RandInit(dy, 1.0, gen);
+    auto func = [&]() {
+      memcpy(c_y.data(), c_x.data(),
+             kTokens * kHeads * kSeqLen * sizeof(c_x[0]));
+      MaskedSoftmax(c_y.data(), kTokens, kHeads, kSeqLen);
+      return DotT(dy.data(), c_y.data(), N);
+    };
+    // The VJP takes the softmax outputs and works in place on a copy of dy.
+    MaskedSoftmax(x.data(), kTokens, kHeads, kSeqLen);
+    memcpy(dx.data(), dy.data(), kTokens * kHeads * kSeqLen * sizeof(dx[0]));
+    MaskedSoftmaxVJPT(x.data(), dx.data(), kTokens, kHeads, kSeqLen);
+    TestGradient(dx, c_x, func, 1e-14, 1e-15, __LINE__);
+  }
+}
+// Checks SoftcapVJPT with TestGradient. The scalar function under test is
+// dot(dy, Softcap(c_x)), evaluated on a copy of c_x because Softcap is
+// applied in place.
+TEST(BackPropTest, SoftcapVJP) {
+  static const size_t N = 64;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, N> x;
+  std::array<T, N> dx;
+  std::array<T, N> dy;
+  std::array<TC, N> c_x;
+  std::array<TC, N> c_y;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(x, 1.0 * (1 << iter), gen);
+    Complexify(x, c_x);
+    RandInit(dy, 1.0, gen);
+    auto func = [&]() {
+      memcpy(c_y.data(), c_x.data(), N * sizeof(c_x[0]));
+      Softcap(c_y.data(), N);
+      return DotT(dy.data(), c_y.data(), N);
+    };
+    // SoftcapVJPT takes the Softcap outputs and works in place on a copy of
+    // dy.
+    Softcap(x.data(), N);
+    memcpy(dx.data(), dy.data(), N * sizeof(dx[0]));
+    SoftcapVJPT(x.data(), dx.data(), N);
+    TestGradient(dx, c_x, func, 1e-15, 1e-14, __LINE__);
+  }
+}
+// Checks CrossEntropyLossGrad with TestGradient against CrossEntropyLoss.
+// The prompt has 9 tokens, i.e. K = 8 predicted positions over a vocabulary
+// of V entries; context_size cycles through 1..6 across iterations.
+TEST(BackPropTest, CrossEntropyLossGrad) {
+  static const size_t K = 8;
+  static const size_t V = 64;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, K * V> x;
+  std::array<T, K * V> dx;
+  std::array<TC, K * V> c_x;
+  Prompt prompt;
+  prompt.tokens = { 0, 1, 2, 3, 0, 3, 2, 1, 0 };
+  for (int iter = 0; iter < 10; ++iter) {
+    prompt.context_size = 1 + (iter % 6);
+    RandInit(x, 1.0 * (1 << iter), gen);
+    // Softcap followed by a per-row Softmax turns the random values into the
+    // input that CrossEntropyLossGrad is given.
+    Softcap(x.data(), V * K);
+    Softmax(x.data(), V, K);
+    CrossEntropyLossGrad(x.data(), dx.data(), prompt, V);
+    Complexify(x, c_x);
+    auto func = [&]() {
+      return CrossEntropyLoss(c_x.data(), prompt, V);
+    };
+    TestGradient(dx, c_x, func, 1e-100, 1e-15, __LINE__);
+  }
+}
+// Checks GatedGeluVJP with TestGradient for K rows; each input row holds
+// 2 * N values and each output row N values. The scalar function under test
+// is dot(dy, GatedGelu(c_x)). The RandInit scale stays at 1.0 here.
+TEST(BackPropTest, GatedGeluVJP) {
+  static const size_t K = 2;
+  static const size_t N = 64;
+  std::mt19937 gen(42);
+  using T = double;
+  using TC = std::complex<T>;
+  std::array<T, K * 2 * N> x;
+  std::array<T, K * 2 * N> dx;
+  std::array<T, K * N> dy;
+  std::array<TC, K * 2 * N> c_x;
+  std::array<TC, K * N> c_y;
+  for (int iter = 0; iter < 10; ++iter) {
+    RandInit(x, 1.0, gen);
+    Complexify(x, c_x);
+    RandInit(dy, 1.0, gen);
+    auto func = [&]() {
+      GatedGelu(c_x.data(), c_y.data(), N, K);
+      return DotT(dy.data(), c_y.data(), N * K);
+    };
+    GatedGeluVJP(x.data(), dy.data(), dx.data(), N, K);
+    TestGradient(dx, c_x, func, 1e-15, 1e-15, __LINE__);
+  }
+}
+// Checks MaskedAttentionVJP against MaskedAttention evaluated on complex
+// inputs, using dot(dy, output) as the scalar objective.
+TEST(BackPropTest, MaskedAttentionVJP) {
+  using Real = double;
+  using Cplx = std::complex<Real>;
+  static constexpr size_t kSeqLen = 16;
+  static constexpr size_t kHeads = 2;
+  static constexpr size_t kQKVDim = 8;
+  static constexpr size_t kTokens = 14;
+  static constexpr size_t kQKVSize = kSeqLen * (kHeads + 2) * kQKVDim;
+  static constexpr size_t kAttSize = kSeqLen * kHeads * kSeqLen;
+  std::mt19937 gen(42);
+  std::array<Real, kQKVSize> qkv;
+  // Zero-initialized once, before the first trial.
+  std::array<Real, kQKVSize> dqkv = {};
+  std::array<Real, kAttSize> dy;
+  std::array<Cplx, kQKVSize> c_qkv;
+  std::array<Cplx, kAttSize> c_y;
+  for (int trial = 0; trial < 10; ++trial) {
+    RandInit(qkv, 1.0, gen);
+    Complexify(qkv, c_qkv);
+    RandInit(dy, 1.0, gen);
+    auto objective = [&]() {
+      MaskedAttention(c_qkv.data(), c_y.data(), kTokens, kHeads, kQKVDim,
+                      kSeqLen);
+      return DotT(dy.data(), c_y.data(), kAttSize);
+    };
+    MaskedAttentionVJP(qkv.data(), dy.data(), dqkv.data(),
+                       kTokens, kHeads, kQKVDim, kSeqLen);
+    TestGradient(dqkv, c_qkv, objective, 1e-14, 1e-15, __LINE__);
+  }
+}
+// Checks both gradients produced by MixByAttentionVJP (with respect to qkv
+// and to the attention weights) against MixByAttention on complex inputs.
+TEST(BackPropTest, MixByAttentionVJP) {
+  using Real = double;
+  using Cplx = std::complex<Real>;
+  static constexpr size_t kSeqLen = 16;
+  static constexpr size_t kHeads = 2;
+  static constexpr size_t kQKVDim = 8;
+  static constexpr size_t kTokens = 14;
+  static constexpr size_t kQKVSize = kSeqLen * (kHeads + 2) * kQKVDim;
+  static constexpr size_t kAttnSize = kSeqLen * kHeads * kSeqLen;
+  static constexpr size_t kOutSize = kSeqLen * kHeads * kQKVDim;
+  std::mt19937 gen(42);
+  // Real-valued inputs; both gradient buffers are zeroed once up front.
+  std::array<Real, kQKVSize> qkv;
+  std::array<Real, kQKVSize> dqkv = {};
+  std::array<Real, kAttnSize> attn;
+  std::array<Real, kAttnSize> dattn = {};
+  std::array<Real, kOutSize> dy;
+  // Complex counterparts consumed by the forward pass inside the objective.
+  std::array<Cplx, kQKVSize> c_qkv;
+  std::array<Cplx, kAttnSize> c_attn;
+  std::array<Cplx, kOutSize> c_y;
+  for (int trial = 0; trial < 10; ++trial) {
+    RandInit(qkv, 1.0, gen);
+    RandInit(attn, 1.0, gen);
+    Complexify(qkv, c_qkv);
+    Complexify(attn, c_attn);
+    RandInit(dy, 1.0, gen);
+    auto objective = [&]() {
+      MixByAttention(c_qkv.data(), c_attn.data(), c_y.data(),
+                     kTokens, kHeads, kQKVDim, kSeqLen);
+      return DotT(dy.data(), c_y.data(), kOutSize);
+    };
+    MixByAttentionVJP(qkv.data(), attn.data(), dy.data(), dqkv.data(),
+                      dattn.data(), kTokens, kHeads, kQKVDim, kSeqLen);
+    TestGradient(dqkv, c_qkv, objective, 1e-14, 1e-15, __LINE__);
+    TestGradient(dattn, c_attn, objective, 1e-14, 1e-15, __LINE__);
+  }
+}
+// Checks InputEmbeddingVJPT (gradient with respect to the embedding table)
+// against InputEmbedding evaluated on complex weights.
+TEST(BackPropTest, InputEmbeddingVJP) {
+  using Real = double;
+  using Cplx = std::complex<Real>;
+  static constexpr size_t kSeqLen = 8;
+  static constexpr size_t kVocabSize = 4;
+  static constexpr size_t kModelDim = 16;
+  static constexpr size_t kTableSize = kVocabSize * kModelDim;
+  static constexpr size_t kActSize = kSeqLen * kModelDim;
+  std::mt19937 gen(42);
+  std::array<Real, kTableSize> weights;
+  std::array<Real, kTableSize> grad;
+  std::array<Real, kActSize> dy;
+  std::array<Cplx, kTableSize> c_weights;
+  std::array<Cplx, kActSize> c_y;
+  std::vector<int> tokens = { 0, 1, 2, 3, 0, 1, 2 };
+  // The objective only covers all positions except the last token.
+  const size_t num_tokens = tokens.size() - 1;
+  for (size_t trial = 0; trial < 10; ++trial) {
+    RandInit(weights, 1.0, gen);
+    RandInit(dy, 1.0, gen);
+    Complexify(weights, c_weights);
+    auto objective = [&]() {
+      InputEmbedding(c_weights.data(), tokens, Cplx(3.0), c_y.data(),
+                     kModelDim);
+      return DotT(dy.data(), c_y.data(), num_tokens * kModelDim);
+    };
+    // Start every trial from an all-zero gradient.
+    grad.fill(0.0);
+    InputEmbeddingVJPT(weights.data(), tokens, 3.0, dy.data(), grad.data(),
+                       kModelDim);
+    TestGradient(grad, c_weights, objective, 1e-16, 1e-14, __LINE__);
+  }
+}
+// Compile-time model configuration shared by the LayerVJP, EndToEnd and
+// Convergence tests in this file.
+struct TestConfig {
+  // Dimensions.
+  static constexpr int kSeqLen = 18;
+  static constexpr int kVocabSize = 12;
+  static constexpr int kModelDim = 32;
+  static constexpr int kFFHiddenDim = 48;
+
+  // Attention shape.
+  static constexpr int kHeads = 3;
+  static constexpr int kKVHeads = 1;
+  static constexpr int kQKVDim = 12;
+
+  // Layer stack: two layers, both of type kGemma. The counts below are
+  // derived from kLayerConfig, so they must be declared after it.
+  static constexpr std::array<LayerAttentionType, 2> kLayerConfig =
+      FixedLayerConfig<2>(LayerAttentionType::kGemma);
+  static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kGemmaLayers = kLayers;
+  static constexpr int kGriffinLayers = 0;
+
+  // Remaining switches, all disabled or zero for these tests.
+  static constexpr bool kAbsolutePE = false;
+  static constexpr bool kPostNormScale = false;
+  static constexpr bool kFFBiases = false;
+  static constexpr bool kSoftmaxAttnOutputBiases = false;
+  static constexpr int kConv1dWidth = 0;
+  static constexpr int kNumTensorScales = 0;
+};
+// Checks LayerVJP for one transformer layer: both the gradient with respect
+// to the layer input and the gradient with respect to the layer weights are
+// compared against ApplyLayer evaluated on complex values.
+TEST(BackPropTest, LayerVJP) {
+  using Real = double;
+  using Cplx = std::complex<Real>;
+  const size_t kOutputSize = TestConfig::kSeqLen * TestConfig::kModelDim;
+  const size_t num_tokens = 3;
+  std::mt19937 gen(42);
+  // Real-valued state.
+  Layer<Real, TestConfig> weights;
+  Layer<Real, TestConfig> grad;
+  ForwardLayer<Real, TestConfig> forward;
+  ForwardLayer<Real, TestConfig> backward = {};
+  std::array<Real, kOutputSize> y;
+  std::array<Real, kOutputSize> dy;
+  // Complex-valued counterparts used by the objective.
+  Layer<Cplx, TestConfig> c_weights;
+  ForwardLayer<Cplx, TestConfig> c_forward;
+  std::array<Cplx, kOutputSize> c_y;
+  for (size_t trial = 0; trial < 10; ++trial) {
+    RandInit(weights, 1.0, gen);
+    RandInit(forward.input, 1.0, gen);
+    RandInit(dy, 1.0, gen);
+    Complexify(weights, c_weights);
+    Complexify(forward.input, c_forward.input);
+    auto objective = [&]() {
+      ApplyLayer(c_weights, c_forward, num_tokens, c_y.data());
+      return DotT(dy.data(), c_y.data(), num_tokens * TestConfig::kModelDim);
+    };
+    // Reset the weight gradient, run the real forward pass to fill the
+    // activations, then backpropagate dy through the layer.
+    memset(&grad, 0, sizeof(grad));
+    ApplyLayer(weights, forward, num_tokens, y.data());
+    LayerVJP(weights, forward, dy.data(), grad, backward, num_tokens);
+    TestGradient(backward.input, c_forward.input, objective, 1e-11, 5e-11,
+                 __LINE__);
+    TestGradient(grad, c_weights, objective, 1e-11);
+  }
+}
+// Checks the full backward pass: for each sampled prompt, the weight gradient
+// from CrossEntropyLossBackwardPass is compared against the complex-valued
+// forward pass.
+TEST(BackPropTest, EndToEnd) {
+  using Real = double;
+  using Cplx = std::complex<Real>;
+  std::mt19937 gen(42);
+  WeightsWrapper<Real, TestConfig> weights;
+  WeightsWrapper<Real, TestConfig> grad;
+  ForwardPass<Real, TestConfig> forward;
+  ForwardPass<Real, TestConfig> backward;
+  WeightsWrapper<Cplx, TestConfig> c_weights;
+  ForwardPass<Cplx, TestConfig> c_forward;
+  ReverseSequenceSampler training_task({0, 0, 1, 1});
+  const std::vector<Prompt> batch = training_task.SampleBatch(3, gen);
+  for (size_t i = 0; i < batch.size(); ++i) {
+    const Prompt& prompt = batch[i];
+    ReverseSequenceSampler::LogPrompt(prompt);
+    // Fresh random weights for every prompt.
+    RandInit(weights.get(), 1.0, gen);
+    // The forward pass fills `forward`, which the backward pass then reads.
+    CrossEntropyLossForwardPass(prompt, weights.get(), forward);
+    grad.clear();
+    CrossEntropyLossBackwardPass(
+        prompt, weights.get(), forward, grad.get(), backward);
+    Complexify(weights.get(), c_weights.get());
+    auto loss_fn = [&]() {
+      return CrossEntropyLossForwardPass(prompt, c_weights.get(), c_forward);
+    };
+    TestGradient(grad.get(), c_weights.get(), loss_fn, 1e-11);
+  }
+}
+// Layer overload of out += c * x: applies the update to each of the six
+// layer tensors listed below.
+template<typename T, typename TConfig>
+void MulByConstAndAddT(T c, const Layer<T, TConfig>& x,
+                       Layer<T, TConfig>& out) {
+  // Normalization scales.
+  MulByConstAndAddT(c, x.pre_attention_norm_scale,
+                    out.pre_attention_norm_scale);
+  MulByConstAndAddT(c, x.pre_ffw_norm_scale, out.pre_ffw_norm_scale);
+  // Attention weights.
+  MulByConstAndAddT(c, x.qkv_einsum_w, out.qkv_einsum_w);
+  MulByConstAndAddT(c, x.attn_vec_einsum_w, out.attn_vec_einsum_w);
+  // Feed-forward weights.
+  MulByConstAndAddT(c, x.gating_einsum_w, out.gating_einsum_w);
+  MulByConstAndAddT(c, x.linear_w, out.linear_w);
+}
+// Whole-model overload of out += c * x: updates the input embedding, the
+// final norm scale and then every layer in turn.
+template<typename T, typename TConfig>
+void MulByConstAndAddT(T c, const Weights<T, TConfig>& x,
+                       Weights<T, TConfig>& out) {
+  static constexpr size_t kNumLayers = TConfig::kLayers;
+  MulByConstAndAddT(c, x.embedder_input_embedding,
+                    out.embedder_input_embedding);
+  MulByConstAndAddT(c, x.final_norm_scale, out.final_norm_scale);
+  for (size_t layer = 0; layer != kNumLayers; ++layer) {
+    const auto& src = *x.GetLayer(layer);
+    auto& dst = *out.GetLayer(layer);
+    MulByConstAndAddT(c, src, dst);
+  }
+}
+// Runs the forward pass on every prompt of `batch` and returns the summed
+// loss multiplied by 1 / batch.size(). The batch is expected to be non-empty,
+// since the scale factor divides by its size.
+template<typename T, typename TConfig>
+T CrossEntropyLossForwardPass(const std::vector<Prompt>& batch,
+                              const WeightsWrapper<T, TConfig>& weights,
+                              ForwardPass<T, TConfig>& forward) {
+  T total = 0.0;
+  for (size_t i = 0; i < batch.size(); ++i) {
+    total += CrossEntropyLossForwardPass(batch[i], weights.get(), forward);
+  }
+  const T inv_batch_size = 1.0 / batch.size();
+  return total * inv_batch_size;
+}
+// Returns the batch loss that would result from one gradient step of size
+// `learning_rate`. `weights` is left untouched: the stepped weights
+//   tmp = weights - (learning_rate / batch.size()) * grad
+// are built in `tmp`, and the batch forward pass is evaluated on those.
+template<typename T, typename TConfig>
+T CrossEntropyLossForwardPass(T learning_rate,
+                              const std::vector<Prompt>& batch,
+                              const WeightsWrapper<T, TConfig>& weights,
+                              const WeightsWrapper<T, TConfig>& grad,
+                              WeightsWrapper<T, TConfig>& tmp,
+                              ForwardPass<T, TConfig>& forward) {
+  const T step = -learning_rate / batch.size();
+  tmp.copy(weights);
+  MulByConstAndAddT(step, grad.get(), tmp.get());
+  return CrossEntropyLossForwardPass(batch, tmp, forward);
+}
+// Line search along the negative gradient direction, followed by the actual
+// weight update. The tests use this so that every gradient-descent step can
+// be checked to decrease the objective.
+//
+// Starting from `initial_learning_rate`, the step size is first halved (up to
+// 30 times) and then doubled (up to 30 times); each phase stops as soon as
+// the trial loss stops improving. `loss` is the loss at the current weights.
+// Returns the step size that was finally applied to `weights`.
+template<typename T, typename TConfig>
+T FindOptimalUpdate(const WeightsWrapper<T, TConfig>& grad,
+                    WeightsWrapper<T, TConfig>& weights,
+                    WeightsWrapper<T, TConfig>& tmp,
+                    ForwardPass<T, TConfig>& forward,
+                    const std::vector<Prompt>& batch,
+                    T loss, T initial_learning_rate) {
+  // Batch loss after a trial step of size `lr`. It is evaluated on `tmp`, so
+  // `weights` is not modified until the final update below.
+  auto trial_loss = [&](T lr) {
+    return CrossEntropyLossForwardPass(lr, batch, weights, grad, tmp, forward);
+  };
+  T best_lr = initial_learning_rate;
+  T best_loss = trial_loss(best_lr);
+  // Shrinking phase: stop once the current step already beats `loss` and
+  // halving it again does not help.
+  for (size_t iter = 0; iter < 30; ++iter) {
+    const T smaller_lr = best_lr * 0.5;
+    const T smaller_loss = trial_loss(smaller_lr);
+    if (best_loss < loss && smaller_loss >= best_loss) {
+      break;
+    }
+    best_lr = smaller_lr;
+    best_loss = smaller_loss;
+  }
+  // Growing phase: keep doubling while the loss keeps decreasing.
+  for (size_t iter = 0; iter < 30; ++iter) {
+    const T larger_lr = best_lr * 2.0;
+    const T larger_loss = trial_loss(larger_lr);
+    if (larger_loss >= best_loss) {
+      break;
+    }
+    best_lr = larger_lr;
+    best_loss = larger_loss;
+  }
+  // Apply the chosen step to the real weights.
+  const T step = -best_lr / batch.size();
+  MulByConstAndAddT(step, grad.get(), weights.get());
+  return best_lr;
+}
+// Trains the small TestConfig model in float on the reverse-sequence task and
+// checks that
+//   * the loss strictly decreases on every step,
+//   * the accumulated gradient matches the complex-valued forward pass
+//     (re-checked every 250 steps), and
+//   * the loss drops below 1e-2 in fewer than 1000 steps.
+// The suite name is spelled BackPropTest, matching the other tests in this
+// file, so that suite-level filters also select this test.
+TEST(BackPropTest, Convergence) {
+  std::mt19937 gen(42);
+  using T = float;
+  using TC = std::complex<double>;
+  WeightsWrapper<T, TestConfig> weights;
+  WeightsWrapper<T, TestConfig> grad;
+  WeightsWrapper<T, TestConfig> tmp;
+  ForwardPass<T, TestConfig> forward;
+  ForwardPass<T, TestConfig> backward;
+  WeightsWrapper<TC, TestConfig> c_weights;
+  ForwardPass<TC, TestConfig> c_forward;
+  constexpr size_t kBatchSize = 5;
+  ReverseSequenceSampler training_task({0, 0, 0, 1, 1});
+  T learning_rate = 0.01;
+  RandInit(weights.get(), T(1.0), gen);
+  printf("Sample batch:\n");
+  for (size_t i = 0; i < 10; ++i) {
+    ReverseSequenceSampler::LogPrompt(training_task.Sample(gen));
+  }
+  T prev_loss = std::numeric_limits<T>::max();
+  bool stop = false;
+  size_t step = 0;
+  while (!stop) {
+    T loss = 0.0;
+    grad.clear();
+    // The sampling generator is re-created with the same seed on every step,
+    // so SampleBatch always starts from an identical generator state.
+    std::mt19937 sgen(42);
+    std::vector<Prompt> batch = training_task.SampleBatch(kBatchSize, sgen);
+    // Accumulate the summed loss and the summed gradient over the batch.
+    for (const Prompt& prompt : batch) {
+      loss += CrossEntropyLossForwardPass(prompt, weights.get(), forward);
+      CrossEntropyLossBackwardPass(
+          prompt, weights.get(), forward, grad.get(), backward);
+    }
+    if (step % 250 == 0) {
+      printf("Checking gradient...\n");
+      Complexify(weights.get(), c_weights.get());
+      // `grad` holds the sum over the batch, whereas the batch forward pass
+      // returns the mean, hence the multiplication by the batch size.
+      auto func = [&]() {
+        TC scale = batch.size();
+        return CrossEntropyLossForwardPass(batch, c_weights, c_forward) * scale;
+      };
+      TestGradient(grad.get(), c_weights.get(), func, 5e-3f);
+    }
+    loss /= batch.size();
+    EXPECT_LT(loss, prev_loss);
+    stop = step >= 10000 || loss < 1e-2;
+    if (step % 10 == 0 || stop) {
+      printf("step: %5zu  loss: %.15f  learning_rate: %.15f\n",
+             step, loss, learning_rate);
+    }
+    if (!stop) {
+      learning_rate = FindOptimalUpdate(
+          grad, weights, tmp, forward, batch, loss, learning_rate);
+      ++step;
+    }
+    prev_loss = loss;
+  }
+  // Compare as size_t to avoid a signed/unsigned comparison inside gtest.
+  EXPECT_LT(step, size_t{1000});
+}
+}  // namespace gcpp

gemma.cpp/backprop/backward_test.cc ADDED Viewed

	@@ -0,0 +1,264 @@

+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef HWY_DISABLED_TARGETS
+#define HWY_DISABLED_TARGETS HWY_SCALAR
+#endif
+#include <stddef.h>
+#include <array>
+#include <complex>
+#include <random>
+#include <vector>
+#include "backprop/backward_scalar.h"
+#include "backprop/forward_scalar.h"
+#include "backprop/sampler.h"
+#include "backprop/test_util.h"
+#include "gemma/activations.h"
+#include "gemma/weights_raw.h"
+#include "hwy/base.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "backprop/backward_test.cc"  //NOLINT
+// clang-format on
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+// After highway.h
+#include "backprop/backward-inl.h"
+#include "backprop/forward-inl.h"
+#include "gemma/ops.h"
+HWY_BEFORE_NAMESPACE();
+namespace gcpp {
+namespace HWY_NAMESPACE {
+// Checks MatMulVJP in two ways: against the complex-valued MatMulT forward
+// pass via TestGradient, and against the scalar reference MatMulVJPT via
+// TestNear. The scale passed to RandInit for weights and x doubles on every
+// iteration.
+void TestMatMulVJP() {
+  using Cplx = std::complex<double>;
+  static constexpr size_t kRows = 8;
+  static constexpr size_t kCols = 64;
+  static constexpr size_t kTokens = 5;
+  static constexpr size_t kWeightSize = kRows * kCols;
+  static constexpr size_t kInputSize = kTokens * kCols;
+  static constexpr size_t kOutputSize = kTokens * kRows;
+  hwy::ThreadPool pool(8);
+  std::mt19937 gen(42);
+  HWY_ALIGN std::array<float, kWeightSize> weights;
+  HWY_ALIGN std::array<float, kInputSize> x;
+  HWY_ALIGN std::array<float, kOutputSize> dy;
+  HWY_ALIGN std::array<float, kWeightSize> grad;
+  HWY_ALIGN std::array<float, kInputSize> dx;
+  HWY_ALIGN std::array<float, kWeightSize> grad_scalar;
+  HWY_ALIGN std::array<float, kInputSize> dx_scalar;
+  std::array<Cplx, kWeightSize> c_weights;
+  std::array<Cplx, kInputSize> c_x;
+  std::array<Cplx, kOutputSize> c_y;
+  for (int trial = 0; trial < 10; ++trial) {
+    RandInit(weights, 1.0f * (1 << trial), gen);
+    RandInit(x, 1.0f * (1 << trial), gen);
+    RandInit(dy, 1.0f, gen);
+    Complexify(weights, c_weights);
+    Complexify(x, c_x);
+    auto objective = [&]() {
+      MatMulT(c_weights.data(), c_x.data(), c_y.data(), kRows, kCols, kTokens);
+      return DotT(dy.data(), c_y.data(), kOutputSize);
+    };
+
+    // Implementation under test.
+    hwy::ZeroBytes(&grad, sizeof(grad));
+    MatMulVJP<kCols, kRows>(weights.data(), x.data(), dy.data(), kTokens,
+                            grad.data(), dx.data(), pool);
+    TestGradient(dx, c_x, objective, 5e-5, 5e-5, __LINE__);
+    TestGradient(grad, c_weights, objective, 5e-5, 5e-5, __LINE__);
+
+    // Scalar reference implementation.
+    hwy::ZeroBytes(&grad_scalar, sizeof(grad_scalar));
+    MatMulVJPT(weights.data(), x.data(), dy.data(), grad_scalar.data(),
+               dx_scalar.data(), kRows, kCols, kTokens);
+    TestNear(dx, dx_scalar, 5e-5, 1e-4, __LINE__);
+    TestNear(grad, grad_scalar, 5e-5, 5e-5, __LINE__);
+  }
+}
+// Checks MultiHeadMatMulVJP against the complex-valued MultiHeadMatMul forward
+// pass via TestGradient, and against the scalar reference MultiHeadMatMulVJPT
+// via TestNear.
+void TestMultiHeadMatMulVJP() {
+  using Cplx = std::complex<double>;
+  static constexpr size_t kRows = 2;
+  static constexpr size_t kCols = 16;
+  static constexpr size_t kHeads = 4;
+  static constexpr size_t kTokens = 3;
+  static constexpr size_t kWeightSize = kRows * kCols * kHeads;
+  static constexpr size_t kInputSize = kTokens * kCols * kHeads;
+  static constexpr size_t kOutputSize = kTokens * kRows;
+  hwy::ThreadPool pool(8);
+  std::mt19937 gen(42);
+  HWY_ALIGN std::array<float, kWeightSize> weights;
+  HWY_ALIGN std::array<float, kInputSize> x;
+  HWY_ALIGN std::array<float, kWeightSize> grad;
+  HWY_ALIGN std::array<float, kInputSize> dx;
+  HWY_ALIGN std::array<float, kOutputSize> dy;
+  HWY_ALIGN std::array<float, kWeightSize> grad_scalar;
+  HWY_ALIGN std::array<float, kInputSize> dx_scalar;
+  std::array<Cplx, kWeightSize> c_weights;
+  std::array<Cplx, kInputSize> c_x;
+  std::array<Cplx, kOutputSize> c_y;
+  for (int trial = 0; trial < 10; ++trial) {
+    RandInit(weights, 1.0f * (1 << trial), gen);
+    RandInit(x, 1.0f * (1 << trial), gen);
+    RandInit(dy, 1.0f, gen);
+    Complexify(weights, c_weights);
+    Complexify(x, c_x);
+    auto objective = [&]() {
+      MultiHeadMatMul(c_weights.data(), c_x.data(), c_y.data(), kHeads, kRows,
+                      kCols, kTokens);
+      return DotT(dy.data(), c_y.data(), kOutputSize);
+    };
+
+    // Implementation under test.
+    hwy::ZeroBytes(&grad, sizeof(grad));
+    MultiHeadMatMulVJP<kHeads, kCols, kRows>(
+        weights.data(), x.data(), dy.data(), kTokens, grad.data(), dx.data(),
+        pool);
+    TestGradient(dx, c_x, objective, 5e-5, 5e-5, __LINE__);
+    TestGradient(grad, c_weights, objective, 5e-5, 5e-5, __LINE__);
+
+    // Scalar reference implementation.
+    hwy::ZeroBytes(&grad_scalar, sizeof(grad_scalar));
+    MultiHeadMatMulVJPT(weights.data(), x.data(), dy.data(), grad_scalar.data(),
+                        dx_scalar.data(), kHeads, kRows, kCols, kTokens);
+    TestNear(dx, dx_scalar, 5e-5, 5e-5, __LINE__);
+    TestNear(grad, grad_scalar, 5e-5, 5e-5, __LINE__);
+  }
+}
+// Checks RMSNormVJP against the complex-valued RMSNormT forward pass via
+// TestGradient, and against the scalar reference RMSNormVJPT via TestNear.
+void TestRMSNormVJP() {
+  using Cplx = std::complex<double>;
+  static constexpr size_t kRows = 2;
+  static constexpr size_t kDim = 64;
+  static constexpr size_t kActSize = kRows * kDim;
+  hwy::ThreadPool pool(8);
+  std::mt19937 gen(42);
+  HWY_ALIGN std::array<float, kDim> weights;
+  HWY_ALIGN std::array<float, kActSize> x;
+  HWY_ALIGN std::array<float, kDim> grad;
+  HWY_ALIGN std::array<float, kActSize> dx;
+  HWY_ALIGN std::array<float, kActSize> dy;
+  HWY_ALIGN std::array<float, kDim> grad_scalar;
+  HWY_ALIGN std::array<float, kActSize> dx_scalar;
+  std::array<Cplx, kDim> c_weights;
+  std::array<Cplx, kActSize> c_x;
+  std::array<Cplx, kActSize> c_y;
+  for (int trial = 0; trial < 10; ++trial) {
+    RandInit(weights, 1.0f * (1 << trial), gen);
+    RandInit(x, 1.0f * (1 << trial), gen);
+    RandInit(dy, 1.0f, gen);
+    Complexify(weights, c_weights);
+    Complexify(x, c_x);
+    auto objective = [&]() {
+      RMSNormT(c_weights.data(), c_x.data(), c_y.data(), kDim, kRows);
+      return DotT(dy.data(), c_y.data(), kActSize);
+    };
+
+    // Implementation under test.
+    hwy::ZeroBytes(&grad, sizeof(grad));
+    RMSNormVJP(weights.data(), x.data(), dy.data(), kDim, kRows, grad.data(),
+               dx.data(), pool);
+    TestGradient(dx, c_x, objective, 5e-5, 5e-5, __LINE__);
+    TestGradient(grad, c_weights, objective, 5e-5, 5e-5, __LINE__);
+
+    // Scalar reference implementation.
+    hwy::ZeroBytes(&grad_scalar, sizeof(grad_scalar));
+    RMSNormVJPT(weights.data(), x.data(), dy.data(), grad_scalar.data(),
+                dx_scalar.data(), kDim, kRows);
+    TestNear(dx, dx_scalar, 0, 2e-5, __LINE__);
+    TestNear(grad, grad_scalar, 0, 2e-5, __LINE__);
+  }
+}
+// Compile-time model configuration used by TestEndToEnd in this file.
+struct TestConfig {
+  // Dimensions.
+  static constexpr int kSeqLen = 24;
+  static constexpr int kVocabSize = 16;
+  static constexpr int kModelDim = 32;
+  static constexpr int kFFHiddenDim = 64;
+
+  // Attention shape.
+  static constexpr int kHeads = 3;
+  static constexpr int kKVHeads = 1;
+  static constexpr int kQKVDim = 16;
+
+  // Layer stack: two layers, both of type kGemma. The counts below are
+  // derived from kLayerConfig, so they must be declared after it.
+  static constexpr std::array<LayerAttentionType, 2> kLayerConfig =
+      FixedLayerConfig<2>(LayerAttentionType::kGemma);
+  static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kGemmaLayers = kLayers;
+  static constexpr int kGriffinLayers = 0;
+
+  // Remaining switches, all disabled or zero for this test.
+  static constexpr bool kAbsolutePE = false;
+  static constexpr bool kPostNormScale = false;
+  static constexpr bool kFFBiases = false;
+  static constexpr bool kSoftmaxAttnOutputBiases = false;
+  static constexpr int kConv1dWidth = 0;
+  static constexpr int kNumTensorScales = 0;
+};
+// End-to-end check of the float forward and backward passes. For each sampled
+// prompt:
+//   1. the loss from the pool-based forward pass must agree with the loss
+//      from the scalar forward pass to a relative tolerance of 2e-5;
+//   2. the weight gradient from the backward pass must agree with the
+//      complex-valued forward pass (TestGradient).
+void TestEndToEnd() {
+  using Cplx = std::complex<double>;
+  std::mt19937 gen(42);
+  hwy::ThreadPool pool(0);
+  WeightsWrapper<float, TestConfig> weights;
+  WeightsWrapper<float, TestConfig> grad;
+  ActivationsWrapper<float, TestConfig> forward0;
+  ActivationsWrapper<float, TestConfig> forward1;
+  ActivationsWrapper<float, TestConfig> backward;
+  WeightsWrapper<Cplx, TestConfig> c_weights;
+  ForwardPass<Cplx, TestConfig> c_forward;
+  ReverseSequenceSampler training_task({0, 0, 1, 1});
+  const std::vector<Prompt> batch = training_task.SampleBatch(3, gen);
+  for (size_t i = 0; i < batch.size(); ++i) {
+    const Prompt& prompt = batch[i];
+    ReverseSequenceSampler::LogPrompt(prompt);
+    RandInit(weights.get(), 1.0f, gen);
+
+    // Scalar forward pass first, then the pool-based one.
+    const float scalar_loss = CrossEntropyLossForwardPass(
+        prompt, weights.get(), forward0.get());
+    const float pool_loss =
+        CrossEntropyLossForwardPass<TestConfig, WeightsF, LayerF>(
+            prompt.tokens, prompt.context_size, weights.get(), forward1.get(),
+            pool);
+    EXPECT_NEAR(pool_loss, scalar_loss, std::abs(scalar_loss) * 2e-5);
+
+    // The backward pass reuses the activations of the pool-based forward pass.
+    grad.clear();
+    CrossEntropyLossBackwardPass<TestConfig, WeightsF, LayerF>(
+        prompt, weights.get(), forward1.get(), grad.get(), backward.get(),
+        pool);
+
+    Complexify(weights.get(), c_weights.get());
+    auto loss_fn = [&]() {
+      return CrossEntropyLossForwardPass(prompt, c_weights.get(), c_forward);
+    };
+    TestGradient(grad.get(), c_weights.get(), loss_fn, 2e-3f);
+  }
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace gcpp
+HWY_AFTER_NAMESPACE();
+#if HWY_ONCE  // Registration section; see Highway docs on HWY_ONCE.
+namespace gcpp {
+HWY_BEFORE_TEST(BackwardTest);  // Opens the BackwardTest suite.
+HWY_EXPORT_AND_TEST_P(BackwardTest, TestMatMulVJP);
+HWY_EXPORT_AND_TEST_P(BackwardTest, TestMultiHeadMatMulVJP);
+HWY_EXPORT_AND_TEST_P(BackwardTest, TestRMSNormVJP);
+HWY_EXPORT_AND_TEST_P(BackwardTest, TestEndToEnd);
+HWY_AFTER_TEST();  // Closes the suite opened by HWY_BEFORE_TEST.
+}  // namespace gcpp
+#endif  // HWY_ONCE

gemma.cpp/backprop/common_scalar.h ADDED Viewed

	@@ -0,0 +1,120 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_COMMON_SCALAR_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_COMMON_SCALAR_H_
+#include <stddef.h>
+#include <array>
+#include <complex>
+namespace gcpp {
+// Returns the sum over i in [0, N) of a[i] * b[i], accumulated in the element
+// type of `b`. Returns a value-initialized U when N == 0.
+template<typename T, typename U>
+U DotT(const T* a, const U* b, size_t N) {
+  U acc = {};
+  const T* const a_end = a + N;
+  for (; a != a_end; ++a, ++b) {
+    acc += *a * *b;
+  }
+  return acc;
+}
+// Specialization for float inputs combined with complex<double> values: each
+// float is widened to double before it is multiplied with the complex term.
+template<>
+inline std::complex<double> DotT(const float* a, const std::complex<double>* b,
+                                 size_t N) {
+  std::complex<double> acc = {};
+  for (size_t k = 0; k != N; ++k) {
+    const double widened = static_cast<double>(a[k]);
+    acc += widened * b[k];
+  }
+  return acc;
+}
+// Scales the N elements of x in place: x[i] *= c.
+template<typename T>
+void MulByConstT(T c, T* x, size_t N) {
+  T* const x_end = x + N;
+  for (T* p = x; p != x_end; ++p) {
+    *p *= c;
+  }
+}
+// Accumulates a scaled copy of x into out: out[i] += c * x[i] for i in [0, N).
+template<typename T>
+void MulByConstAndAddT(T c, const T* x, T* out, size_t N) {
+  const T* const x_end = x + N;
+  for (; x != x_end; ++x, ++out) {
+    *out += c * *x;
+  }
+}
+// std::array overload of out += c * x; forwards to the pointer version.
+template<typename T, size_t N>
+void MulByConstAndAddT(T c, const std::array<T, N>& x, std::array<T, N>& out) {
+  const T* const src = x.data();
+  T* const dst = out.data();
+  MulByConstAndAddT(c, src, dst, N);
+}
+// Element-wise accumulation: out[i] += a[i] for i in [0, N).
+template<typename T>
+void AddFromT(const T* a, T* out, size_t N) {
+  const T* const a_end = a + N;
+  for (; a != a_end; ++a, ++out) {
+    *out += *a;
+  }
+}
+// Returns the sum of x[i] * x[i] over i in [0, N); a value-initialized T when
+// N == 0.
+template<typename T>
+T SquaredL2(const T* x, size_t N) {
+  T acc = {};
+  const T* const x_end = x + N;
+  for (const T* p = x; p != x_end; ++p) {
+    acc += *p * *p;
+  }
+  return acc;
+}
+// Tanh-based GELU:
+//   Gelu(x) = x * 0.5 * (1 + tanh(kSqrt2OverPi * (x + kMul * x^3))).
+template<typename T>
+T Gelu(T x) {
+  const T kMul = 0.044715;
+  const T kSqrt2OverPi = 0.797884560804236;
+  const T x3 = x * x * x;
+  const T arg = kSqrt2OverPi * (kMul * x3 + x);
+  const T tanh_arg = std::tanh(arg);
+  const T cdf = T(0.5) * (T(1.0) + tanh_arg);
+  return x * cdf;
+}
+// Rotates x in place, pairing element k with element k + N/2 for k in
+// [0, N/2). Pair k is rotated by the angle
+//   theta_k = i / base^(2k / N),
+// i.e. (x0, x1) -> (x0 cos - x1 sin, x0 sin + x1 cos).
+template<typename T, typename U>
+void Rope(T* x, U base, size_t N, int i) {
+  const size_t half = N / 2;
+  T* const lo = x;
+  T* const hi = x + half;
+  for (size_t k = 0; k < half; ++k) {
+    const T exponent = T(2 * k) / T(N);
+    // Stored as T before dividing, so the division happens in T.
+    const T timescale = std::pow(base, exponent);
+    const T theta = T(i) / timescale;
+    const T c = std::cos(theta);
+    const T s = std::sin(theta);
+    const T a = lo[k];
+    const T b = hi[k];
+    lo[k] = a * c - b * s;
+    hi[k] = a * s + b * c;
+  }
+}
+// Rope with the base fixed to 10000, for element type T.
+template<typename T>
+void Rope(T* x, size_t N, int i) {
+  const T base = T(10000.0);
+  Rope(x, base, N, i);
+}
+// Rope for complex elements: the base 10000 is passed as the real type T
+// rather than as std::complex<T>.
+template<typename T>
+void Rope(std::complex<T>* x, size_t N, int i) {
+  const T base = T(10000.0);
+  Rope(x, base, N, i);
+}
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_COMMON_SCALAR_H_

gemma.cpp/backprop/forward-inl.h ADDED Viewed

	@@ -0,0 +1,289 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Include guard for non-SIMD code.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_INL_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_INL_H_
+#include <stddef.h>
+#include <stdint.h>
+#include <cmath>
+#include <vector>
+#include "gemma/activations.h"
+#include "gemma/common.h"
+#include "gemma/configs.h"
+#include "hwy/base.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_INL_H_
+// Include guard for (potentially) SIMD code.
+#if defined(THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE) == defined(HWY_TARGET_TOGGLE)
+#ifdef THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE
+#undef THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE
+#else
+#define THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE
+#endif
+#include "gemma/ops.h"
+#include "hwy/highway.h"
+HWY_BEFORE_NAMESPACE();
+namespace gcpp {
+namespace HWY_NAMESPACE {
+// For every prompt position except the last, decompresses the `model_dim`
+// weights starting at offset token * model_dim into the output row of that
+// position and multiplies the row by `scaling`. `prompt` must not be empty.
+template <typename ArrayT>
+void InputEmbedding(const ArrayT& weights, const std::vector<int>& prompt,
+                    const float scaling, float* HWY_RESTRICT output,
+                    size_t model_dim) {
+  HWY_ASSERT(!prompt.empty());
+  const size_t num_positions = prompt.size() - 1;
+  for (size_t pos = 0; pos < num_positions; ++pos) {
+    const int token = prompt[pos];
+    float* HWY_RESTRICT row = output + pos * model_dim;
+    Decompress(weights, token * model_dim, row, model_dim);
+    MulByConst(scaling, row, model_dim);
+  }
+}
+// Applies RMSNorm to each of the `num_tokens` consecutive rows of length
+// `model_dim` in `x`, writing to the matching row of `output`. The same
+// `weights` are used for every row. `pool` is not used by this function.
+template<typename WT, typename XT, typename OutT>
+void ApplyRMSNorm(const WT* HWY_RESTRICT weights, const XT* HWY_RESTRICT x,
+                  size_t model_dim, size_t num_tokens,
+                  OutT* HWY_RESTRICT output,
+                  hwy::ThreadPool& pool) {
+  size_t offset = 0;
+  for (size_t pos = 0; pos < num_tokens; ++pos, offset += model_dim) {
+    RMSNorm(x + offset, weights, output + offset, model_dim);
+  }
+}
+// Returns the cross-entropy loss in bits. It sums log(probs[pos, next_token])
+// over every position whose next token lies outside the context (i.e.
+// pos + 1 >= context_size), then scales the sum by -1 / ln(2). `prompt` must
+// not be empty. `pool` is not used by this function.
+static HWY_NOINLINE float CrossEntropyLoss(const float* HWY_RESTRICT probs,
+                                           const std::vector<int>& prompt,
+                                           size_t context_size,
+                                           size_t vocab_size,
+                                           hwy::ThreadPool& pool) {
+  HWY_ASSERT(!prompt.empty());
+  const size_t num_predictions = prompt.size() - 1;
+  float log_prob_sum = 0.0f;
+  for (size_t pos = 0; pos < num_predictions; ++pos) {
+    const size_t target_pos = pos + 1;
+    // Tokens that are still part of the context are not predicted.
+    if (target_pos >= context_size) {
+      const int target = prompt[target_pos];
+      log_prob_sum += std::log(probs[pos * vocab_size + target]);
+    }
+  }
+  const float neg_inv_ln2 = -1.0 / std::log(2.0);
+  return log_prob_sum * neg_inv_ln2;
+}
+template <typename TConfig, template<typename> typename LayerT>
+void ApplyForwardLayer(const LayerT<TConfig>& weights,
+                       ForwardLayer<float, TConfig>& activations,
+                       size_t num_tokens,
+                       float* HWY_RESTRICT output,
+                       hwy::ThreadPool& pool) {
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kSeqLen = TConfig::kSeqLen;
+  static constexpr size_t kQKVDim = TConfig::kQKVDim;
+  static constexpr size_t kHeads = TConfig::kHeads;
+  static const float kQueryScale =
+      static_cast<float>(1.0 / sqrt(static_cast<double>(kQKVDim)));
+  HWY_ASSERT(num_tokens <= kSeqLen);
+  ApplyRMSNorm(weights.pre_attention_norm_scale.data(),
+               activations.input.data(), kModelDim, num_tokens,
+               activations.pre_att_rms_out.data(), pool);
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    MatVec<(kHeads + 2) * kQKVDim, kModelDim>(
+        weights.qkv_einsum_w, 0,
+        activations.pre_att_rms_out.data() + pos * kModelDim, nullptr,
+        activations.qkv.data() + pos * (kHeads + 2) * kQKVDim, pool);
+  }
+  const size_t num_tasks = kHeads * num_tokens;
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    float* HWY_RESTRICT k =
+        activations.qkv.data() + (pos * (kHeads + 2) + kHeads) * kQKVDim;
+    Rope(k, kQKVDim, pos);
+  }
+  pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
+    const size_t head = task % kHeads;
+    const size_t pos = task / kHeads;
+    float* HWY_RESTRICT q =
+        activations.qkv.data() + (pos * (kHeads + 2) + head) * kQKVDim;
+    Rope(q, kQKVDim, pos);
+    MulByConst(kQueryScale, q, kQKVDim);
+  });
+  pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
+    const size_t head = task % kHeads;
+    const size_t pos = task / kHeads;
+    const float* HWY_RESTRICT q =
+        activations.qkv.data() + (pos * (kHeads + 2) + head) * kQKVDim;
+    float* HWY_RESTRICT head_att =
+        activations.att.data() + (pos * kHeads + head) * kSeqLen;
+    for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+      const float* HWY_RESTRICT k2 =
+          activations.qkv.data() + (pos2 * (kHeads + 2) + kHeads) * kQKVDim;
+      const float score = Dot(q, k2, kQKVDim);
+      head_att[pos2] = score;
+    }
+  });
+  pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
+    const size_t head = task % kHeads;
+    const size_t pos = task / kHeads;
+    float* HWY_RESTRICT head_att =
+        activations.att.data() + (pos * kHeads + head) * kSeqLen;
+    Softmax(head_att, pos + 1);
+  });
+  pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
+    const size_t head = task % kHeads;
+    const size_t pos = task / kHeads;
+    const float* HWY_RESTRICT head_att =
+        activations.att.data() + (pos * kHeads + head) * kSeqLen;
+    float* HWY_RESTRICT att_out =
+        activations.att_out.data() + (pos * kHeads + head) * kQKVDim;
+    hwy::ZeroBytes(att_out, kQKVDim * sizeof(*att_out));
+    for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+      float* HWY_RESTRICT v2 =
+          activations.qkv.data() + (pos2 * (kHeads + 2) + kHeads + 1) * kQKVDim;
+      MulByConstAndAdd(head_att[pos2], v2, att_out, kQKVDim);
+    }
+  });
+  hwy::ZeroBytes(activations.attention_out.data(),
+                 num_tokens * kModelDim * sizeof(activations.attention_out[0]));
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    for (size_t head = 0; head < kHeads; ++head) {
+      MatVec<kModelDim, kQKVDim>(
+          weights.attn_vec_einsum_w, head * kModelDim * kQKVDim,
+          activations.att_out.data() + pos * kHeads * kQKVDim + head * kQKVDim,
+          nullptr, activations.att_post1.data() + pos * kModelDim, pool);
+      AddFrom(activations.att_post1.data() + pos * kModelDim,
+              activations.attention_out.data() + pos * kModelDim, kModelDim);
+    }
+  }
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    AddFrom(activations.input.data() + pos * kModelDim,
+            activations.attention_out.data() + pos * kModelDim, kModelDim);
+  }
+  ApplyRMSNorm(weights.pre_ffw_norm_scale.data(),
+               activations.attention_out.data(), kModelDim, num_tokens,
+               activations.bf_pre_ffw_rms_out.data(), pool);
+  static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    MatVec<kFFHiddenDim * 2, kModelDim>(
+        weights.gating_einsum_w, 0,
+        activations.bf_pre_ffw_rms_out.data() + pos * kModelDim, nullptr,
+        activations.ffw_hidden.data() + pos * kFFHiddenDim * 2, pool);
+  }
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    const size_t hidden_offset = pos * kFFHiddenDim * 2;
+    const float* HWY_RESTRICT out =
+        activations.ffw_hidden.data() + hidden_offset;
+    const float* HWY_RESTRICT out_mul = out + kFFHiddenDim;
+    float* HWY_RESTRICT out_gated =
+        activations.ffw_hidden_gated.data() + pos * kFFHiddenDim;
+    namespace hn = hwy::HWY_NAMESPACE;
+    using DF = hn::ScalableTag<float>;
+    DF df;
+    for (size_t i = 0; i < kFFHiddenDim; i += Lanes(df)) {
+      const auto y = hn::Load(df, out + i);
+      const auto x = hn::Load(df, out_mul + i);
+      hn::Store(hn::Mul(x, Gelu(df, y)), df, out_gated + i);
+    }
+  }
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    MatVec<kModelDim, kFFHiddenDim>(
+        weights.linear_w, 0,
+        activations.ffw_hidden_gated.data() + pos * kFFHiddenDim,
+        nullptr, output + pos * kModelDim, pool);
+  }
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    AddFrom(activations.attention_out.data() + pos * kModelDim,
+            output + pos * kModelDim, kModelDim);
+  }
+}
+// Runs the vectorized forward pass over `prompt` and returns the value of
+// CrossEntropyLoss computed on the resulting token probabilities.
+//
+// Every token except the last one is embedded and pushed through all kLayers
+// layers. The final RMSNorm output is projected onto the vocabulary with the
+// input embedding matrix, soft-capped with 30.0f and normalized with Softmax.
+// All intermediate activations are written into `forward`; `context_size` is
+// passed through to CrossEntropyLoss unchanged.
+template <typename TConfig, template<typename...> typename WeightsT,
+          template<typename> typename LayerT>
+float CrossEntropyLossForwardPass(const std::vector<int>& prompt,
+                                  size_t context_size,
+                                  const WeightsT<TConfig>& weights,
+                                  ForwardPass<float, TConfig>& forward,
+                                  hwy::ThreadPool& pool) {
+  static constexpr size_t kVocabSize = TConfig::kVocabSize;
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kLayers = TConfig::kLayers;
+  const float kEmbScaling = EmbeddingScaling<TConfig>();
+  static_assert(!TConfig::kAbsolutePE);
+  static_assert(!TConfig::kPostNormScale);
+  static_assert(TConfig::kKVHeads == 1);
+  HWY_DASSERT(context_size > 0);
+  HWY_DASSERT(context_size < prompt.size());
+  // The last token is never fed in as an input, hence size() - 1 positions.
+  const size_t num_tokens = prompt.size() - 1;
+  InputEmbedding(weights.embedder_input_embedding, prompt, kEmbScaling,
+                 forward.layers[0].input.data(), kModelDim);
+  for (size_t layer = 0; layer < kLayers; ++layer) {
+    auto type = TConfig::kLayerConfig[layer];
+    // TODO(szabadka) Implement Griffin layer.
+    HWY_ASSERT(type == LayerAttentionType::kGemma);
+    // Each layer writes straight into the next layer's input; the last layer
+    // writes into final_layer_output instead.
+    const size_t next_layer = layer + 1;
+    float* HWY_RESTRICT output = next_layer < kLayers
+                                     ? forward.layers[next_layer].input.data()
+                                     : forward.final_layer_output.data();
+    ApplyForwardLayer<TConfig, LayerT>(
+        *weights.GetLayer(layer), forward.layers[layer],
+        num_tokens, output, pool);
+  }
+  ApplyRMSNorm(weights.final_norm_scale.data(),
+               forward.final_layer_output.data(),
+               kModelDim, num_tokens, forward.final_norm_output.data(), pool);
+  auto* const logits = forward.logits.data();
+  auto* const probs = forward.probs.data();
+  // Project every position onto the vocabulary using the embedding matrix.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    MatVec<kVocabSize, kModelDim>(
+        weights.embedder_input_embedding, 0,
+        forward.final_norm_output.data() + pos * kModelDim, nullptr,
+        logits + pos * kVocabSize, pool);
+  }
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    LogitsSoftCap(30.0f, logits + pos * kVocabSize, kVocabSize);
+  }
+  // The capped logits stay in forward.logits; the softmax runs on a copy.
+  hwy::CopyBytes(logits, probs,
+                 num_tokens * kVocabSize * sizeof(logits[0]));
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    Softmax(probs + pos * kVocabSize, kVocabSize);
+  }
+  return CrossEntropyLoss(probs, prompt, context_size, kVocabSize, pool);
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace gcpp
+HWY_AFTER_NAMESPACE();
+#endif  // NOLINT

gemma.cpp/backprop/forward.cc ADDED Viewed

	@@ -0,0 +1,86 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "backprop/forward.h"
+#include "backprop/prompt.h"
+#include "gemma/activations.h"
+#include "gemma/common.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+// Compiles this file for multiple architectures via "foreach_target.h", to
+// which we pass the filename via macro 'argument'.
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "backprop/forward.cc"  // NOLINT
+#include "hwy/foreach_target.h"        // IWYU pragma: keep
+#include "hwy/highway.h"
+// After highway.h
+#include "backprop/forward-inl.h"
+#include "gemma/weights.h"
+HWY_BEFORE_NAMESPACE();
+namespace gcpp {
+namespace HWY_NAMESPACE {
+// Reinterprets the type-erased `weights_u8` and `forward_u8` buffers as
+// CompressedWeights<TConfig> and ForwardPass<float, TConfig>, then runs the
+// templated forward pass on the prompt's tokens and context size.
+template <typename TConfig>
+float CrossEntropyLossForwardPass(const Prompt& prompt,
+                                  const ByteStorageT& weights_u8,
+                                  ByteStorageT& forward_u8,
+                                  hwy::ThreadPool& pool) {
+  using TWeights = CompressedWeights<TConfig>;
+  using TForward = ForwardPass<float, TConfig>;
+  const TWeights& typed_weights =
+      *reinterpret_cast<TWeights*>(weights_u8.get());
+  TForward& typed_forward = *reinterpret_cast<TForward*>(forward_u8.get());
+  return CrossEntropyLossForwardPass<TConfig, CompressedWeights,
+                                     CompressedLayer>(
+      prompt.tokens, prompt.context_size, typed_weights, typed_forward, pool);
+}
+// Selects the model configuration matching `model` and runs the forward pass
+// with it. Only GEMMA_2B and GEMMA_TINY are handled; any other value aborts.
+float CrossEntropyLossForwardPassT(Model model, const Prompt& prompt,
+                                   const ByteStorageT& weights,
+                                   ByteStorageT& forward,
+                                   hwy::ThreadPool& pool) {
+  // TODO(janwas): use CallFunctorForModel
+  if (model == Model::GEMMA_2B) {
+    return CrossEntropyLossForwardPass<ConfigGemma2B<float>>(prompt, weights,
+                                                             forward, pool);
+  }
+  if (model == Model::GEMMA_TINY) {
+    return CrossEntropyLossForwardPass<ConfigGemmaTiny<float>>(
+        prompt, weights, forward, pool);
+  }
+  HWY_ABORT("Model type %d unknown.", static_cast<int>(model));
+}
+}  // namespace HWY_NAMESPACE
+}  // namespace gcpp
+HWY_AFTER_NAMESPACE();
+#if HWY_ONCE
+namespace gcpp {
+// Makes the per-target CrossEntropyLossForwardPassT variants (this file is
+// compiled once per architecture via foreach_target.h) available for dynamic
+// dispatch.
+HWY_EXPORT(CrossEntropyLossForwardPassT);
+// Public entry point declared in backprop/forward.h. Forwards all arguments
+// unchanged to the CrossEntropyLossForwardPassT variant chosen by
+// HWY_DYNAMIC_DISPATCH and returns its result.
+float CrossEntropyLossForwardPass(
+    const Model& model, const Prompt& prompt, const ByteStorageT& weights,
+    ByteStorageT& forward, hwy::ThreadPool& pool) {
+  return HWY_DYNAMIC_DISPATCH(CrossEntropyLossForwardPassT)(
+      model, prompt, weights, forward, pool);
+}
+}  // namespace gcpp
+#endif  // HWY_ONCE

gemma.cpp/backprop/forward.h ADDED Viewed

	@@ -0,0 +1,33 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_H_
+#include <vector>
+#include "backprop/prompt.h"
+#include "gemma/common.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+namespace gcpp {
+// Runs the forward pass of `model` over `prompt` and returns the cross-entropy
+// loss. `weights` and `forward` are type-erased buffers; the implementation
+// reinterprets them as the model's compressed weights and forward-pass
+// activations, and writes the activations into `forward`.
+float CrossEntropyLossForwardPass(
+    const Model& model, const Prompt& prompt, const ByteStorageT& weights,
+    ByteStorageT& forward, hwy::ThreadPool& pool);
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_H_

gemma.cpp/backprop/forward_scalar.h ADDED Viewed

	@@ -0,0 +1,300 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_SCALAR_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_SCALAR_H_
+#include <stddef.h>
+#include <string.h>
+#include <cmath>
+#include <complex>
+#include <vector>
+#include "backprop/common_scalar.h"
+#include "backprop/prompt.h"
+#include "gemma/activations.h"
+#include "gemma/common.h"  // EmbeddingScaling
+#include "gemma/weights_raw.h"
+namespace gcpp {
+// Computes y = w * x, one output column at a time.
+// w is an N x M matrix in row-major order and x is an M x K matrix in
+// column-major order; y receives the N x K product in column-major order.
+template<typename T>
+void MatMulT(const T* w, const T* x, T* y, size_t N, size_t M, size_t K) {
+  for (size_t col = 0; col < K; ++col) {
+    const T* x_col = x + col * M;
+    T* y_col = y + col * N;
+    for (size_t row = 0; row < N; ++row) {
+      y_col[row] = DotT(w + row * M, x_col, M);
+    }
+  }
+}
+// Multi-head variant of MatMulT.
+// w holds H concatenated N x M matrices in row-major order and x is an
+// HM x K matrix in column-major order. y = w' * x is the N x K result in
+// column-major order, where w' is w rearranged into an N x HM matrix.
+// y is zeroed first and then accumulates one head's contribution at a time.
+template<typename T>
+void MultiHeadMatMul(const T* w, const T* x, T* y, size_t H, size_t N,
+                     size_t M, size_t K) {
+  memset(y, 0, N * K * sizeof(y[0]));
+  for (size_t col = 0; col < K; ++col) {
+    T* y_col = y + col * N;
+    for (size_t head = 0; head < H; ++head) {
+      const T* w_head = w + head * N * M;
+      const T* x_head = x + col * H * M + head * M;
+      for (size_t row = 0; row < N; ++row) {
+        y_col[row] += DotT(w_head + row * M, x_head, M);
+      }
+    }
+  }
+}
+// Applies RMS normalization to each of the K length-N vectors stored
+// contiguously in x: out = (1 + w) * x / sqrt(mean(x^2) + 1e-6), where the
+// scale vector w (length N) is shared by all K vectors.
+template<typename T>
+void RMSNormT(const T* w, const T* x, T* out, size_t N, size_t K) {
+  constexpr T eps(1e-6);
+  for (size_t row = 0; row < K; ++row) {
+    const T* in_row = x + row * N;
+    T* out_row = out + row * N;
+    const T sum_sq = SquaredL2(in_row, N);
+    const T inv_rms = T(1.0) / std::sqrt(sum_sq / T(N) + eps);
+    for (size_t j = 0; j < N; ++j) {
+      out_row[j] = (T(1.0) + w[j]) * (inv_rms * in_row[j]);
+    }
+  }
+}
+// Replaces x[0..N) in place with its softmax.
+// The largest real part is subtracted before exponentiation so that std::exp
+// never sees a positive real argument; this shift cancels out in the
+// normalization. std::real keeps the max search well-defined when T is a
+// std::complex type as well as when it is a plain floating-point type.
+// An empty input (N == 0) is a no-op.
+template<typename T>
+void Softmax(T* x, size_t N) {
+  if (N == 0) {
+    return;  // Nothing to normalize; also avoids reading x[0] out of bounds.
+  }
+  T sum = {};
+  auto maxreal = std::real(x[0]);
+  for (size_t i = 1; i < N; ++i) {
+    if (std::real(x[i]) > maxreal) {
+      maxreal = std::real(x[i]);
+    }
+  }
+  for (size_t i = 0; i < N; ++i) {
+    x[i] = std::exp(x[i] - maxreal);
+    sum += x[i];
+  }
+  T scale = T(1.0) / sum;
+  for (size_t i = 0; i < N; ++i) {
+    x[i] *= scale;
+  }
+}
+// Applies the single-vector Softmax independently to each of the K
+// consecutive length-N vectors stored in x.
+template<typename T>
+void Softmax(T* x, size_t N, size_t K) {
+  T* row = x;
+  for (size_t remaining = K; remaining > 0; --remaining, row += N) {
+    Softmax(row, N);
+  }
+}
+// Soft-caps x[0..N) in place: x[i] = cap * tanh((x[i] - xmax) / cap) with
+// cap = 30, where xmax is the element with the largest real part. The element
+// itself (not just its real part) is subtracted, so for std::complex T the
+// imaginary part of the maximum is removed as well.
+// An empty input (N == 0) is a no-op.
+template<typename T>
+void Softcap(T* x, size_t N) {
+  if (N == 0) {
+    return;  // Nothing to cap; also avoids reading x[0] out of bounds.
+  }
+  auto maxreal = std::real(x[0]);
+  size_t imax = 0;
+  for (size_t i = 1; i < N; ++i) {
+    if (std::real(x[i]) > maxreal) {
+      maxreal = std::real(x[i]);
+      imax = i;
+    }
+  }
+  T cap = 30.0;
+  T inv_cap = T(1.0) / cap;
+  T xmax = x[imax];
+  for (size_t i = 0; i < N; ++i) {
+    x[i] = cap * std::tanh((x[i] - xmax) * inv_cap);
+  }
+}
+// For each of the K positions, `in` holds 2N values: the first N go through
+// Gelu and are multiplied element-wise with the second N. The N products per
+// position are written to `out`.
+template<typename T>
+void GatedGelu(const T* in, T* out, size_t N, size_t K) {
+  for (size_t pos = 0; pos < K; ++pos) {
+    const T* gelu_in = in + pos * 2 * N;
+    const T* multiplier = gelu_in + N;
+    T* gated = out + pos * N;
+    for (size_t j = 0; j < N; ++j) {
+      gated[j] = multiplier[j] * Gelu(gelu_in[j]);
+    }
+  }
+}
+// Copies the length-N embedding row of every token except the last one from
+// `w` into consecutive rows of `y` and multiplies each copied row by
+// `scaling`. An empty token list writes nothing.
+template<typename T>
+void InputEmbedding(const T* w, const std::vector<int>& tokens, T scaling,
+                    T* y, size_t N) {
+  if (tokens.empty()) {
+    return;
+  }
+  const size_t num_inputs = tokens.size() - 1;
+  for (size_t pos = 0; pos < num_inputs; ++pos) {
+    T* row = y + pos * N;
+    memcpy(row, w + tokens[pos] * N, N * sizeof(y[0]));
+    MulByConstT(scaling, row, N);
+  }
+}
+// Computes causal attention scores. Each position of `qkv` stores kHeads
+// query vectors followed by one key and one value vector, each of length
+// kQKVDim. For every (pos, head) the dot product of that query with the key
+// of each pos2 <= pos is written to output[(pos * kHeads + head) * kSeqLen +
+// pos2]; entries with pos2 > pos are left untouched.
+template<typename T>
+void MaskedAttention(const T* qkv, T* output, size_t num_tokens,
+                     size_t kHeads, size_t kQKVDim, size_t kSeqLen) {
+  const size_t pos_stride = (kHeads + 2) * kQKVDim;
+  const size_t key_offset = kHeads * kQKVDim;
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    const T* queries = qkv + pos * pos_stride;
+    for (size_t head = 0; head < kHeads; ++head) {
+      const T* q = queries + head * kQKVDim;
+      T* scores = output + pos * kHeads * kSeqLen + head * kSeqLen;
+      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+        const T* k = qkv + pos2 * pos_stride + key_offset;
+        scores[pos2] = DotT(q, k, kQKVDim);
+      }
+    }
+  }
+}
+// For every (pos, head) row of length kSeqLen in x, applies Softmax to the
+// first pos + 1 entries and sets the remaining entries of the row to zero.
+template<typename T>
+void MaskedSoftmax(T* x, size_t num_tokens, size_t kHeads, size_t kSeqLen) {
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    const size_t num_visible = pos + 1;
+    for (size_t head = 0; head < kHeads; ++head) {
+      T* row = x + (pos * kHeads * kSeqLen + head * kSeqLen);
+      Softmax(row, num_visible);
+      memset(row + num_visible, 0, (kSeqLen - num_visible) * sizeof(T));
+    }
+  }
+}
+// For every (pos, head), writes to `output` the attention-weighted sum of the
+// value vectors of positions 0..pos. The weights come from the (pos, head)
+// row of `attention`; the value vector of a position is the last kQKVDim-long
+// vector of that position's (kHeads + 2) vectors in `qkv`.
+template<typename T>
+void MixByAttention(const T* qkv, const T* attention, T* output,
+                    size_t num_tokens, size_t kHeads, size_t kQKVDim,
+                    size_t kSeqLen) {
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    for (size_t head = 0; head < kHeads; ++head) {
+      const T* weights = attention + (pos * kHeads * kSeqLen + head * kSeqLen);
+      T* mixed = output + (head * kQKVDim + pos * kHeads * kQKVDim);
+      memset(mixed, 0, kQKVDim * sizeof(mixed[0]));
+      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
+        const T* v = qkv + (pos2 * (kHeads + 2) + kHeads + 1) * kQKVDim;
+        MulByConstAndAddT(weights[pos2], v, mixed, kQKVDim);
+      }
+    }
+  }
+}
+// Scalar reference implementation of one transformer layer for `num_tokens`
+// positions. Reads the layer input from activations.input, stores every
+// intermediate result in `activations`, and writes the layer result
+// (num_tokens * kModelDim values) to `output`.
+template<typename T, typename TConfig>
+void ApplyLayer(const Layer<T, TConfig>& weights,
+                ForwardLayer<T, TConfig>& activations,
+                size_t num_tokens, T* output) {
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kSeqLen = TConfig::kSeqLen;
+  static constexpr size_t kQKVDim = TConfig::kQKVDim;
+  static constexpr size_t kHeads = TConfig::kHeads;
+  static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
+  static const T kQueryScale = T(1.0) / std::sqrt(T(kQKVDim));
+  // Attention block: RMSNorm, then one matmul that produces, per position,
+  // kHeads query vectors followed by one key and one value vector.
+  RMSNormT(weights.pre_attention_norm_scale.data(), activations.input.data(),
+           activations.pre_att_rms_out.data(), kModelDim, num_tokens);
+  MatMulT(weights.qkv_einsum_w.data(), activations.pre_att_rms_out.data(),
+          activations.qkv.data(), (kHeads + 2) * kQKVDim, kModelDim,
+          num_tokens);
+  // Rope is applied to vectors 0..kHeads inclusive, i.e. all queries and the
+  // key; the value vector (index kHeads + 1) is left as is.
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    T* qkv = activations.qkv.data() + pos * (kHeads + 2) * kQKVDim;
+    for (size_t h = 0; h <= kHeads; ++h) {
+      Rope(qkv + h * kQKVDim, kQKVDim, pos);
+    }
+  }
+  // Only the first kHeads * kQKVDim entries (the queries) are scaled by
+  // 1 / sqrt(kQKVDim).
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    T* qkv = activations.qkv.data() + pos * (kHeads + 2) * kQKVDim;
+    MulByConstT(kQueryScale, qkv, kHeads * kQKVDim);
+  }
+  // Causal scores -> masked softmax -> weighted sum of values.
+  MaskedAttention(activations.qkv.data(), activations.att.data(),
+                  num_tokens, kHeads, kQKVDim, kSeqLen);
+  MaskedSoftmax(activations.att.data(), num_tokens, kHeads, kSeqLen);
+  MixByAttention(activations.qkv.data(), activations.att.data(),
+                 activations.att_out.data(), num_tokens, kHeads, kQKVDim,
+                 kSeqLen);
+  // Combine the per-head outputs into kModelDim values per position.
+  MultiHeadMatMul(weights.attn_vec_einsum_w.data(), activations.att_out.data(),
+                  activations.attention_out.data(), kHeads, kModelDim, kQKVDim,
+                  num_tokens);
+  // Residual connection: add the layer input to the attention output.
+  AddFromT(activations.input.data(), activations.attention_out.data(),
+           num_tokens * kModelDim);
+  // Feed-forward block: RMSNorm, gating matmul producing 2 * kFFHiddenDim
+  // values per position, gated Gelu, and the projection back to kModelDim.
+  RMSNormT(weights.pre_ffw_norm_scale.data(), activations.attention_out.data(),
+           activations.bf_pre_ffw_rms_out.data(), kModelDim, num_tokens);
+  MatMulT(weights.gating_einsum_w.data(), activations.bf_pre_ffw_rms_out.data(),
+          activations.ffw_hidden.data(), kFFHiddenDim * 2, kModelDim,
+          num_tokens);
+  GatedGelu(activations.ffw_hidden.data(), activations.ffw_hidden_gated.data(),
+            kFFHiddenDim, num_tokens);
+  MatMulT(weights.linear_w.data(), activations.ffw_hidden_gated.data(),
+          output, kModelDim, kFFHiddenDim, num_tokens);
+  // Second residual connection: add the post-attention activations.
+  AddFromT(activations.attention_out.data(), output, num_tokens * kModelDim);
+}
+// Returns the cross-entropy loss, in bits, of the probabilities `x` against
+// the tokens of `prompt`.
+//
+// `x` holds V probabilities for each of the tokens.size() - 1 input positions;
+// row i is the predicted distribution for token i + 1. Positions whose next
+// token still belongs to the context (i + 1 < context_size) are skipped. The
+// sum of natural logarithms is multiplied by -1 / ln(2) to convert to bits.
+template<typename T>
+T CrossEntropyLoss(const T* x, const Prompt& prompt, size_t V) {
+  T loss = {};
+  // Bind by reference: the tokens are only read, so copying the vector on
+  // every call would be wasted work.
+  const std::vector<int>& tokens = prompt.tokens;
+  const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
+  for (size_t i = 0; i < num_tokens; ++i) {
+    if (i + 1 < prompt.context_size) {
+      continue;  // next token is part of context, don't try to predict it
+    }
+    const int next_token = tokens[i + 1];
+    loss += std::log(x[i * V + next_token]);
+  }
+  T scaling = -1.0 / std::log(2.0);
+  return loss * scaling;
+}
+// Scalar reference forward pass: embeds all tokens of `prompt` except the
+// last, applies the kLayers layers, the final RMSNorm, the projection onto
+// the vocabulary (using the input embedding matrix), Softcap and Softmax, and
+// returns CrossEntropyLoss of the resulting probabilities. All intermediate
+// activations are stored in `forward`.
+template<typename T, typename TConfig>
+T CrossEntropyLossForwardPass(const Prompt& prompt,
+                              const Weights<T, TConfig>& weights,
+                              ForwardPass<T, TConfig>& forward) {
+  static constexpr size_t kModelDim = TConfig::kModelDim;
+  static constexpr size_t kVocabSize = TConfig::kVocabSize;
+  static constexpr size_t kLayers = TConfig::kLayers;
+  // Bind by reference: the tokens are only read, so copying the vector on
+  // every forward pass would be wasted work.
+  const std::vector<int>& tokens = prompt.tokens;
+  const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
+  const T kEmbScaling = EmbeddingScaling(kModelDim);
+  InputEmbedding(weights.embedder_input_embedding.data(), tokens,
+                 kEmbScaling, forward.layers[0].input.data(), kModelDim);
+  for (size_t layer = 0; layer < kLayers; ++layer) {
+    // Each layer writes into the next layer's input; the last layer writes
+    // into final_layer_output.
+    T* output = layer + 1 < kLayers ?
+                forward.layers[layer + 1].input.data() :
+                forward.final_layer_output.data();
+    ApplyLayer(*weights.GetLayer(layer), forward.layers[layer], num_tokens,
+               output);
+  }
+  RMSNormT(weights.final_norm_scale.data(),
+           forward.final_layer_output.data(),
+           forward.final_norm_output.data(), kModelDim, num_tokens);
+  MatMulT(weights.embedder_input_embedding.data(),
+          forward.final_norm_output.data(),
+          forward.logits.data(), kVocabSize, kModelDim, num_tokens);
+  for (size_t pos = 0; pos < num_tokens; ++pos) {
+    Softcap(forward.logits.data() + pos * kVocabSize, kVocabSize);
+  }
+  // The capped logits stay in forward.logits; the softmax runs on a copy.
+  memcpy(forward.probs.data(), forward.logits.data(),
+         num_tokens * kVocabSize * sizeof(forward.logits[0]));
+  Softmax(forward.probs.data(), kVocabSize, num_tokens);
+  return CrossEntropyLoss(forward.probs.data(), prompt, kVocabSize);
+}
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_SCALAR_H_

gemma.cpp/backprop/optimize_test.cc ADDED Viewed

	@@ -0,0 +1,144 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <stddef.h>
+#include <limits>
+#include <random>
+#include <vector>
+#include "gtest/gtest.h"
+#include "backprop/backward.h"
+#include "backprop/forward.h"
+#include "backprop/optimizer.h"
+#include "backprop/prompt.h"
+#include "backprop/sampler.h"
+#include "gemma/activations.h"
+#include "gemma/common.h"
+#include "gemma/gemma.h"
+#include "gemma/weights.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+namespace gcpp {
+// Trains a randomly initialized GEMMA_TINY model with Adam on the
+// ReverseSequenceSampler task and expects the batch loss to drop below 0.5 in
+// fewer than 300 steps, with every prompt of the final batch verified.
+TEST(OptimizeTest, GradientDescent) {
+  hwy::ThreadPool pool(0);
+  std::mt19937 gen(42);
+  Model model_type = Model::GEMMA_TINY;
+  Type weight_type = Type::kF32;
+  // grad holds the batch gradient; grad_m and grad_v are the Adam moments.
+  ByteStorageT grad = CallForModelAndWeight<AllocateCompressedWeights>(
+      model_type, weight_type, pool);
+  ByteStorageT grad_m = CallForModelAndWeight<AllocateCompressedWeights>(
+      model_type, weight_type, pool);
+  ByteStorageT grad_v = CallForModelAndWeight<AllocateCompressedWeights>(
+      model_type, weight_type, pool);
+  ByteStorageT forward =
+      CallForModelAndWeight<AllocateForwardPass>(model_type, weight_type);
+  ByteStorageT backward =
+      CallForModelAndWeight<AllocateForwardPass>(model_type, weight_type);
+  KVCache kv_cache = KVCache::Create(model_type);
+  Gemma gemma(GemmaTokenizer(), model_type, weight_type, pool);
+  // Runs generation on `prompt` and collects every streamed token until the
+  // end token is seen.
+  const auto generate = [&](const std::vector<int>& prompt) {
+    std::vector<int> reply;
+    auto stream_token = [&reply](int token, float) {
+      reply.push_back(token);
+      return token != ReverseSequenceSampler::kEndToken;
+    };
+    RuntimeConfig runtime = {
+        .max_tokens = 32,
+        .max_generated_tokens = 16,
+        .temperature = 1.0f,
+        .verbosity = 0,
+        .gen = &gen,
+        .stream_token = stream_token,
+        .eos_id = ReverseSequenceSampler::kEndToken,
+    };
+    TimingInfo timing_info;
+    gemma.Generate(runtime, prompt, 0, kv_cache, timing_info);
+    return reply;
+  };
+  // True if the tokens streamed for the prompt's context match the full
+  // prompt token by token.
+  auto verify = [&](const Prompt& prompt) {
+    auto context = prompt.context();
+    std::vector<int> reply = generate(context);
+    bool ok = true;
+    for (size_t i = 0; ok && i < prompt.tokens.size(); ++i) {
+      if (i >= reply.size() || reply[i] != prompt.tokens[i]) {
+        ok = false;
+      }
+    }
+    return ok;
+  };
+  RandInitWeights(model_type, weight_type, gemma.Weights(), pool, gen);
+  CallForModelAndWeight<ZeroInitCompressedWeights>(
+      model_type, weight_type, grad_m, pool);
+  CallForModelAndWeight<ZeroInitCompressedWeights>(
+      model_type, weight_type, grad_v, pool);
+  printf("Initial weights:\n");
+  LogWeightStats(model_type, weight_type, gemma.Weights());
+  constexpr size_t kBatchSize = 8;
+  const float alpha = 0.001f;
+  const float beta1 = 0.9f;
+  const float beta2 = 0.999f;
+  const float epsilon = 1e-8f;
+  ReverseSequenceSampler training_task({
+      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+  size_t steps = 0;
+  size_t num_ok = 0;
+  for (; steps < 1000000; ++steps) {
+    // Re-seeded every step, so every step trains on the same batch.
+    std::mt19937 sgen(42);
+    CallForModelAndWeight<ZeroInitCompressedWeights>(
+        model_type, weight_type, grad, pool);
+    float total_loss = 0.0f;
+    num_ok = 0;
+    for (size_t i = 0; i < kBatchSize; ++i) {
+      Prompt prompt = training_task.Sample(sgen);
+      total_loss += CrossEntropyLossForwardPass(model_type, prompt,
+                                                gemma.Weights(), forward, pool);
+      CrossEntropyLossBackwardPass(model_type, prompt, gemma.Weights(), forward,
+                                   grad, backward, pool);
+      num_ok += verify(prompt) ? 1 : 0;
+    }
+    total_loss /= kBatchSize;
+    AdamUpdate(model_type, weight_type, grad, alpha, beta1, beta2, epsilon,
+               steps + 1, gemma.Weights(), grad_m, grad_v, pool);
+    printf("step: %zu  total_loss: %.15f   num_ok: %zu/%zu\n",
+           steps, total_loss, num_ok, kBatchSize);
+    if (steps % 100 == 0) {
+      printf("Batch gradient:\n");
+      LogWeightStats(model_type, weight_type, grad);
+    }
+    if (total_loss < 0.5f) {
+      break;
+    }
+  }
+  printf("Num steps: %zu\n", steps);
+  printf("Final weights:\n");
+  LogWeightStats(model_type, weight_type, gemma.Weights());
+  EXPECT_LT(steps, 300u);
+  EXPECT_EQ(num_ok, kBatchSize);
+}
+}  // namespace gcpp

gemma.cpp/backprop/optimizer.cc ADDED Viewed

	@@ -0,0 +1,135 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "backprop/optimizer.h"
+#include <cmath>
+#include <random>
+#include "compression/compress.h"
+#include "gemma/common.h"
+#include "gemma/weights.h"
+#include "hwy/base.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+namespace gcpp {
+namespace {
+// Tensor visitor that overwrites every element of a float tensor with a
+// sample from a normal distribution (mean 0, standard deviation 1) drawn
+// from the supplied generator, and sets the tensor's scale to 1.
+class WeightInitializer {
+ public:
+  // `gen` is held by reference and must outlive this object. `explicit`
+  // prevents a std::mt19937 from converting to a WeightInitializer implicitly.
+  explicit WeightInitializer(std::mt19937& gen)
+      : dist_(0.0f, 1.0f), gen_(gen) {}
+  // Fills all N elements of `tensor`; `name` is unused.
+  template <size_t N>
+  void operator()(const char* name, CompressedArray<float, N>& tensor) {
+    float* data = tensor.data();
+    for (size_t i = 0; i < N; ++i) {
+      data[i] = dist_(gen_);
+    }
+    tensor.set_scale(1.0f);
+  }
+ private:
+  std::normal_distribution<float> dist_;
+  std::mt19937& gen_;
+};
+// Functor that reinterprets the type-erased `weights_u8` buffer as
+// CompressedWeights<TConfig> and runs a WeightInitializer over its tensors.
+// `pool` is not used.
+template <typename TConfig>
+struct RandInitWeightsT {
+  void operator()(const ByteStorageT& weights_u8, hwy::ThreadPool& pool,
+                  std::mt19937& gen) const {
+    using TWeights = CompressedWeights<TConfig>;
+    TWeights& typed_weights = *reinterpret_cast<TWeights*>(weights_u8.get());
+    // TODO(szabadka) Use the same weight initialization method as in the python
+    // version.
+    WeightInitializer initializer(gen);
+    ForEachTensor1<TConfig>(initializer, typed_weights);
+  }
+};
+// Tensor visitor that performs one Adam update step on a weight tensor, given
+// its gradient and its running first/second moment estimates.
+class AdamUpdater {
+ public:
+  // `t` is the step number used for the bias-correction factors
+  // norm1_ = 1 / (1 - beta1^t) and norm2_ = 1 / (1 - beta2^t); t == 0 would
+  // make both a division by zero. The caller in this file's test passes
+  // steps + 1.
+  explicit AdamUpdater(float alpha, float beta1, float beta2, float epsilon,
+                       size_t t)
+      : alpha_(alpha), beta1_(beta1), beta2_(beta2), cbeta1_(1.0f - beta1),
+        cbeta2_(1.0f - beta2), norm1_(1.0 / (1.0 - std::pow(beta1, t))),
+        norm2_(1.0 / (1.0 - std::pow(beta2, t))), epsilon_(epsilon) {}
+  // Updates `grad_m`, `grad_v` and `weights` element-wise from `grad`.
+  // `name` is unused.
+  template <size_t kCapacity>
+  void operator()(const char* name,
+                  const CompressedArray<float, kCapacity>& grad,
+                  CompressedArray<float, kCapacity>& weights,
+                  CompressedArray<float, kCapacity>& grad_m,
+                  CompressedArray<float, kCapacity>& grad_v) {
+    const float* HWY_RESTRICT g = grad.data();
+    float* HWY_RESTRICT w = weights.data();
+    float* HWY_RESTRICT m = grad_m.data();
+    float* HWY_RESTRICT v = grad_v.data();
+    for (size_t i = 0; i < kCapacity; ++i) {
+      // m = beta1 * m + (1 - beta1) * g
+      m[i] *= beta1_;
+      m[i] += cbeta1_ * g[i];
+      // v = beta2 * v + (1 - beta2) * g^2
+      v[i] *= beta2_;
+      v[i] += cbeta2_ * g[i] * g[i];
+      // Bias-corrected moment estimates.
+      const float mhat = m[i] * norm1_;
+      const float vhat = v[i] * norm2_;
+      w[i] -= alpha_ * mhat / (std::sqrt(vhat) + epsilon_);
+    }
+  }
+ private:
+  float alpha_;    // Step size.
+  float beta1_;    // Decay rate of the first moment.
+  float beta2_;    // Decay rate of the second moment.
+  float cbeta1_;   // 1 - beta1.
+  float cbeta2_;   // 1 - beta2.
+  float norm1_;    // 1 / (1 - beta1^t).
+  float norm2_;    // 1 / (1 - beta2^t).
+  float epsilon_;  // Added to sqrt(vhat) in the denominator.
+};
+// Functor that reinterprets the four type-erased buffers as
+// CompressedWeights<TConfig> and runs an AdamUpdater over their tensors.
+// `grad_u8` is only read; the other three buffers are updated in place.
+// `pool` is not used.
+template <typename TConfig>
+struct AdamUpdateT {
+  void operator()(const ByteStorageT& grad_u8, float alpha, float beta1,
+                  float beta2, float epsilon, size_t t,
+                  const ByteStorageT& weights_u8, const ByteStorageT& grad_m_u8,
+                  const ByteStorageT& grad_v_u8, hwy::ThreadPool& pool) const {
+    using TWeights = CompressedWeights<TConfig>;
+    const TWeights& gradient =
+        *reinterpret_cast<const TWeights*>(grad_u8.get());
+    TWeights& params = *reinterpret_cast<TWeights*>(weights_u8.get());
+    TWeights& first_moment = *reinterpret_cast<TWeights*>(grad_m_u8.get());
+    TWeights& second_moment = *reinterpret_cast<TWeights*>(grad_v_u8.get());
+    AdamUpdater adam(alpha, beta1, beta2, epsilon, t);
+    ForEachTensor4<TConfig>(adam, gradient, params, first_moment,
+                            second_moment);
+  }
+};
+}  // namespace
+// Fills all tensors of `weights` with samples from a normal distribution
+// (mean 0, standard deviation 1) drawn from `gen`. Only Type::kF32 weights
+// are supported; any other `weight_type` fails the assertion.
+void RandInitWeights(Model model_type, Type weight_type,
+                     const ByteStorageT& weights, hwy::ThreadPool& pool,
+                     std::mt19937& gen) {
+  HWY_ASSERT(weight_type == Type::kF32);
+  CallForModel<float, RandInitWeightsT>(model_type, weights, pool, gen);
+}
+// Applies one Adam step with step number `t` to `weights`, using the gradient
+// `grad` and updating the moment estimates `grad_m` and `grad_v` in place.
+// Only Type::kF32 weights are supported; any other `weight_type` fails the
+// assertion.
+void AdamUpdate(Model model_type, Type weight_type, const ByteStorageT& grad,
+                float alpha, float beta1, float beta2, float epsilon, size_t t,
+                const ByteStorageT& weights, const ByteStorageT& grad_m,
+                const ByteStorageT& grad_v, hwy::ThreadPool& pool) {
+  HWY_ASSERT(weight_type == Type::kF32);
+  CallForModel<float, AdamUpdateT>(model_type, grad, alpha, beta1, beta2,
+                                   epsilon, t, weights, grad_m, grad_v, pool);
+}
+}  // namespace gcpp

gemma.cpp/backprop/optimizer.h ADDED Viewed

	@@ -0,0 +1,37 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_OPTIMIZER_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_OPTIMIZER_H_
+#include <random>
+#include "gemma/common.h"
+#include "hwy/contrib/thread_pool/thread_pool.h"
+namespace gcpp {
+// Randomly initializes all tensors of `weights` using `gen`. `weights` is a
+// type-erased buffer holding the weights of `model_type`; although passed by
+// const reference, the data it owns is overwritten.
+void RandInitWeights(Model model_type, Type weight_type,
+                     const ByteStorageT& weights, hwy::ThreadPool& pool,
+                     std::mt19937& gen);
+// Performs one Adam optimizer step. `grad` is the gradient, `alpha` the step
+// size, `beta1`/`beta2` the moment decay rates, `epsilon` the denominator
+// offset and `t` the step number. `weights`, `grad_m` and `grad_v` are
+// passed by const reference, but the data they own is updated in place.
+void AdamUpdate(Model model_type, Type weight_type, const ByteStorageT& grad,
+                float alpha, float beta1, float beta2, float epsilon, size_t t,
+                const ByteStorageT& weights, const ByteStorageT& grad_m,
+                const ByteStorageT& grad_v, hwy::ThreadPool& pool);
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_OPTIMIZER_H_

gemma.cpp/backprop/prompt.h ADDED Viewed

	@@ -0,0 +1,34 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_PROMPT_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_PROMPT_H_
+#include <stddef.h>
+#include <vector>
+namespace gcpp {
+// A token sequence together with the length of its leading "context" part.
+struct Prompt {
+  // The full token sequence.
+  std::vector<int> tokens;
+  // Number of leading entries of `tokens` that form the context.
+  // Zero-initialized so that a default-constructed Prompt has a well-defined
+  // (empty) context instead of an indeterminate size.
+  size_t context_size = 0;
+  // Returns a copy of the first `context_size` tokens. If `context_size`
+  // exceeds `tokens.size()`, the whole sequence is returned rather than
+  // forming an iterator past the end of `tokens`.
+  std::vector<int> context() const {
+    const size_t count =
+        context_size < tokens.size() ? context_size : tokens.size();
+    return std::vector<int>(tokens.begin(), tokens.begin() + count);
+  }
+};
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_PROMPT_H_

gemma.cpp/backprop/sampler.h ADDED Viewed

	@@ -0,0 +1,87 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_SAMPLER_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_SAMPLER_H_
+#include <stddef.h>
+#include <stdio.h>
+#include <random>
+#include <vector>
+#include "backprop/prompt.h"
+namespace gcpp {
+// Interface for objects that generate random prompts.
+class PromptSampler {
+ public:
+  // Produces a single prompt, drawing any randomness from `gen`.
+  virtual Prompt Sample(std::mt19937& gen) = 0;
+  virtual ~PromptSampler() = default;
+  // Calls Sample() `batch_size` times in sequence with the same generator and
+  // returns the prompts in the order they were produced.
+  std::vector<Prompt> SampleBatch(size_t batch_size, std::mt19937& gen) {
+    std::vector<Prompt> prompts;
+    prompts.reserve(batch_size);
+    for (size_t remaining = batch_size; remaining != 0; --remaining) {
+      prompts.push_back(Sample(gen));
+    }
+    return prompts;
+  }
+};
+// Samples prompts of the form
+//
+//   d_0 ... d_{len-1}  kReverseToken  d_{len-1} ... d_0  kEndToken
+//
+// where every d_i is a token in [0, 9]. The context of the prompt is the
+// digit sequence plus the kReverseToken (context_size == len + 1).
+class ReverseSequenceSampler : public PromptSampler {
+ public:
+  // `length_histo[i]` is the relative weight of sequences with i + 1 digits;
+  // entries that are zero or negative contribute nothing.
+  explicit ReverseSequenceSampler(const std::vector<int>& length_histo)
+      : token_dist_(0, 9) {
+    // Expand the histogram into a lookup table in which length i + 1 appears
+    // length_histo[i] times, so a uniform index yields the desired weights.
+    for (size_t i = 0; i < length_histo.size(); ++i) {
+      const int count = length_histo[i];
+      for (int j = 0; j < count; ++j) {
+        length_lut_.push_back(static_cast<int>(i + 1));
+      }
+    }
+    // An empty or all-zero histogram would leave the table empty, giving the
+    // distribution the invalid bounds (0, -1) and making every lookup in
+    // Sample() out of range. Fall back to sequences of length 1 instead.
+    if (length_lut_.empty()) {
+      length_lut_.push_back(1);
+    }
+    length_dist_ = std::uniform_int_distribution<>(
+        0, static_cast<int>(length_lut_.size()) - 1);
+  }
+  virtual ~ReverseSequenceSampler() = default;
+  static constexpr int kReverseToken = 10;
+  static constexpr int kEndToken = 11;
+  // Draws a length from the histogram, then `len` digit tokens, and lays them
+  // out forwards and mirrored around kReverseToken, terminated by kEndToken.
+  Prompt Sample(std::mt19937& gen) override {
+    Prompt prompt;
+    // Entries of length_lut_ are always >= 1, so the conversion is lossless.
+    const size_t len = static_cast<size_t>(length_lut_[length_dist_(gen)]);
+    prompt.tokens.resize(2 * len + 2);
+    prompt.tokens[len] = kReverseToken;
+    prompt.tokens[2 * len + 1] = kEndToken;
+    for (size_t i = 0; i < len; ++i) {
+      // Position i and its mirror image 2 * len - i receive the same digit.
+      prompt.tokens[i] = prompt.tokens[2 * len - i] = token_dist_(gen);
+    }
+    prompt.context_size = len + 1;
+    return prompt;
+  }
+  // Prints the prompt's tokens followed by its context size to stdout.
+  // Tokens outside the 12-entry vocabulary are printed as "?" instead of
+  // indexing past the end of the vocabulary table.
+  static void LogPrompt(const Prompt& prompt) {
+    static const char* kVocab[] = {
+      "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "-->", "|",
+    };
+    constexpr int kVocabSize =
+        static_cast<int>(sizeof(kVocab) / sizeof(kVocab[0]));
+    for (int token : prompt.tokens) {
+      const bool known = token >= 0 && token < kVocabSize;
+      printf("%s", known ? kVocab[token] : "?");
+    }
+    printf("  [context_size: %zu]\n", prompt.context_size);
+  }
+ private:
+  // Uniform over the digit tokens 0..9.
+  std::uniform_int_distribution<> token_dist_;
+  // Uniform over the indices of length_lut_.
+  std::uniform_int_distribution<> length_dist_;
+  // Sequence lengths, each repeated according to its histogram weight.
+  std::vector<int> length_lut_;
+};
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_SAMPLER_H_

gemma.cpp/backprop/test_util.h ADDED Viewed

	@@ -0,0 +1,168 @@

+// Copyright 2024 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_
+#define THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_
+#include <stddef.h>
+#include <array>
+#include <complex>
+#include "gtest/gtest.h"
+#include "gemma/weights_raw.h"
+namespace gcpp {
+// Copies the real-valued array `x` into `c_x`, giving every element an
+// imaginary part of zero.
+template<typename T, typename U, size_t kLen>
+void Complexify(const std::array<T, kLen>& x,
+                std::array<std::complex<U>, kLen>& c_x) {
+  size_t out_idx = 0;
+  for (const T& real_part : x) {
+    c_x[out_idx++] = std::complex<U>(real_part, 0.0);
+  }
+}
+// Converts one layer's weights from `w` into `c_w` by applying Complexify to
+// each of the six members listed below. Members of Layer that are not listed
+// here (if any) are left untouched in `c_w`.
+// NOTE(review): which Complexify overload each call resolves to depends on
+// the member types declared in gemma/weights_raw.h -- presumably std::array,
+// i.e. the element-wise overload -- confirm.
+template<typename T, typename U, typename TConfig>
+void Complexify(const Layer<T, TConfig>& w,
+                Layer<std::complex<U>, TConfig>& c_w) {
+  Complexify(w.pre_attention_norm_scale, c_w.pre_attention_norm_scale);
+  Complexify(w.attn_vec_einsum_w, c_w.attn_vec_einsum_w);
+  Complexify(w.qkv_einsum_w, c_w.qkv_einsum_w);
+  Complexify(w.pre_ffw_norm_scale, c_w.pre_ffw_norm_scale);
+  Complexify(w.gating_einsum_w, c_w.gating_einsum_w);
+  Complexify(w.linear_w, c_w.linear_w);
+}
+// Converts a whole set of weights from `w` into `c_w`: the input embedding,
+// the final norm scale, and then each of the TConfig::kLayers layers.
+template<typename T, typename U, typename TConfig>
+void Complexify(const Weights<T, TConfig>& w,
+                Weights<std::complex<U>, TConfig>& c_w) {
+  Complexify(w.embedder_input_embedding, c_w.embedder_input_embedding);
+  Complexify(w.final_norm_scale, c_w.final_norm_scale);
+  constexpr size_t kNumLayers = TConfig::kLayers;
+  for (size_t layer = 0; layer != kNumLayers; ++layer) {
+    Complexify(*w.GetLayer(layer), *c_w.GetLayer(layer));
+  }
+}
+// Asserts that `actual` matches `expected` in two ways:
+//  1. Element-wise: |actual[i] - expected[i]| must not exceed
+//     max(max_abs_err, |expected[i]| * max_rel_err).
+//  2. Direction: if the squared norm of `actual` exceeds 1e-40, the
+//     normalized dot product of the two arrays must be within 1e-7 of 1.
+// `line` is only used in failure messages, to identify the caller.
+// The checks use ASSERT_NEAR, so the first failing check ends the function.
+template<typename T, typename U, size_t N>
+void TestNear(const std::array<T, N>& actual, const std::array<U, N>& expected,
+              double max_abs_err, double max_rel_err, int line) {
+  double sum0 = 0;   // sum of actual[i]^2
+  double sum1 = 0;   // sum of expected[i]^2
+  double sum01 = 0;  // sum of actual[i] * expected[i]
+  for (size_t i = 0; i < N; ++i) {
+    // The products are formed in the element types before being accumulated
+    // in double.
+    sum0 += actual[i] * actual[i];
+    sum1 += expected[i] * expected[i];
+    sum01 += actual[i] * expected[i];
+    ASSERT_NEAR(actual[i], expected[i],
+                std::max(max_abs_err, std::abs(expected[i]) * max_rel_err))
+        << "line: " << line << " dim=" << N << " i=" << i;
+  }
+  // Skip the direction check when `actual` is (numerically) the zero vector.
+  // NOTE(review): only sum0 is guarded; sum1 is not checked before the
+  // division below.
+  if (sum0 > 1e-40) {
+    double norm_dot = sum01 / std::sqrt(sum0) / std::sqrt(sum1);
+    ASSERT_NEAR(norm_dot, 1.0, 1e-7)
+        << "line: " << line << " sum0: " << sum0  << " sum1: " << sum1
+        << " sum01: " << sum01;
+  }
+}
+// Verifies an analytic gradient against a numerical one obtained with the
+// complex-step method.
+//
+// Let f : R->R be the function under test and F : C->C an extension of it
+// that is complex differentiable at x. Expanding around x gives
+//
+//   F(x + ih) = F(x) + ih F'(x) + O(h^2)
+//
+// and therefore
+//
+//   F'(x) ~= Imag(F(x + ih)) / h
+//
+// Unlike real-valued finite differences, this never subtracts two nearly
+// equal floating point numbers, which makes it numerically more stable.
+//
+// For each i, x[i] is temporarily given the imaginary part `step`, `func` is
+// evaluated, and x[i] is then reset to its real part. The resulting numerical
+// gradient is compared with `grad` via TestNear; `line` is forwarded for use
+// in failure messages.
+template<typename T, typename U, size_t N, typename FUNC>
+void TestGradient(const std::array<T, N>& grad,
+                  std::array<std::complex<U>, N>& x, FUNC func,
+                  U step, T max_abs_err, T max_rel_err, int line) {
+  const U reciprocal_step = 1.0 / step;
+  std::array<T, N> numeric_grad;
+  for (size_t idx = 0; idx < N; ++idx) {
+    const U real_part = std::real(x[idx]);
+    x[idx] = std::complex<U>(real_part, step);
+    const std::complex<U> perturbed_value = func();
+    numeric_grad[idx] = std::imag(perturbed_value) * reciprocal_step;
+    x[idx] = real_part;
+  }
+  TestNear(grad, numeric_grad, max_abs_err, max_rel_err, line);
+}
+// float gradient checked against single-precision complex inputs; forwards
+// to the generic overload with an imaginary step of 1e-30f.
+template<size_t N, typename FUNC>
+void TestGradient(const std::array<float, N>& grad,
+                  std::array<std::complex<float>, N>& x, FUNC func,
+                  float max_abs_err, float max_rel_error, int line) {
+  constexpr float kImagStep = 1e-30f;
+  TestGradient(grad, x, func, kImagStep, max_abs_err, max_rel_error, line);
+}
+// float gradient checked against double-precision complex inputs; forwards
+// to the generic overload with an imaginary step of 1e-50.
+template<size_t N, typename FUNC>
+void TestGradient(const std::array<float, N>& grad,
+                  std::array<std::complex<double>, N>& x, FUNC func,
+                  float max_abs_err, float max_rel_error, int line) {
+  constexpr double kImagStep = 1e-50;
+  TestGradient(grad, x, func, kImagStep, max_abs_err, max_rel_error, line);
+}
+// double gradient checked against double-precision complex inputs; forwards
+// to the generic overload with an imaginary step of 1e-50.
+template<size_t N, typename FUNC>
+void TestGradient(const std::array<double, N>& grad,
+                  std::array<std::complex<double>, N>& x, FUNC func,
+                  double max_abs_err, double max_rel_error, int line) {
+  constexpr double kImagStep = 1e-50;
+  TestGradient(grad, x, func, kImagStep, max_abs_err, max_rel_error, line);
+}
+// Checks the gradient of one layer: each of the six members listed below is
+// checked separately, comparing the member of `grad` with a numerical
+// gradient obtained by perturbing the matching member of `c_weights` and
+// re-evaluating `func`. `max_err` is used as both the absolute and the
+// relative tolerance. __LINE__ is passed so that a failure message identifies
+// which member's check failed.
+template<typename T, typename U, typename TConfig, typename FUNC>
+void TestGradient(const Layer<T, TConfig>& grad,
+                  Layer<std::complex<U>, TConfig>& c_weights,
+                  FUNC func, T max_err) {
+  TestGradient(grad.pre_attention_norm_scale,
+               c_weights.pre_attention_norm_scale,
+               func, max_err, max_err, __LINE__);
+  TestGradient(grad.attn_vec_einsum_w, c_weights.attn_vec_einsum_w,
+               func, max_err, max_err, __LINE__);
+  TestGradient(grad.qkv_einsum_w, c_weights.qkv_einsum_w,
+               func, max_err, max_err, __LINE__);
+  TestGradient(grad.pre_ffw_norm_scale, c_weights.pre_ffw_norm_scale,
+               func, max_err, max_err, __LINE__);
+  TestGradient(grad.gating_einsum_w, c_weights.gating_einsum_w,
+               func, max_err, max_err, __LINE__);
+  TestGradient(grad.linear_w, c_weights.linear_w,
+               func, max_err, max_err, __LINE__);
+}
+// Checks the gradient of a whole set of weights: the input embedding, the
+// final norm scale, and then every layer. `max_err` serves as both the
+// absolute and the relative tolerance, except that the input embedding is
+// checked with twice the absolute tolerance. __LINE__ is passed so that a
+// failure message identifies which check failed.
+template<typename T, typename U, typename TConfig, typename FUNC>
+void TestGradient(const Weights<T, TConfig>& grad,
+                  Weights<std::complex<U>, TConfig>& c_weights,
+                  FUNC func, T max_err) {
+  // Same index type as the layer loop in Complexify(); avoids comparing a
+  // signed counter against the layer count.
+  static constexpr size_t kLayers = TConfig::kLayers;
+  TestGradient(grad.embedder_input_embedding,
+               c_weights.embedder_input_embedding,
+               func, 2 * max_err, max_err, __LINE__);
+  TestGradient(grad.final_norm_scale, c_weights.final_norm_scale,
+               func, max_err, max_err, __LINE__);
+  for (size_t i = 0; i < kLayers; ++i) {
+    TestGradient(*grad.GetLayer(i), *c_weights.GetLayer(i), func, max_err);
+  }
+}
+}  // namespace gcpp
+#endif  // THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_

gemma.cpp/bazel/BUILD ADDED Viewed

	@@ -0,0 +1,5 @@

+# Required for referencing bazel:sentencepiece.patch
+# Package defaults: targets use the //:license target as their applicable
+# license and are visible to the packages matched by //:__subpackages__.
+package(
+    default_applicable_licenses = ["//:license"],
+    default_visibility = ["//:__subpackages__"],
+)

gemma.cpp/bazel/sentencepiece.bazel ADDED Viewed

	@@ -0,0 +1,97 @@

+# Package-wide defaults: every target is publicly visible, and the
+# `layering_check` and `parse_headers` features are enabled for all targets.
+package(
+    default_visibility = ["//visibility:public"],
+    features = [
+        "layering_check",
+        "parse_headers",
+    ],
+)
+licenses(["notice"])  # Apache 2, BSD, MIT
+# Proto definitions from src/sentencepiece.proto and the C++ code generated
+# from them.
+proto_library(
+    name = "sentencepiece_proto",
+    srcs = ["src/sentencepiece.proto"],
+)
+cc_proto_library(
+    name = "sentencepiece_cc_proto",
+    deps = [":sentencepiece_proto"],
+)
+# Proto definitions from src/sentencepiece_model.proto and the C++ code
+# generated from them.
+proto_library(
+    name = "sentencepiece_model_proto",
+    srcs = ["src/sentencepiece_model.proto"],
+)
+cc_proto_library(
+    name = "sentencepiece_model_cc_proto",
+    deps = [":sentencepiece_model_proto"],
+)
+# Produces config.h by copying config.h.in unchanged; no substitution is
+# applied to the template (`$<` is the single source, `$@` the single output).
+genrule(
+    name = "config_h",
+    srcs = ["config.h.in"],
+    outs = ["config.h"],
+    cmd = "cp $< $@",
+)
+# Header-only target exposing src/common.h together with the config.h
+# produced by the :config_h genrule.
+cc_library(
+    name = "common",
+    hdrs = [
+        "config.h",
+        "src/common.h",
+    ],
+    deps = [
+        "@com_google_absl//absl/base",
+    ],
+)
+# The SentencePiece processor library: the model implementations (BPE, char,
+# unigram, word), the normalizer, and supporting utilities. Only the sources
+# listed in `srcs` are compiled by this target.
+cc_library(
+    name = "sentencepiece_processor",
+    srcs = [
+        "src/bpe_model.cc",
+        "src/char_model.cc",
+        "src/error.cc",
+        "src/filesystem.cc",
+        "src/model_factory.cc",
+        "src/model_interface.cc",
+        "src/normalizer.cc",
+        "src/sentencepiece_processor.cc",
+        "src/unigram_model.cc",
+        "src/util.cc",
+        "src/word_model.cc",
+    ],
+    # Note: src/trainer_interface.h is exported although no *_trainer.cc file
+    # appears in `srcs` above.
+    hdrs = [
+        "src/bpe_model.h",
+        "src/char_model.h",
+        "src/filesystem.h",
+        "src/freelist.h",
+        "src/model_factory.h",
+        "src/model_interface.h",
+        "src/normalizer.h",
+        "src/sentencepiece_processor.h",
+        "src/trainer_interface.h",
+        "src/unigram_model.h",
+        "src/util.h",
+        "src/word_model.h",
+    ],
+    # `defines` are applied to this target and to everything that depends on
+    # it.
+    defines = ["_USE_TF_STRING_VIEW"],
+    # Lets sources include headers relative to the package root and to src/
+    # (e.g. "util.h" rather than "src/util.h").
+    includes = [
+        ".",
+        "src",
+    ],
+    linkstatic = 1,
+    deps =
+        [
+            ":common",
+            ":sentencepiece_cc_proto",
+            ":sentencepiece_model_cc_proto",
+            "@com_google_absl//absl/container:flat_hash_map",
+            "@com_google_absl//absl/container:flat_hash_set",
+            "@com_google_absl//absl/memory",
+            "@com_google_absl//absl/status",
+            "@com_google_absl//absl/strings",
+            "@com_google_absl//absl/strings:str_format",
+            "@darts_clone",
+        ],
+)

gemma.cpp/bazel/sentencepiece.patch ADDED Viewed

	@@ -0,0 +1,2339 @@

+diff --git a/src/bpe_model.cc b/src/bpe_model.cc
+index 22cd115..97e0bda 100644
+--- a/src/bpe_model.cc
++++ b/src/bpe_model.cc
+@@ -21,7 +21,7 @@
+ #include "bpe_model.h"
+ #include "freelist.h"
+-#include "third_party/absl/container/flat_hash_map.h"
++#include "absl/container/flat_hash_map.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc
+index 964d44e..64878cd 100644
+--- a/src/bpe_model_trainer.cc
++++ b/src/bpe_model_trainer.cc
+@@ -18,7 +18,8 @@
+ #include <vector>
+ #include "bpe_model_trainer.h"
+-#include "third_party/absl/container/flat_hash_set.h"
++#include "absl/container/flat_hash_set.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -171,7 +172,7 @@ void Trainer::UpdateActiveSymbols() {
+   active_symbols_.insert(symbols.begin(), symbols.begin() + size);
+ }
+-util::Status Trainer::Train() {
++absl::Status Trainer::Train() {
+   RETURN_IF_ERROR(status());
+   CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
+diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h
+index e011a37..a17e580 100644
+--- a/src/bpe_model_trainer.h
++++ b/src/bpe_model_trainer.h
+@@ -20,7 +20,8 @@
+ #include <vector>
+ #include "sentencepiece_model.pb.h"
+-#include "third_party/absl/container/flat_hash_map.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/status/status.h"
+ #include "trainer_interface.h"
+ namespace sentencepiece {
+@@ -35,7 +36,7 @@ class Trainer : public TrainerInterface {
+       : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
+                                            denormalizer_spec) {}
+-  util::Status Train() override;
++  absl::Status Train() override;
+  private:
+   // Symbol represents a character or symbol bigram.
+diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc
+index 173eb9c..2a43c3a 100644
+--- a/src/bpe_model_trainer_test.cc
++++ b/src/bpe_model_trainer_test.cc
+@@ -20,8 +20,8 @@
+ #include "sentencepiece_processor.h"
+ #include "sentencepiece_trainer.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/builder.cc b/src/builder.cc
+index 378aaa0..fd8edf8 100644
+--- a/src/builder.cc
++++ b/src/builder.cc
+@@ -18,10 +18,11 @@
+ #include "builder.h"
+ #include "filesystem.h"
+-#include "third_party/absl/strings/str_join.h"
+-#include "third_party/absl/strings/str_replace.h"
+-#include "third_party/absl/strings/str_split.h"
+-#include "third_party/absl/strings/strip.h"
++#include "absl/strings/str_join.h"
++#include "absl/strings/str_replace.h"
++#include "absl/strings/str_split.h"
++#include "absl/strings/strip.h"
++#include "absl/status/status.h"
+ #ifdef ENABLE_NFKC_COMPILE
+ #include <unicode/errorcode.h>
+@@ -36,7 +37,7 @@
+ #include "normalization_rule.h"
+ #include "normalizer.h"
+-#include "third_party/darts_clone/darts.h"
++#include "include/darts.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -145,7 +146,7 @@ Builder::Chars Normalize(const Builder::CharsMap &chars_map,
+ }  // namespace
+ // static
+-util::Status Builder::CompileCharsMap(const CharsMap &chars_map,
++absl::Status Builder::CompileCharsMap(const CharsMap &chars_map,
+                                       std::string *output) {
+   CHECK_OR_RETURN(output);
+   CHECK_OR_RETURN(!chars_map.empty());
+@@ -212,7 +213,7 @@ util::Status Builder::CompileCharsMap(const CharsMap &chars_map,
+ }
+ // static
+-util::Status Builder::DecompileCharsMap(absl::string_view blob,
++absl::Status Builder::DecompileCharsMap(absl::string_view blob,
+                                         Builder::CharsMap *chars_map) {
+   CHECK_OR_RETURN(chars_map);
+   chars_map->clear();
+@@ -265,7 +266,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob,
+ }
+ // static
+-util::Status Builder::GetPrecompiledCharsMap(const std::string &name,
++absl::Status Builder::GetPrecompiledCharsMap(const std::string &name,
+                                              std::string *output) {
+   CHECK_OR_RETURN(output);
+@@ -282,12 +283,12 @@ util::Status Builder::GetPrecompiledCharsMap(const std::string &name,
+       return util::OkStatus();
+     }
+   }
+-  return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
++  return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
+          << "No precompiled charsmap is found: " << name;
+ }
+ // static
+-util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
++absl::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
+ #ifdef ENABLE_NFKC_COMPILE
+   LOG(INFO) << "Running BuildNFKCMap";
+@@ -345,7 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
+   return util::OkStatus();
+ }
+-util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
++absl::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
+ #ifdef ENABLE_NFKC_COMPILE
+   LOG(INFO) << "Running BuildNmtNFKCMap";
+@@ -420,7 +421,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
+ }
+ // static
+-util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
++absl::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
+ #ifdef ENABLE_NFKC_COMPILE
+   for (auto &c : *chars_map) {
+     std::vector<char32> trg;
+@@ -445,7 +446,7 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
+ }
+ // static
+-util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
++absl::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
+ #ifdef ENABLE_NFKC_COMPILE
+   CharsMap nfkc_map;
+   RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map));
+@@ -460,7 +461,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
+ }
+ //  static
+-util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
++absl::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
+ #ifdef ENABLE_NFKC_COMPILE
+   CharsMap nfkc_map;
+   RETURN_IF_ERROR(Builder::BuildNmtNFKCMap(&nfkc_map));
+@@ -475,7 +476,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
+ }
+ // static
+-util::Status Builder::LoadCharsMap(absl::string_view filename,
++absl::Status Builder::LoadCharsMap(absl::string_view filename,
+                                    CharsMap *chars_map) {
+   LOG(INFO) << "Loading mapping file: " << filename.data();
+   CHECK_OR_RETURN(chars_map);
+@@ -510,7 +511,7 @@ util::Status Builder::LoadCharsMap(absl::string_view filename,
+ }
+ // static
+-util::Status Builder::SaveCharsMap(absl::string_view filename,
++absl::Status Builder::SaveCharsMap(absl::string_view filename,
+                                    const Builder::CharsMap &chars_map) {
+   auto output = filesystem::NewWritableFile(filename);
+   RETURN_IF_ERROR(output->status());
+@@ -540,7 +541,7 @@ util::Status Builder::SaveCharsMap(absl::string_view filename,
+ }
+ // static
+-util::Status Builder::RemoveRedundantMap(CharsMap *chars_map) {
++absl::Status Builder::RemoveRedundantMap(CharsMap *chars_map) {
+   CHECK_OR_RETURN(chars_map);
+   CharsMap new_chars_map;
+diff --git a/src/builder.h b/src/builder.h
+index 49d2884..8ad872c 100644
+--- a/src/builder.h
++++ b/src/builder.h
+@@ -22,7 +22,8 @@
+ #include "common.h"
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+ namespace normalizer {
+@@ -43,15 +44,15 @@ class Builder {
+   // String-to-string mapping.
+   using CharsMap = std::map<Chars, Chars>;
+-  static util::Status CompileCharsMap(const CharsMap &chars_map,
++  static absl::Status CompileCharsMap(const CharsMap &chars_map,
+                                       std::string *output);
+   // Decompiles `blob` into `chars_map`.
+-  static util::Status DecompileCharsMap(absl::string_view blob,
++  static absl::Status DecompileCharsMap(absl::string_view blob,
+                                         CharsMap *chars_map);
+   // Returns a pre-compiled binary index with `name`.
+-  static util::Status GetPrecompiledCharsMap(const std::string &name,
++  static absl::Status GetPrecompiledCharsMap(const std::string &name,
+                                              std::string *output);
+   // Makes a normalization mapping based on NFKC.
+@@ -89,30 +90,30 @@ class Builder {
+   //     normalizer is the goal of SentencePiece.
+   //
+   // TODO(taku): Make NFC, NFD, and NFKD mapping if necessary.
+-  static util::Status BuildNFKCMap(CharsMap *chars_map);
++  static absl::Status BuildNFKCMap(CharsMap *chars_map);
+   // Makes an NFKC-based mapping with NMT specific modifications around
+   // whitespaces.
+-  static util::Status BuildNmtNFKCMap(CharsMap *chars_map);
++  static absl::Status BuildNmtNFKCMap(CharsMap *chars_map);
+   // Merge Unicode case folding mapping into `chars_map`.
+-  static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
++  static absl::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
+   // Makes NFKC with Unicode case folding.
+-  static util::Status BuildNFKC_CFMap(CharsMap *chars_map);
++  static absl::Status BuildNFKC_CFMap(CharsMap *chars_map);
+   // Makes NMT NFKC with Unicode case folding.
+-  static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
++  static absl::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
+   // Builds Chars map save in `filename`.
+   // Format:
+   // src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
+   // (src|trg)_ucharX must be a hex of Unicode code point.
+-  static util::Status LoadCharsMap(absl::string_view filename,
++  static absl::Status LoadCharsMap(absl::string_view filename,
+                                    CharsMap *chars_map);
+   // Saves Chars map to `filename` as TSV.
+-  static util::Status SaveCharsMap(absl::string_view filename,
++  static absl::Status SaveCharsMap(absl::string_view filename,
+                                    const CharsMap &chars_map);
+  private:
+@@ -121,7 +122,7 @@ class Builder {
+   // Removes redundant rules from `chars_map`.
+   // When char_maps have "aa" => "bb" and "a" => "b", the first
+   // rule is not necessary since the second rule can cover the first rule.
+-  static util::Status RemoveRedundantMap(CharsMap *chars_map);
++  static absl::Status RemoveRedundantMap(CharsMap *chars_map);
+ };
+ }  // namespace normalizer
+ }  // namespace sentencepiece
+diff --git a/src/builder_test.cc b/src/builder_test.cc
+index 4acb7b3..1dee5c7 100644
+--- a/src/builder_test.cc
++++ b/src/builder_test.cc
+@@ -18,7 +18,7 @@
+ #include "normalizer.h"
+ #include "sentencepiece_trainer.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
++#include "absl/strings/str_cat.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/char_model_trainer.cc b/src/char_model_trainer.cc
+index f438d78..4f4c603 100644
+--- a/src/char_model_trainer.cc
++++ b/src/char_model_trainer.cc
+@@ -16,12 +16,13 @@
+ #include "char_model.h"
+ #include "char_model_trainer.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ namespace sentencepiece {
+ namespace character {
+-util::Status Trainer::Train() {
++absl::Status Trainer::Train() {
+   RETURN_IF_ERROR(status());
+   CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
+diff --git a/src/char_model_trainer.h b/src/char_model_trainer.h
+index e563819..a5d021c 100644
+--- a/src/char_model_trainer.h
++++ b/src/char_model_trainer.h
+@@ -17,6 +17,7 @@
+ #include "sentencepiece_model.pb.h"
+ #include "trainer_interface.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+ namespace character {
+@@ -30,7 +31,7 @@ class Trainer : public TrainerInterface {
+       : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
+                                            denormalizer_spec) {}
+-  util::Status Train() override;
++  absl::Status Train() override;
+ };
+ }  // namespace character
+ }  // namespace sentencepiece
+diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc
+index 8c2e4b7..e8b4979 100644
+--- a/src/char_model_trainer_test.cc
++++ b/src/char_model_trainer_test.cc
+@@ -19,8 +19,8 @@
+ #include "filesystem.h"
+ #include "sentencepiece_processor.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/common.h b/src/common.h
+index 7595634..339f831 100644
+--- a/src/common.h
++++ b/src/common.h
+@@ -146,6 +146,7 @@ inline const char *BaseName(const char *path) {
+ }  // namespace logging
+ }  // namespace sentencepiece
++#ifndef LOG
+ #define LOG(severity)                                                        \
+   (::sentencepiece::logging::GetMinLogLevel() >                              \
+    ::sentencepiece::logging::LOG_##severity)                                 \
+@@ -156,6 +157,7 @@ inline const char *BaseName(const char *path) {
+             std::cerr << ::sentencepiece::logging::BaseName(__FILE__) << "(" \
+                       << __LINE__ << ") "                                    \
+                       << "LOG(" << #severity << ") "
++#endif  // LOG
+ #define CHECK(condition)                                                      \
+   (condition) ? 0                                                             \
+diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc
+index c5a5188..e5db1d7 100644
+--- a/src/compile_charsmap_main.cc
++++ b/src/compile_charsmap_main.cc
+@@ -22,8 +22,9 @@
+ #include "filesystem.h"
+ #include "init.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/flags/flag.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ using sentencepiece::normalizer::Builder;
+@@ -160,7 +161,7 @@ int main(int argc, char **argv) {
+   const std::vector<std::pair<
+       std::string,
+-      std::function<sentencepiece::util::Status(Builder::CharsMap *)>>>
++      std::function<sentencepiece::absl::Status(Builder::CharsMap *)>>>
+       kRuleList = {{"nfkc", Builder::BuildNFKCMap},
+                    {"nmt_nfkc", Builder::BuildNmtNFKCMap},
+                    {"nfkc_cf", Builder::BuildNFKC_CFMap},
+diff --git a/src/error.cc b/src/error.cc
+index a226d98..ab4675d 100644
+--- a/src/error.cc
++++ b/src/error.cc
+@@ -20,8 +20,8 @@
+ #ifdef _USE_EXTERNAL_ABSL
+ // Naive workaround to define minloglevel on external absl package.
+ // We want to define them in other cc file.
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/flags/parse.h"
++#include "absl/flags/flag.h"
++#include "absl/flags/parse.h"
+ ABSL_FLAG(int32, minloglevel, 0,
+           "Messages logged at a lower level than this don't actually.");
+ #endif
+diff --git a/src/filesystem.cc b/src/filesystem.cc
+index 833c8f7..9a1b6c9 100644
+--- a/src/filesystem.cc
++++ b/src/filesystem.cc
+@@ -15,7 +15,8 @@
+ #include <iostream>
+ #include "filesystem.h"
+-#include "third_party/absl/memory/memory.h"
++#include "absl/status/status.h"
++#include "absl/memory/memory.h"
+ #include "util.h"
+ #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE)
+@@ -36,7 +37,7 @@ class PosixReadableFile : public ReadableFile {
+                                     is_binary ? std::ios::binary | std::ios::in
+                                               : std::ios::in)) {
+     if (!*is_)
+-      status_ = util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
++      status_ = util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
+                 << "\"" << filename.data() << "\": " << util::StrError(errno);
+   }
+@@ -44,7 +45,7 @@ class PosixReadableFile : public ReadableFile {
+     if (is_ != &std::cin) delete is_;
+   }
+-  util::Status status() const { return status_; }
++  absl::Status status() const { return status_; }
+   bool ReadLine(std::string *line) {
+     return static_cast<bool>(std::getline(*is_, *line));
+@@ -61,7 +62,7 @@ class PosixReadableFile : public ReadableFile {
+   }
+  private:
+-  util::Status status_;
++  absl::Status status_;
+   std::istream *is_;
+ };
+@@ -75,7 +76,7 @@ class PosixWritableFile : public WritableFile {
+                                               : std::ios::out)) {
+     if (!*os_)
+       status_ =
+-          util::StatusBuilder(util::StatusCode::kPermissionDenied, GTL_LOC)
++          util::StatusBuilder(absl::StatusCode::kPermissionDenied, GTL_LOC)
+           << "\"" << filename.data() << "\": " << util::StrError(errno);
+   }
+@@ -83,7 +84,7 @@ class PosixWritableFile : public WritableFile {
+     if (os_ != &std::cout) delete os_;
+   }
+-  util::Status status() const { return status_; }
++  absl::Status status() const { return status_; }
+   bool Write(absl::string_view text) {
+     os_->write(text.data(), text.size());
+@@ -93,7 +94,7 @@ class PosixWritableFile : public WritableFile {
+   bool WriteLine(absl::string_view text) { return Write(text) && Write("\n"); }
+  private:
+-  util::Status status_;
++  absl::Status status_;
+   std::ostream *os_;
+ };
+diff --git a/src/filesystem.h b/src/filesystem.h
+index e572b4b..6e8e305 100644
+--- a/src/filesystem.h
++++ b/src/filesystem.h
+@@ -23,7 +23,8 @@
+ #include "common.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+ namespace filesystem {
+@@ -33,7 +34,7 @@ class ReadableFile {
+   explicit ReadableFile(absl::string_view filename, bool is_binary = false) {}
+   virtual ~ReadableFile() {}
+-  virtual util::Status status() const = 0;
++  virtual absl::Status status() const = 0;
+   virtual bool ReadLine(std::string *line) = 0;
+   virtual bool ReadAll(std::string *line) = 0;
+ };
+@@ -44,7 +45,7 @@ class WritableFile {
+   explicit WritableFile(absl::string_view filename, bool is_binary = false) {}
+   virtual ~WritableFile() {}
+-  virtual util::Status status() const = 0;
++  virtual absl::Status status() const = 0;
+   virtual bool Write(absl::string_view text) = 0;
+   virtual bool WriteLine(absl::string_view text) = 0;
+ };
+diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc
+index 790e756..39ece99 100644
+--- a/src/filesystem_test.cc
++++ b/src/filesystem_test.cc
+@@ -14,7 +14,7 @@
+ #include "filesystem.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
++#include "absl/strings/str_cat.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/init.h b/src/init.h
+index 090a2d9..acfda8a 100644
+--- a/src/init.h
++++ b/src/init.h
+@@ -16,8 +16,8 @@
+ #define INIT_H_
+ #include "common.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/flags/parse.h"
++#include "absl/flags/flag.h"
++#include "absl/flags/parse.h"
+ ABSL_DECLARE_FLAG(int32, minloglevel);
+diff --git a/src/model_factory.cc b/src/model_factory.cc
+index be99501..040c00c 100644
+--- a/src/model_factory.cc
++++ b/src/model_factory.cc
+@@ -15,7 +15,7 @@
+ #include "bpe_model.h"
+ #include "char_model.h"
+ #include "model_factory.h"
+-#include "third_party/absl/memory/memory.h"
++#include "absl/memory/memory.h"
+ #include "unigram_model.h"
+ #include "word_model.h"
+diff --git a/src/model_interface.cc b/src/model_interface.cc
+index c49be1e..22c6378 100644
+--- a/src/model_interface.cc
++++ b/src/model_interface.cc
+@@ -16,8 +16,8 @@
+ #include "model_interface.h"
+ #include "sentencepiece_model.pb.h"
+-#include "third_party/absl/memory/memory.h"
+-#include "third_party/absl/strings/str_format.h"
++#include "absl/memory/memory.h"
++#include "absl/strings/str_format.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/model_interface.h b/src/model_interface.h
+index aef5b53..c7858fb 100644
+--- a/src/model_interface.h
++++ b/src/model_interface.h
+@@ -25,9 +25,10 @@
+ #include "normalizer.h"
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/container/flat_hash_map.h"
+-#include "third_party/absl/strings/string_view.h"
+-#include "third_party/darts_clone/darts.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
++#include "include/darts.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -69,7 +70,7 @@ class ModelInterface {
+   // Returns Status.
+   // Encode/Decode functions are valid only when status is OK.
+-  virtual util::Status status() const { return status_; }
++  virtual absl::Status status() const { return status_; }
+   virtual const ModelProto &model_proto() const { return *model_proto_; }
+@@ -82,7 +83,7 @@ class ModelInterface {
+   // normally users do not need to call this function. This function is provided
+   // just in case that a user want to manually choose which encoder version to
+   // use.
+-  virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) {
++  virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version) {
+     encoder_version_ = encoder_version;
+     return util::OkStatus();
+   }
+@@ -261,7 +262,7 @@ class ModelInterface {
+   EncoderVersion encoder_version_ = EncoderVersion::kOptimized;
+   // status.
+-  util::Status status_;
++  absl::Status status_;
+ };
+ }  // namespace sentencepiece
+ #endif  // MODEL_INTERFACE_H_
+diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc
+index 69ee4e6..26a1e05 100644
+--- a/src/model_interface_test.cc
++++ b/src/model_interface_test.cc
+@@ -15,7 +15,7 @@
+ #include "model_factory.h"
+ #include "model_interface.h"
+ #include "testharness.h"
+-#include "third_party/absl/container/flat_hash_map.h"
++#include "absl/container/flat_hash_map.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/normalizer.cc b/src/normalizer.cc
+index 100b875..c553906 100644
+--- a/src/normalizer.cc
++++ b/src/normalizer.cc
+@@ -18,11 +18,12 @@
+ #include <vector>
+ #include "common.h"
+-#include "third_party/absl/memory/memory.h"
+-#include "third_party/absl/strings/match.h"
+-#include "third_party/absl/strings/string_view.h"
+-#include "third_party/absl/strings/strip.h"
+-#include "third_party/darts_clone/darts.h"
++#include "absl/memory/memory.h"
++#include "absl/strings/match.h"
++#include "absl/strings/string_view.h"
++#include "absl/strings/strip.h"
++#include "absl/status/status.h"
++#include "include/darts.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -71,7 +72,7 @@ void Normalizer::Init() {
+   }
+ }
+-util::Status Normalizer::Normalize(absl::string_view input,
++absl::Status Normalizer::Normalize(absl::string_view input,
+                                    std::string *normalized,
+                                    std::vector<size_t> *norm_to_orig) const {
+   norm_to_orig->clear();
+@@ -274,7 +275,7 @@ std::string Normalizer::EncodePrecompiledCharsMap(
+ }
+ // static
+-util::Status Normalizer::DecodePrecompiledCharsMap(
++absl::Status Normalizer::DecodePrecompiledCharsMap(
+     absl::string_view blob, absl::string_view *trie_blob,
+     absl::string_view *normalized, std::string *buffer) {
+   uint32 trie_blob_size = 0;
+diff --git a/src/normalizer.h b/src/normalizer.h
+index 622bbd2..21d1385 100644
+--- a/src/normalizer.h
++++ b/src/normalizer.h
+@@ -24,8 +24,9 @@
+ #include "common.h"
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/strings/string_view.h"
+-#include "third_party/darts_clone/darts.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
++#include "include/darts.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -75,7 +76,7 @@ class Normalizer {
+   // Returns Status.
+   // Normalizes function is valid only when status is OK.
+-  virtual util::Status status() const { return status_; }
++  virtual absl::Status status() const { return status_; }
+   // Normalizes a plain utf8 string into an internal representation for
+   // Sentencepiece model. |norm_to_orig| stores the byte-alignment from
+@@ -86,7 +87,7 @@ class Normalizer {
+   // - Adds a prefix space.
+   // - Replaces a space with a meta symbol.
+   // - Removing heading, tailing and other redundant spaces.
+-  virtual util::Status Normalize(absl::string_view input,
++  virtual absl::Status Normalize(absl::string_view input,
+                                  std::string *normalized,
+                                  std::vector<size_t> *norm_to_orig) const;
+@@ -121,7 +122,7 @@ class Normalizer {
+                                                absl::string_view normalized);
+   // Decodes blob into trie_blob and normalized string.
+-  static util::Status DecodePrecompiledCharsMap(absl::string_view blob,
++  static absl::Status DecodePrecompiledCharsMap(absl::string_view blob,
+                                                 absl::string_view *trie_blob,
+                                                 absl::string_view *normalized,
+                                                 std::string *buffer = nullptr);
+@@ -153,7 +154,7 @@ class Normalizer {
+ #endif
+   // Normalizer's status.
+-  util::Status status_;
++  absl::Status status_;
+ };
+ }  // namespace normalizer
+ }  // namespace sentencepiece
+diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc
+index 049658e..8021511 100644
+--- a/src/pretokenizer_for_training.cc
++++ b/src/pretokenizer_for_training.cc
+@@ -14,7 +14,7 @@
+ #include <string>
+ #include "pretokenizer_for_training.h"
+-#include "third_party/absl/strings/str_replace.h"
++#include "absl/strings/str_replace.h"
+ namespace sentencepiece {
+ namespace pretokenizer {
+diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h
+index 2d3bc82..b4a6de3 100644
+--- a/src/pretokenizer_for_training.h
++++ b/src/pretokenizer_for_training.h
+@@ -21,7 +21,8 @@
+ #include "common.h"
+ #include "sentencepiece.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+ namespace pretokenizer {
+@@ -30,7 +31,7 @@ class PretokenizerForTrainingInterface {
+  public:
+   PretokenizerForTrainingInterface() {}
+   virtual ~PretokenizerForTrainingInterface() {}
+-  virtual util::Status status() const = 0;
++  virtual absl::Status status() const = 0;
+   // Puts kUPPBoundaryStr before and after the pre-tokenizer's segmentation
+   // when there are no spaces between these tokens.
+diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc
+index 80f4787..de89fe3 100644
+--- a/src/pretokenizer_for_training_test.cc
++++ b/src/pretokenizer_for_training_test.cc
+@@ -13,8 +13,9 @@
+ // limitations under the License.!
+ #include "pretokenizer_for_training.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
++#include "absl/strings/str_cat.h"
+ #include "trainer_interface.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+ namespace pretokenizer {
+@@ -28,7 +29,7 @@ class MockPretokenizer : public PretokenizerForTrainingInterface {
+     return spt_;
+   }
+-  util::Status status() const override { return util::OkStatus(); }
++  absl::Status status() const override { return util::OkStatus(); }
+   void SetOutput(const SentencePieceText &spt) { spt_ = spt; }
+diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc
+index 1e4e7a0..78ae527 100644
+--- a/src/sentencepiece_processor.cc
++++ b/src/sentencepiece_processor.cc
+@@ -23,14 +23,15 @@
+ #include "normalizer.h"
+ #include "sentencepiece.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/memory/memory.h"
+-#include "third_party/absl/strings/numbers.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
+-#include "third_party/absl/strings/str_replace.h"
+-#include "third_party/absl/strings/str_split.h"
+-#include "third_party/absl/strings/string_view.h"
+-#include "third_party/absl/strings/strip.h"
++#include "absl/memory/memory.h"
++#include "absl/strings/numbers.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
++#include "absl/strings/str_replace.h"
++#include "absl/strings/str_split.h"
++#include "absl/strings/string_view.h"
++#include "absl/strings/strip.h"
++#include "absl/status/status.h"
+ #include "unigram_model.h"
+ #include "util.h"
+@@ -52,7 +53,7 @@ const char kReplacementCharacter[] = "\xef\xbf\xbd";
+ SentencePieceProcessor::SentencePieceProcessor() {}
+ SentencePieceProcessor::~SentencePieceProcessor() {}
+-util::Status SentencePieceProcessor::Load(absl::string_view filename) {
++absl::Status SentencePieceProcessor::Load(absl::string_view filename) {
+   auto model_proto = absl::make_unique<ModelProto>();
+   RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get()));
+   return Load(std::move(model_proto));
+@@ -62,13 +63,13 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) {
+   CHECK_OK(Load(filename));
+ }
+-util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
++absl::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
+   auto model_proto_copy = absl::make_unique<ModelProto>();
+   *model_proto_copy = model_proto;
+   return Load(std::move(model_proto_copy));
+ }
+-util::Status SentencePieceProcessor::LoadFromSerializedProto(
++absl::Status SentencePieceProcessor::LoadFromSerializedProto(
+     absl::string_view serialized) {
+   auto model_proto = absl::make_unique<ModelProto>();
+   CHECK_OR_RETURN(
+@@ -76,7 +77,7 @@ util::Status SentencePieceProcessor::LoadFromSerializedProto(
+   return Load(std::move(model_proto));
+ }
+-util::Status SentencePieceProcessor::Load(
++absl::Status SentencePieceProcessor::Load(
+     std::unique_ptr<ModelProto> model_proto) {
+   model_proto_ = std::move(model_proto);
+   model_ = ModelFactory::Create(*model_proto_);
+@@ -117,7 +118,7 @@ util::Status SentencePieceProcessor::Load(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::SetEncoderVersion(
++absl::Status SentencePieceProcessor::SetEncoderVersion(
+     EncoderVersion encoder_version) {
+   return model_->SetEncoderVersion(encoder_version);
+ }
+@@ -126,17 +127,17 @@ EncoderVersion SentencePieceProcessor::GetEncoderVersion() const {
+   return model_->GetEncoderVersion();
+ }
+-util::Status SentencePieceProcessor::SetEncodeExtraOptions(
++absl::Status SentencePieceProcessor::SetEncodeExtraOptions(
+     absl::string_view extra_options) {
+   return ParseExtraOptions(extra_options, &encode_extra_options_);
+ }
+-util::Status SentencePieceProcessor::SetDecodeExtraOptions(
++absl::Status SentencePieceProcessor::SetDecodeExtraOptions(
+     absl::string_view extra_options) {
+   return ParseExtraOptions(extra_options, &decode_extra_options_);
+ }
+-util::Status SentencePieceProcessor::status() const {
++absl::Status SentencePieceProcessor::status() const {
+   CHECK_OR_RETURN(model_) << "Model is not initialized.";
+   CHECK_OR_RETURN(normalizer_) << "Normalizer is not initialized.";
+   RETURN_IF_ERROR(model_->status());
+@@ -144,7 +145,7 @@ util::Status SentencePieceProcessor::status() const {
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::SetVocabulary(
++absl::Status SentencePieceProcessor::SetVocabulary(
+     const std::vector<std::string> &valid_vocab) {
+   RETURN_IF_ERROR(status());
+@@ -174,7 +175,7 @@ util::Status SentencePieceProcessor::SetVocabulary(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::ResetVocabulary() {
++absl::Status SentencePieceProcessor::ResetVocabulary() {
+   RETURN_IF_ERROR(status());
+   for (auto &piece : *(model_proto_->mutable_pieces())) {
+     if (piece.type() == ModelProto::SentencePiece::UNUSED)
+@@ -184,7 +185,7 @@ util::Status SentencePieceProcessor::ResetVocabulary() {
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
++absl::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
+                                                     int threshold) {
+   auto input = filesystem::NewReadableFile(filename);
+   RETURN_IF_ERROR(input->status());
+@@ -221,7 +222,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
+ //////////////////////////////////////////////////////////////
+ // Simple API.
+-util::Status SentencePieceProcessor::Encode(
++absl::Status SentencePieceProcessor::Encode(
+     absl::string_view input, std::vector<std::string> *pieces) const {
+   CHECK_OR_RETURN_STATUS_STL(pieces);
+@@ -234,7 +235,7 @@ util::Status SentencePieceProcessor::Encode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::Encode(absl::string_view input,
++absl::Status SentencePieceProcessor::Encode(absl::string_view input,
+                                             std::vector<int> *ids) const {
+   CHECK_OR_RETURN_STATUS_STL(ids);
+@@ -247,7 +248,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input,
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::Decode(
++absl::Status SentencePieceProcessor::Decode(
+     const std::vector<std::string> &pieces, std::string *detokenized) const {
+   CHECK_OR_RETURN_STATUS_STL(detokenized);
+@@ -258,7 +259,7 @@ util::Status SentencePieceProcessor::Decode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
++absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
+                                             std::string *detokenized) const {
+   CHECK_OR_RETURN_STATUS_STL(detokenized);
+@@ -269,7 +270,7 @@ util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::NBestEncode(
++absl::Status SentencePieceProcessor::NBestEncode(
+     absl::string_view input, int nbest_size,
+     std::vector<std::vector<std::string>> *pieces) const {
+   CHECK_OR_RETURN_STATUS_STL(pieces);
+@@ -287,7 +288,7 @@ util::Status SentencePieceProcessor::NBestEncode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::NBestEncode(
++absl::Status SentencePieceProcessor::NBestEncode(
+     absl::string_view input, int nbest_size,
+     std::vector<std::vector<int>> *ids) const {
+   CHECK_OR_RETURN_STATUS_STL(ids);
+@@ -305,7 +306,7 @@ util::Status SentencePieceProcessor::NBestEncode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::SampleEncode(
++absl::Status SentencePieceProcessor::SampleEncode(
+     absl::string_view input, int nbest_size, float alpha,
+     std::vector<std::string> *pieces) const {
+   CHECK_OR_RETURN_STATUS_STL(pieces);
+@@ -319,7 +320,7 @@ util::Status SentencePieceProcessor::SampleEncode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
++absl::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
+                                                   int nbest_size, float alpha,
+                                                   std::vector<int> *ids) const {
+   CHECK_OR_RETURN_STATUS_STL(ids);
+@@ -333,7 +334,7 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::PopulateSentencePieceText(
++absl::Status SentencePieceProcessor::PopulateSentencePieceText(
+     absl::string_view input, absl::string_view normalized,
+     const std::vector<size_t> &norm_to_orig, const EncodeResult &result,
+     SentencePieceText *spt) const {
+@@ -424,7 +425,7 @@ util::Status SentencePieceProcessor::PopulateSentencePieceText(
+   return util::OkStatus();
+ }  // namespace sentencepiece
+-util::Status SentencePieceProcessor::Encode(absl::string_view input,
++absl::Status SentencePieceProcessor::Encode(absl::string_view input,
+                                             SentencePieceText *spt) const {
+   CHECK_OR_RETURN_STATUS_PROTO(spt);
+@@ -439,7 +440,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input,
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::NBestEncode(
++absl::Status SentencePieceProcessor::NBestEncode(
+     absl::string_view input, int nbest_size,
+     NBestSentencePieceText *nbest_spt) const {
+   CHECK_OR_RETURN_STATUS_PROTO(nbest_spt);
+@@ -464,7 +465,7 @@ util::Status SentencePieceProcessor::NBestEncode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::SampleEncode(
++absl::Status SentencePieceProcessor::SampleEncode(
+     absl::string_view input, int nbest_size, float alpha,
+     SentencePieceText *spt) const {
+   CHECK_OR_RETURN_STATUS_PROTO(spt);
+@@ -503,7 +504,7 @@ util::Status SentencePieceProcessor::SampleEncode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::SampleEncodeAndScore(
++absl::Status SentencePieceProcessor::SampleEncodeAndScore(
+     absl::string_view input, int samples, float theta, bool wor,
+     bool include_best, NBestSentencePieceText *samples_spt) const {
+   CHECK_OR_RETURN(model_->IsSampleEncodeAndScoreAvailable())
+@@ -527,7 +528,7 @@ util::Status SentencePieceProcessor::SampleEncodeAndScore(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input,
++absl::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input,
+                                                       float theta,
+                                                       float *entropy) const {
+   CHECK_OR_RETURN(model_->IsCalculateEntropyAvailable())
+@@ -540,7 +541,7 @@ util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input,
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::Decode(
++absl::Status SentencePieceProcessor::Decode(
+     const std::vector<std::string> &pieces, SentencePieceText *spt) const {
+   CHECK_OR_RETURN_STATUS_PROTO(spt);
+@@ -591,7 +592,7 @@ util::Status SentencePieceProcessor::Decode(
+   };
+   auto ProcessBytePieces = [&](int token_index_begin,
+-                               int token_index_end) -> util::Status {
++                               int token_index_end) -> absl::Status {
+     if (token_index_begin >= token_index_end) {
+       return util::OkStatus();
+     }
+@@ -661,14 +662,14 @@ util::Status SentencePieceProcessor::Decode(
+   return util::OkStatus();
+ }
+-util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
++absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
+                                             SentencePieceText *spt) const {
+   std::vector<std::string> pieces;
+   const int num_pieces = GetPieceSize();
+   pieces.reserve(ids.size());
+   for (const int id : ids) {
+     if (id < 0 || id >= num_pieces) {
+-      return util::Status(util::StatusCode::kOutOfRange,
++      return absl::Status(absl::StatusCode::kOutOfRange,
+                           absl::StrCat("Invalid id: ", id));
+     }
+     pieces.emplace_back(IdToPiece(id));
+@@ -783,7 +784,7 @@ int SentencePieceProcessor::pad_id() const {
+ }
+ // static
+-util::Status SentencePieceProcessor::ApplyExtraOptions(
++absl::Status SentencePieceProcessor::ApplyExtraOptions(
+     const std::vector<ExtraOption> &extra_options,
+     SentencePieceText *spt) const {
+   for (const auto &extra_option : extra_options) {
+@@ -818,7 +819,7 @@ util::Status SentencePieceProcessor::ApplyExtraOptions(
+ }
+ // static
+-util::Status SentencePieceProcessor::ParseExtraOptions(
++absl::Status SentencePieceProcessor::ParseExtraOptions(
+     absl::string_view _extra_option,
+     std::vector<SentencePieceProcessor::ExtraOption> *extra_options) const {
+   absl::string_view extra_option(_extra_option.data(), _extra_option.size());
+@@ -877,7 +878,7 @@ void SetRandomGeneratorSeed(unsigned int seed);
+ namespace io {
+-util::Status LoadModelProto(absl::string_view filename,
++absl::Status LoadModelProto(absl::string_view filename,
+                             ModelProto *model_proto) {
+   if (filename.empty()) {
+     return util::NotFoundError("model file path should not be empty.");
+@@ -893,7 +894,7 @@ util::Status LoadModelProto(absl::string_view filename,
+   return util::OkStatus();
+ }
+-util::Status SaveModelProto(absl::string_view filename,
++absl::Status SaveModelProto(absl::string_view filename,
+                             const ModelProto &model_proto) {
+   if (filename.empty()) {
+     return util::NotFoundError("model file path should not be empty.");
+diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h
+index e8bd5f5..346fb0e 100644
+--- a/src/sentencepiece_processor.h
++++ b/src/sentencepiece_processor.h
+@@ -20,9 +20,10 @@
+ #include <string>
+ #include <utility>
+ #include <vector>
++#include "absl/status/status.h"
+ #if defined(_USE_INTERNAL_STRING_VIEW)
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
+ #elif defined(_USE_TF_STRING_VIEW)
+ #include "absl/strings/string_view.h"
+ #else
+@@ -185,7 +186,7 @@ class SentencePieceProcessor {
+   // Loads model from `filename`.
+   // Returns false if `filename` cannot be loaded.
+-  virtual util::Status Load(absl::string_view filename);
++  virtual absl::Status Load(absl::string_view filename);
+   // Loads model from `filename`.
+   // Crash if `filename` cannot be loaded.
+@@ -193,24 +194,24 @@ class SentencePieceProcessor {
+   // Loads model from `model_proto`.
+   // `model_proto` is copied.
+-  virtual util::Status Load(const ModelProto &model_proto);
++  virtual absl::Status Load(const ModelProto &model_proto);
+   // Loads model from `model_proto`.
+   // `model_proto` is moved.
+-  virtual util::Status Load(std::unique_ptr<ModelProto> model_proto);
++  virtual absl::Status Load(std::unique_ptr<ModelProto> model_proto);
+   // Loads model from `serialized`, which is a string-serialized model proto.
+   // Useful to load the model from a platform independent blob object.
+-  virtual util::Status LoadFromSerializedProto(absl::string_view serialized);
++  virtual absl::Status LoadFromSerializedProto(absl::string_view serialized);
+   // Returns the status. Encode/Decode methods are valid when status is OK.
+-  virtual util::Status status() const;
++  virtual absl::Status status() const;
+   // Sets encode extra_option sequence.
+-  virtual util::Status SetEncodeExtraOptions(absl::string_view extra_option);
++  virtual absl::Status SetEncodeExtraOptions(absl::string_view extra_option);
+   // Sets decode extra_option sequence.
+-  virtual util::Status SetDecodeExtraOptions(absl::string_view extra_option);
++  virtual absl::Status SetDecodeExtraOptions(absl::string_view extra_option);
+   //////////////////////////////////////////////////////////////
+   // Vocabulary restriction.
+@@ -219,41 +220,41 @@ class SentencePieceProcessor {
+   // Restricts the vocabulary set.
+   // The input sentences are encoded into the tokens in `valid_vocab`.
+-  virtual util::Status SetVocabulary(
++  virtual absl::Status SetVocabulary(
+       const std::vector<std::string> &valid_vocab);
+   // Reverts the vocabulary restriction.
+-  virtual util::Status ResetVocabulary();
++  virtual absl::Status ResetVocabulary();
+   // Loads the valid vocabulary set from `filename` in TSV format.
+   // Format:  <token> <tab> <freq>.
+   // Any token with frequency < threshold will be treated as OOV.
+-  virtual util::Status LoadVocabulary(absl::string_view filename,
++  virtual absl::Status LoadVocabulary(absl::string_view filename,
+                                       int threshold);
+   //////////////////////////////////////////////////////////////
+   // Simple API.
+   //
+   // Given a UTF8 input, encodes it into a sequence of sentence pieces.
+-  virtual util::Status Encode(absl::string_view input,
++  virtual absl::Status Encode(absl::string_view input,
+                               std::vector<std::string> *pieces) const;
+   // Given a UTF8 input, encodes it into a sequence of ids.
+-  virtual util::Status Encode(absl::string_view input,
++  virtual absl::Status Encode(absl::string_view input,
+                               std::vector<int> *ids) const;
+   // Given a sequence of pieces, decodes it into a detokenized output.
+-  virtual util::Status Decode(const std::vector<std::string> &pieces,
++  virtual absl::Status Decode(const std::vector<std::string> &pieces,
+                               std::string *detokenized) const;
+   // Given a sequence of ids, decodes it into a detokenized output.
+-  virtual util::Status Decode(const std::vector<int> &ids,
++  virtual absl::Status Decode(const std::vector<int> &ids,
+                               std::string *detokenized) const;
+   // Sets the encoder version. Normally users do not need to call this function.
+   // But they can call this fucntion just in case if they want to fall back to
+   // the original encoder.
+-  virtual util::Status SetEncoderVersion(EncoderVersion encoder_version);
++  virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version);
+   // Returns the current encoder version in use.
+   virtual EncoderVersion GetEncoderVersion() const;
+@@ -261,12 +262,12 @@ class SentencePieceProcessor {
+   //////////////////////////////////////////////////////////////
+   // NBest API.
+   // Same as Encode, but returns nbest results.
+-  virtual util::Status NBestEncode(
++  virtual absl::Status NBestEncode(
+       absl::string_view input, int nbest_size,
+       std::vector<std::vector<std::string>> *pieces) const;
+   // Same as Encode, but returns nbest results.
+-  virtual util::Status NBestEncode(absl::string_view input, int nbest_size,
++  virtual absl::Status NBestEncode(absl::string_view input, int nbest_size,
+                                    std::vector<std::vector<int>> *ids) const;
+   //////////////////////////////////////////////////////////////
+@@ -289,12 +290,12 @@ class SentencePieceProcessor {
+   // in https://arxiv.org/abs/1910.13267
+   // Nbest-based sampling is not supported so nbest_size parameter is ignored in
+   // BPE.
+-  virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
++  virtual absl::Status SampleEncode(absl::string_view input, int nbest_size,
+                                     float alpha,
+                                     std::vector<std::string> *pieces) const;
+   // Same as above, but returns a sequence of ids.
+-  virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
++  virtual absl::Status SampleEncode(absl::string_view input, int nbest_size,
+                                     float alpha, std::vector<int> *ids) const;
+   //////////////////////////////////////////////////////////////
+@@ -303,16 +304,16 @@ class SentencePieceProcessor {
+   // and internal sentencepiece sequence.
+   //
+   // Given a UTF8 input, encodes it into SentencePieceText.
+-  virtual util::Status Encode(absl::string_view input,
++  virtual absl::Status Encode(absl::string_view input,
+                               SentencePieceText *spt) const;
+   // Same as above, but returns NBestSentencePieceText.
+-  virtual util::Status NBestEncode(absl::string_view input, int nbest_size,
++  virtual absl::Status NBestEncode(absl::string_view input, int nbest_size,
+                                    NBestSentencePieceText *nbest_spt) const;
+   // Same as above, but samples one segmentation from the hypotheses
+   // (Lattice).
+-  virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
++  virtual absl::Status SampleEncode(absl::string_view input, int nbest_size,
+                                     float alpha, SentencePieceText *spt) const;
+   // Sample `samples` segmentations from the segmentation lattice.
+@@ -323,21 +324,21 @@ class SentencePieceProcessor {
+   // If `include_best` is true, the best tokenization is always included in the
+   // sample, and the remaining elements are sampled excluding the best.
+   // This method is only available in Unigram mode.
+-  virtual util::Status SampleEncodeAndScore(
++  virtual absl::Status SampleEncodeAndScore(
+       absl::string_view input, int samples, float theta, bool wor,
+       bool include_best, NBestSentencePieceText *samples_spt) const;
+   // Calculate entropy of possible tokenization.
+   // Only available in unigram mode.
+-  virtual util::Status CalculateEntropy(absl::string_view input, float theta,
++  virtual absl::Status CalculateEntropy(absl::string_view input, float theta,
+                                         float *entropy) const;
+   // Given a sequence of pieces, decodes it into SentencePieceText.
+-  virtual util::Status Decode(const std::vector<std::string> &pieces,
++  virtual absl::Status Decode(const std::vector<std::string> &pieces,
+                               SentencePieceText *spt) const;
+   // Given a sequence of ids, decodes it into SentencePieceText.
+-  virtual util::Status Decode(const std::vector<int> &ids,
++  virtual absl::Status Decode(const std::vector<int> &ids,
+                               SentencePieceText *spt) const;
+   //////////////////////////////////////////////////////////////
+@@ -487,13 +488,13 @@ class SentencePieceProcessor {
+  private:
+   enum ExtraOption { REVERSE, BOS, EOS };
+-  util::Status ParseExtraOptions(absl::string_view extra_option,
++  absl::Status ParseExtraOptions(absl::string_view extra_option,
+                                  std::vector<ExtraOption> *extra_options) const;
+-  util::Status ApplyExtraOptions(const std::vector<ExtraOption> &extra_options,
++  absl::Status ApplyExtraOptions(const std::vector<ExtraOption> &extra_options,
+                                  SentencePieceText *spt) const;
+-  util::Status PopulateSentencePieceText(
++  absl::Status PopulateSentencePieceText(
+       absl::string_view input, absl::string_view normalized,
+       const std::vector<size_t> &norm_to_orig,
+       const std::vector<std::pair<absl::string_view, int>> &result,
+@@ -526,10 +527,10 @@ namespace io {
+ //  io::LoadModelProto("//path/spm.model", model_proto.get());
+ //  SentencePieceProcessor sp;
+ //  CHECK_OK(sp.Load(std::move(model_proto)));
+-util::Status LoadModelProto(absl::string_view, ModelProto *model_proto);
++absl::Status LoadModelProto(absl::string_view, ModelProto *model_proto);
+ // Saves `model_proto` as `filename`.
+-util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto);
++absl::Status SaveModelProto(absl::string_view, const ModelProto &model_proto);
+ }  // namespace io
+ #endif  // SWIG
+ }  // namespace sentencepiece
+diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc
+index 373e73e..829c3d4 100644
+--- a/src/sentencepiece_processor_test.cc
++++ b/src/sentencepiece_processor_test.cc
+@@ -23,10 +23,10 @@
+ #include "sentencepiece_processor.h"
+ #include "sentencepiece_trainer.h"
+ #include "testharness.h"
+-#include "third_party/absl/container/flat_hash_map.h"
+-#include "third_party/absl/memory/memory.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/memory/memory.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/string_view.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc
+index b9fe64f..5b33cd7 100644
+--- a/src/sentencepiece_trainer.cc
++++ b/src/sentencepiece_trainer.cc
+@@ -22,12 +22,13 @@
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_trainer.h"
+ #include "spec_parser.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/strings/numbers.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_split.h"
+-#include "third_party/absl/strings/string_view.h"
+-#include "third_party/absl/strings/strip.h"
++#include "absl/flags/flag.h"
++#include "absl/strings/numbers.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_split.h"
++#include "absl/strings/string_view.h"
++#include "absl/strings/strip.h"
++#include "absl/status/status.h"
+ #include "trainer_factory.h"
+ #include "util.h"
+@@ -37,7 +38,7 @@ static constexpr char kDefaultNormalizerName[] = "nmt_nfkc";
+ }  // namespace
+ // static
+-util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
++absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
+                                          SentenceIterator *sentence_iterator,
+                                          std::string *serialized_model_proto) {
+   NormalizerSpec normalizer_spec;
+@@ -45,7 +46,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
+                serialized_model_proto);
+ }
+-util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
++absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
+                                          const NormalizerSpec &normalizer_spec,
+                                          SentenceIterator *sentence_iterator,
+                                          std::string *serialized_model_proto) {
+@@ -55,7 +56,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
+ }
+ // static
+-util::Status SentencePieceTrainer::Train(
++absl::Status SentencePieceTrainer::Train(
+     const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec,
+     const NormalizerSpec &denormalizer_spec,
+     SentenceIterator *sentence_iterator, std::string *serialized_model_proto) {
+@@ -97,7 +98,7 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(absl::string_view name) {
+ }
+ // static
+-util::Status SentencePieceTrainer::MergeSpecsFromArgs(
++absl::Status SentencePieceTrainer::MergeSpecsFromArgs(
+     absl::string_view args, TrainerSpec *trainer_spec,
+     NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec) {
+   CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null.";
+@@ -125,7 +126,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs(
+ }
+ // static
+-util::Status SentencePieceTrainer::MergeSpecsFromArgs(
++absl::Status SentencePieceTrainer::MergeSpecsFromArgs(
+     const std::unordered_map<std::string, std::string> &kwargs,
+     TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec,
+     NormalizerSpec *denormalizer_spec) {
+@@ -171,7 +172,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs(
+ }
+ // static
+-util::Status SentencePieceTrainer::Train(absl::string_view args,
++absl::Status SentencePieceTrainer::Train(absl::string_view args,
+                                          SentenceIterator *sentence_iterator,
+                                          std::string *serialized_model_proto) {
+   LOG(INFO) << "Running command: " << args.data();
+@@ -185,7 +186,7 @@ util::Status SentencePieceTrainer::Train(absl::string_view args,
+ }
+ // static
+-util::Status SentencePieceTrainer::Train(
++absl::Status SentencePieceTrainer::Train(
+     const std::unordered_map<std::string, std::string> &kwargs,
+     SentenceIterator *sentence_iterator, std::string *serialized_model_proto) {
+   TrainerSpec trainer_spec;
+@@ -198,7 +199,7 @@ util::Status SentencePieceTrainer::Train(
+ }
+ // static
+-util::Status SentencePieceTrainer::PopulateNormalizerSpec(
++absl::Status SentencePieceTrainer::PopulateNormalizerSpec(
+     NormalizerSpec *normalizer_spec, bool is_denormalizer) {
+   CHECK_OR_RETURN(normalizer_spec);
+@@ -226,7 +227,7 @@ util::Status SentencePieceTrainer::PopulateNormalizerSpec(
+ }
+ // static
+-util::Status SentencePieceTrainer::PopulateModelTypeFromString(
++absl::Status SentencePieceTrainer::PopulateModelTypeFromString(
+     absl::string_view type, TrainerSpec *spec) {
+   static const std::unordered_map<std::string, TrainerSpec::ModelType>
+       kModelTypeMap = {{"unigram", TrainerSpec::UNIGRAM},
+@@ -239,7 +240,7 @@ util::Status SentencePieceTrainer::PopulateModelTypeFromString(
+     return util::OkStatus();
+   }
+-  return util::StatusBuilder(util::StatusCode::kInternal, GTL_LOC)
++  return util::StatusBuilder(absl::StatusCode::kInternal, GTL_LOC)
+          << "\"" << type << "\" is not found in TrainerSpec";
+ }
+@@ -248,7 +249,7 @@ const pretokenizer::PretokenizerForTrainingInterface *g_pretokenizer = nullptr;
+ }  // namespace
+ // static
+-util::Status SentencePieceTrainer::SetPretokenizerForTraining(
++absl::Status SentencePieceTrainer::SetPretokenizerForTraining(
+     const pretokenizer::PretokenizerForTrainingInterface *pretokenizer) {
+   g_pretokenizer = pretokenizer;
+   return util::OkStatus();
+diff --git a/src/sentencepiece_trainer.h b/src/sentencepiece_trainer.h
+index bb74ab9..ec6cf93 100644
+--- a/src/sentencepiece_trainer.h
++++ b/src/sentencepiece_trainer.h
+@@ -19,6 +19,7 @@
+ #include <unordered_map>
+ #include "sentencepiece_processor.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+@@ -46,7 +47,7 @@ class SentenceIterator {
+   virtual bool done() const = 0;
+   virtual void Next() = 0;
+   virtual const std::string &value() const = 0;
+-  virtual util::Status status() const = 0;
++  virtual absl::Status status() const = 0;
+ };
+ class SentencePieceTrainer {
+@@ -54,14 +55,14 @@ class SentencePieceTrainer {
+   // Trains SentencePiece model with `trainer_spec`.
+   // Default `normalizer_spec` is used.
+   // When `sentence_iterator` is passed, load sentences from the iterator.
+-  static util::Status Train(const TrainerSpec &trainer_spec,
++  static absl::Status Train(const TrainerSpec &trainer_spec,
+                             SentenceIterator *sentence_iterator = nullptr,
+                             std::string *serialized_model_proto = nullptr);
+   // Trains SentencePiece model with `trainer_spec` and
+   // `normalizer_spec`.
+   // When `sentence_iterator` is passed, load sentences from the iterator.
+-  static util::Status Train(const TrainerSpec &trainer_spec,
++  static absl::Status Train(const TrainerSpec &trainer_spec,
+                             const NormalizerSpec &normalizer_spec,
+                             SentenceIterator *sentence_iterator = nullptr,
+                             std::string *serialized_model_proto = nullptr);
+@@ -69,7 +70,7 @@ class SentencePieceTrainer {
+   // Trains SentencePiece model with `trainer_spec`, `normalizer_spec`
+   // and `denormalizer_spec`.
+   // When `sentence_iterator` is passed, load sentences from the iterator.
+-  static util::Status Train(const TrainerSpec &trainer_spec,
++  static absl::Status Train(const TrainerSpec &trainer_spec,
+                             const NormalizerSpec &normalizer_spec,
+                             const NormalizerSpec &denormalizer_spec,
+                             SentenceIterator *sentence_iterator = nullptr,
+@@ -78,13 +79,13 @@ class SentencePieceTrainer {
+   // e.g.,
+   // '--input=data --model_prefix=m --vocab_size=8192 model_type=unigram'
+   // When `sentence_iterator` is passed, load sentences from the iterator.
+-  static util::Status Train(absl::string_view args,
++  static absl::Status Train(absl::string_view args,
+                             SentenceIterator *sentence_iterator = nullptr,
+                             std::string *serialized_model_proto = nullptr);
+   // Trains SentencePiece model with mapin `kwargs`.
+   // e.g., {{"input", "data"}, {"model_prefix, "m"}, {"vocab_size", "8192"}...}
+-  static util::Status Train(
++  static absl::Status Train(
+       const std::unordered_map<std::string, std::string> &kwargs,
+       SentenceIterator *sentence_iterator = nullptr,
+       std::string *serialized_model_proto = nullptr);
+@@ -96,19 +97,19 @@ class SentencePieceTrainer {
+   // Populates necessary fields (precompiled_charmap) from
+   // `NormalizerSpec::name` or `NormalizerSpec::normalization_rule_tsv`.
+-  static util::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec,
++  static absl::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec,
+                                              bool is_denormalizer = false);
+   // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the
+   // std::unordered_map in `kargs`.
+-  static util::Status MergeSpecsFromArgs(
++  static absl::Status MergeSpecsFromArgs(
+       const std::unordered_map<std::string, std::string> &kwargs,
+       TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec,
+       NormalizerSpec *denormalizer_spec);
+   // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the
+   // command line flags in `args`.
+-  static util::Status MergeSpecsFromArgs(absl::string_view args,
++  static absl::Status MergeSpecsFromArgs(absl::string_view args,
+                                          TrainerSpec *trainer_spec,
+                                          NormalizerSpec *normalizer_spec,
+                                          NormalizerSpec *denormalizer_spec);
+@@ -116,7 +117,7 @@ class SentencePieceTrainer {
+   // Injects global pre-tokenizer that are applied in training time.
+   // Pretokenizer is only used for extracting pieces.
+   // TODO(taku): It would be better to inject per `trainer_spec`.
+-  static util::Status SetPretokenizerForTraining(
++  static absl::Status SetPretokenizerForTraining(
+       const pretokenizer::PretokenizerForTrainingInterface *pretokenizer);
+   // Returns the current pretokenizer. if no pretokenizer is defined, returns
+@@ -129,17 +130,17 @@ class SentencePieceTrainer {
+   // with comma-separated values. `field_name` must not be a nested message.
+   // The body of these functions are automatically generated with
+   // data/gen_spec_parser.pl
+-  static util::Status SetProtoField(const std::string &name,
++  static absl::Status SetProtoField(const std::string &name,
+                                     const std::string &value,
+                                     TrainerSpec *message);
+-  static util::Status SetProtoField(const std::string &name,
++  static absl::Status SetProtoField(const std::string &name,
+                                     const std::string &value,
+                                     NormalizerSpec *message);
+   // Populates model type from string representation, e.g., "bpe".
+   // Supported model: "unigram", "bpe", "word", "char".
+-  static util::Status PopulateModelTypeFromString(absl::string_view type,
++  static absl::Status PopulateModelTypeFromString(absl::string_view type,
+                                                   TrainerSpec *trainer_spec);
+  private:
+diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc
+index e44e66b..00c8d08 100644
+--- a/src/sentencepiece_trainer_test.cc
++++ b/src/sentencepiece_trainer_test.cc
+@@ -16,7 +16,8 @@
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_trainer.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
++#include "absl/strings/str_cat.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -109,7 +110,7 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) {
+     bool done() const override { return idx_ == vec_.size(); }
+     void Next() override { ++idx_; }
+     const std::string &value() const override { return vec_[idx_]; }
+-    util::Status status() const override { return util::OkStatus(); }
++    absl::Status status() const override { return util::OkStatus(); }
+    private:
+     std::vector<std::string> vec_;
+diff --git a/src/spec_parser.h b/src/spec_parser.h
+index 2c5a95b..259c45d 100644
+--- a/src/spec_parser.h
++++ b/src/spec_parser.h
+@@ -19,8 +19,9 @@
+ #include <vector>
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/strings/ascii.h"
+-#include "third_party/absl/strings/str_split.h"
++#include "absl/strings/ascii.h"
++#include "absl/strings/str_split.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -49,7 +50,7 @@ namespace sentencepiece {
+   if (name == #param_name) {                                                  \
+     int32 v;                                                                  \
+     if (!string_util::lexical_cast(value, &v))                                \
+-      return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
++      return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
+              << "cannot parse \"" << value << "\" as int.";                   \
+     message->set_##param_name(v);                                             \
+     return util::OkStatus();                                                  \
+@@ -59,7 +60,7 @@ namespace sentencepiece {
+   if (name == #param_name) {                                                  \
+     uint64 v;                                                                 \
+     if (!string_util::lexical_cast(value, &v))                                \
+-      return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
++      return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
+              << "cannot parse \"" << value << "\" as int.";                   \
+     message->set_##param_name(v);                                             \
+     return util::OkStatus();                                                  \
+@@ -69,7 +70,7 @@ namespace sentencepiece {
+   if (name == #param_name) {                                                  \
+     double v;                                                                 \
+     if (!string_util::lexical_cast(value, &v))                                \
+-      return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
++      return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
+              << "cannot parse \"" << value << "\" as int.";                   \
+     message->set_##param_name(v);                                             \
+     return util::OkStatus();                                                  \
+@@ -79,7 +80,7 @@ namespace sentencepiece {
+   if (name == #param_name) {                                                  \
+     bool v;                                                                   \
+     if (!string_util::lexical_cast(value.empty() ? "true" : value, &v))       \
+-      return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
++      return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
+              << "cannot parse \"" << value << "\" as bool.";                  \
+     message->set_##param_name(v);                                             \
+     return util::OkStatus();                                                  \
+@@ -89,7 +90,7 @@ namespace sentencepiece {
+   if (name == #param_name) {                                                  \
+     const auto it = map_name.find(absl::AsciiStrToUpper(value));              \
+     if (it == map_name.end())                                                 \
+-      return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
++      return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
+              << "unknown enumeration value of \"" << value << "\" as "        \
+              << #map_name;                                                    \
+     message->set_##param_name(it->second);                                    \
+@@ -186,7 +187,7 @@ inline std::string PrintProto(const NormalizerSpec &message,
+   return os.str();
+ }
+-util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
++absl::Status SentencePieceTrainer::SetProtoField(const std::string &name,
+                                                  const std::string &value,
+                                                  TrainerSpec *message) {
+   CHECK_OR_RETURN(message);
+@@ -239,11 +240,11 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
+   PARSE_STRING(pad_piece);
+   PARSE_STRING(unk_surface);
+-  return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
++  return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
+          << "unknown field name \"" << name << "\" in TrainerSpec.";
+ }
+-util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
++absl::Status SentencePieceTrainer::SetProtoField(const std::string &name,
+                                                  const std::string &value,
+                                                  NormalizerSpec *message) {
+   CHECK_OR_RETURN(message);
+@@ -255,7 +256,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
+   PARSE_BOOL(escape_whitespaces);
+   PARSE_STRING(normalization_rule_tsv);
+-  return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
++  return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
+          << "unknown field name \"" << name << "\" in NormalizerSpec.";
+ }
+diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc
+index 3382ddc..9dda65c 100644
+--- a/src/spm_decode_main.cc
++++ b/src/spm_decode_main.cc
+@@ -21,8 +21,8 @@
+ #include "init.h"
+ #include "sentencepiece.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/strings/str_split.h"
++#include "absl/flags/flag.h"
++#include "absl/strings/str_split.h"
+ #include "util.h"
+ ABSL_FLAG(std::string, model, "", "model file name");
+diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc
+index 4d12a38..29b7458 100644
+--- a/src/spm_encode_main.cc
++++ b/src/spm_encode_main.cc
+@@ -21,10 +21,10 @@
+ #include "init.h"
+ #include "sentencepiece.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/container/flat_hash_map.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/flags/flag.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
+ #include "trainer_interface.h"
+ ABSL_FLAG(std::string, model, "", "model file name");
+diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc
+index b5d93cb..70a65c1 100644
+--- a/src/spm_export_vocab_main.cc
++++ b/src/spm_export_vocab_main.cc
+@@ -20,7 +20,7 @@
+ #include "init.h"
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/flags/flag.h"
++#include "absl/flags/flag.h"
+ ABSL_FLAG(std::string, output, "", "Output filename");
+ ABSL_FLAG(std::string, model, "", "input model file name");
+diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc
+index 96da360..8c541b8 100644
+--- a/src/spm_normalize_main.cc
++++ b/src/spm_normalize_main.cc
+@@ -21,7 +21,7 @@
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+ #include "sentencepiece_trainer.h"
+-#include "third_party/absl/flags/flag.h"
++#include "absl/flags/flag.h"
+ ABSL_FLAG(std::string, model, "", "Model file name");
+ ABSL_FLAG(bool, use_internal_normalization, false,
+diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc
+index baf8dbf..ba1e811 100644
+--- a/src/spm_train_main.cc
++++ b/src/spm_train_main.cc
+@@ -18,10 +18,10 @@
+ #include "init.h"
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_trainer.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/strings/ascii.h"
+-#include "third_party/absl/strings/str_join.h"
+-#include "third_party/absl/strings/str_split.h"
++#include "absl/flags/flag.h"
++#include "absl/strings/ascii.h"
++#include "absl/strings/str_join.h"
++#include "absl/strings/str_split.h"
+ #include "util.h"
+ using sentencepiece::NormalizerSpec;
+diff --git a/src/testharness.cc b/src/testharness.cc
+index f6b1efe..daf2d14 100644
+--- a/src/testharness.cc
++++ b/src/testharness.cc
+@@ -26,7 +26,7 @@
+ #include <vector>
+ #include "common.h"
+-#include "third_party/absl/strings/str_cat.h"
++#include "absl/strings/str_cat.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/testharness.h b/src/testharness.h
+index 9879b06..98317ad 100644
+--- a/src/testharness.h
++++ b/src/testharness.h
+@@ -21,9 +21,9 @@
+ #include <string>
+ #include "common.h"
+-#include "third_party/absl/flags/flag.h"
+-#include "third_party/absl/flags/parse.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/flags/flag.h"
++#include "absl/flags/parse.h"
++#include "absl/strings/string_view.h"
+ ABSL_DECLARE_FLAG(std::string, test_tmpdir);
+ ABSL_DECLARE_FLAG(std::string, test_srcdir);
+diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc
+index d1d2541..ff594d0 100644
+--- a/src/trainer_factory.cc
++++ b/src/trainer_factory.cc
+@@ -14,7 +14,7 @@
+ #include "bpe_model_trainer.h"
+ #include "char_model_trainer.h"
+-#include "third_party/absl/memory/memory.h"
++#include "absl/memory/memory.h"
+ #include "trainer_factory.h"
+ #include "unigram_model_trainer.h"
+ #include "word_model_trainer.h"
+diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc
+index a3a4b74..e6a2587 100644
+--- a/src/trainer_interface.cc
++++ b/src/trainer_interface.cc
+@@ -26,13 +26,14 @@
+ #include "normalizer.h"
+ #include "sentencepiece_processor.h"
+ #include "sentencepiece_trainer.h"
+-#include "third_party/absl/container/flat_hash_map.h"
+-#include "third_party/absl/memory/memory.h"
+-#include "third_party/absl/strings/numbers.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_format.h"
+-#include "third_party/absl/strings/str_join.h"
+-#include "third_party/absl/strings/str_split.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/memory/memory.h"
++#include "absl/strings/numbers.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_format.h"
++#include "absl/strings/str_join.h"
++#include "absl/strings/str_split.h"
++#include "absl/status/status.h"
+ #include "trainer_interface.h"
+ #include "unicode_script.h"
+ #include "util.h"
+@@ -49,7 +50,7 @@ const char32 TrainerInterface::kUPPBoundaryChar = L'\u0009';
+ const char TrainerInterface::kUPPBoundaryStr[] = "\t";
+ namespace {
+-util::Status VerifySpec(const TrainerSpec &trainer_spec) {
++absl::Status VerifySpec(const TrainerSpec &trainer_spec) {
+   CHECK_GT_OR_RETURN(trainer_spec.vocab_size(), 0);
+   if (trainer_spec.model_type() == TrainerSpec::UNIGRAM ||
+@@ -164,7 +165,7 @@ bool MultiFileSentenceIterator::done() const {
+   return (!read_done_ && file_index_ == files_.size());
+ }
+-util::Status MultiFileSentenceIterator::status() const {
++absl::Status MultiFileSentenceIterator::status() const {
+   CHECK_OR_RETURN(fp_);
+   return fp_->status();
+ }
+@@ -296,7 +297,7 @@ bool TrainerInterface::IsValidSentencePiece(
+   return true;
+ }
+-util::Status TrainerInterface::LoadSentences() {
++absl::Status TrainerInterface::LoadSentences() {
+   RETURN_IF_ERROR(status());
+   CHECK_OR_RETURN(sentences_.empty());
+   CHECK_OR_RETURN(required_chars_.empty());
+@@ -537,7 +538,7 @@ void TrainerInterface::SplitSentencesByWhitespace() {
+   LOG(INFO) << "Done! " << sentences_.size();
+ }
+-util::Status TrainerInterface::Serialize(ModelProto *model_proto) const {
++absl::Status TrainerInterface::Serialize(ModelProto *model_proto) const {
+   RETURN_IF_ERROR(status());
+   // Duplicated sentencepiece is not allowed.
+@@ -611,7 +612,7 @@ util::Status TrainerInterface::Serialize(ModelProto *model_proto) const {
+   return util::OkStatus();
+ }
+-util::Status TrainerInterface::SaveModel(absl::string_view filename) const {
++absl::Status TrainerInterface::SaveModel(absl::string_view filename) const {
+   LOG(INFO) << "Saving model: " << filename;
+   ModelProto model_proto;
+   RETURN_IF_ERROR(Serialize(&model_proto));
+@@ -622,7 +623,7 @@ util::Status TrainerInterface::SaveModel(absl::string_view filename) const {
+   return util::OkStatus();
+ }
+-util::Status TrainerInterface::SaveVocab(absl::string_view filename) const {
++absl::Status TrainerInterface::SaveVocab(absl::string_view filename) const {
+   LOG(INFO) << "Saving vocabs: " << filename;
+   ModelProto model_proto;
+   RETURN_IF_ERROR(Serialize(&model_proto));
+@@ -644,7 +645,7 @@ util::Status TrainerInterface::SaveVocab(absl::string_view filename) const {
+   return util::OkStatus();
+ }
+-util::Status TrainerInterface::Save() const {
++absl::Status TrainerInterface::Save() const {
+   if (output_model_proto_) {
+     RETURN_IF_ERROR(Serialize(output_model_proto_));
+   } else {
+@@ -654,7 +655,7 @@ util::Status TrainerInterface::Save() const {
+   return util::OkStatus();
+ }
+-util::Status TrainerInterface::InitMetaPieces() {
++absl::Status TrainerInterface::InitMetaPieces() {
+   CHECK_OR_RETURN(meta_pieces_.empty());
+   bool has_unk = false;
+diff --git a/src/trainer_interface.h b/src/trainer_interface.h
+index f66d59a..b4fbc7b 100644
+--- a/src/trainer_interface.h
++++ b/src/trainer_interface.h
+@@ -27,7 +27,8 @@
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+ #include "sentencepiece_trainer.h"
+-#include "third_party/absl/container/flat_hash_map.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -57,7 +58,7 @@ class MultiFileSentenceIterator : public SentenceIterator {
+   bool done() const override;
+   void Next() override;
+   const std::string &value() const override { return value_; }
+-  util::Status status() const override;
++  absl::Status status() const override;
+  private:
+   void TryRead();
+@@ -90,16 +91,16 @@ class TrainerInterface {
+   // Loads sentence from `sentence_iterator` and stores the model
+   // to `output_model_proto`.
+-  virtual util::Status Train(SentenceIterator *sentence_iterator,
++  virtual absl::Status Train(SentenceIterator *sentence_iterator,
+                              ModelProto *output_model_proto) {
+     sentence_iterator_ = sentence_iterator;
+     output_model_proto_ = output_model_proto;
+     return Train();
+   }
+-  virtual util::Status Train() { return status(); }
++  virtual absl::Status Train() { return status(); }
+-  virtual util::Status status() const { return status_; }
++  virtual absl::Status status() const { return status_; }
+   FRIEND_TEST(TrainerInterfaceTest, IsValidSentencePieceTest);
+   FRIEND_TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest);
+@@ -115,7 +116,7 @@ class TrainerInterface {
+   // Loads all sentences from spec.input() or SentenceIterator.
+   // It loads at most input_sentence_size sentences.
+-  util::Status LoadSentences();
++  absl::Status LoadSentences();
+   // Splits all sentencecs by whitespaces and
+   // replace the |sentences_| with tokenized string.
+@@ -125,7 +126,7 @@ class TrainerInterface {
+   void SplitSentencesByWhitespace();
+   // Save model files into spec.model_prefix().
+-  util::Status Save() const;
++  absl::Status Save() const;
+   // Set of characters which must be included in the final vocab.
+   // The value of this map stores the frequency.
+@@ -152,7 +153,7 @@ class TrainerInterface {
+       meta_pieces_;
+   // Detect errors on initialization.
+-  util::Status status_;
++  absl::Status status_;
+   // Loads sentences from SentenceIterator if not null.
+   SentenceIterator *sentence_iterator_ = nullptr;
+@@ -162,19 +163,19 @@ class TrainerInterface {
+  private:
+   // Serialize final_pieces_ to |model_proto|.
+-  util::Status Serialize(ModelProto *model_proto) const;
++  absl::Status Serialize(ModelProto *model_proto) const;
+   // Saves the best sentence split with the current model for debugging.
+-  util::Status SaveSplits(absl::string_view filename) const;
++  absl::Status SaveSplits(absl::string_view filename) const;
+   // Saves model file.
+-  util::Status SaveModel(absl::string_view filename) const;
++  absl::Status SaveModel(absl::string_view filename) const;
+   // Saves vocabulary file for NMT.
+-  util::Status SaveVocab(absl::string_view filename) const;
++  absl::Status SaveVocab(absl::string_view filename) const;
+   // Initializes `meta_pieces_` from TrainerSpec.
+-  util::Status InitMetaPieces();
++  absl::Status InitMetaPieces();
+   // Randomly sampled raw sentences for self-testing.
+   std::vector<std::string> self_test_samples_;
+diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc
+index 70a51ad..d7f3f0c 100644
+--- a/src/trainer_interface_test.cc
++++ b/src/trainer_interface_test.cc
+@@ -16,8 +16,8 @@
+ #include "filesystem.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_format.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_format.h"
+ #include "trainer_interface.h"
+ #include "util.h"
+diff --git a/src/unicode_script.cc b/src/unicode_script.cc
+index 583dc30..11b24dc 100644
+--- a/src/unicode_script.cc
++++ b/src/unicode_script.cc
+@@ -14,7 +14,7 @@
+ #include <unordered_map>
+-#include "third_party/absl/container/flat_hash_map.h"
++#include "absl/container/flat_hash_map.h"
+ #include "unicode_script.h"
+ #include "unicode_script_map.h"
+ #include "util.h"
+diff --git a/src/unicode_script_map.h b/src/unicode_script_map.h
+index f2e67e9..f1b8299 100644
+--- a/src/unicode_script_map.h
++++ b/src/unicode_script_map.h
+@@ -14,7 +14,7 @@
+ #ifndef UNICODE_SCRIPT_DATA_H_
+ #define UNICODE_SCRIPT_DATA_H_
+-#include "third_party/absl/container/flat_hash_map.h"
++#include "absl/container/flat_hash_map.h"
+ namespace sentencepiece {
+ namespace unicode_script {
+ namespace {
+diff --git a/src/unicode_script_test.cc b/src/unicode_script_test.cc
+index ab33565..e0b1c4d 100644
+--- a/src/unicode_script_test.cc
++++ b/src/unicode_script_test.cc
+@@ -14,7 +14,7 @@
+ #include "common.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
+ #include "unicode_script.h"
+ #include "util.h"
+diff --git a/src/unigram_model.cc b/src/unigram_model.cc
+index 3b99060..9c72fb9 100644
+--- a/src/unigram_model.cc
++++ b/src/unigram_model.cc
+@@ -22,9 +22,9 @@
+ #include <utility>
+ #include <vector>
+-#include "third_party/absl/memory/memory.h"
+-#include "third_party/absl/strings/str_split.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/memory/memory.h"
++#include "absl/strings/str_split.h"
++#include "absl/strings/string_view.h"
+ #include "unigram_model.h"
+ #include "util.h"
+diff --git a/src/unigram_model.h b/src/unigram_model.h
+index 448e489..9062f12 100644
+--- a/src/unigram_model.h
++++ b/src/unigram_model.h
+@@ -24,7 +24,7 @@
+ #include "freelist.h"
+ #include "model_interface.h"
+ #include "sentencepiece_model.pb.h"
+-#include "third_party/darts_clone/darts.h"
++#include "include/darts.h"
+ namespace sentencepiece {
+ namespace unigram {
+diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc
+index f93b21c..808e907 100644
+--- a/src/unigram_model_test.cc
++++ b/src/unigram_model_test.cc
+@@ -22,8 +22,8 @@
+ #include "sentencepiece_model.pb.h"
+ #include "sentencepiece_processor.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
+ #include "util.h"
+ namespace sentencepiece {
+diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc
+index 9615040..7d16bd2 100644
+--- a/src/unigram_model_trainer.cc
++++ b/src/unigram_model_trainer.cc
+@@ -25,8 +25,9 @@
+ #include "normalizer.h"
+ #include "pretokenizer_for_training.h"
+ #include "sentencepiece_trainer.h"
+-#include "third_party/absl/container/flat_hash_map.h"
+-#include "third_party/absl/memory/memory.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/memory/memory.h"
++#include "absl/status/status.h"
+ #include "third_party/esaxx/esa.hxx"  // Suffix array library.
+ #include "unicode_script.h"
+ #include "unigram_model_trainer.h"
+@@ -463,7 +464,7 @@ TrainerModel::SentencePieces Trainer::FinalizeSentencePieces(
+   return Sorted(final_sentencepieces);
+ }
+-util::Status Trainer::Train() {
++absl::Status Trainer::Train() {
+   RETURN_IF_ERROR(status());
+   CHECK_EQ_OR_RETURN(TrainerSpec::UNIGRAM, trainer_spec_.model_type());
+diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h
+index 91fbeb4..d41967d 100644
+--- a/src/unigram_model_trainer.h
++++ b/src/unigram_model_trainer.h
+@@ -21,7 +21,8 @@
+ #include <vector>
+ #include "sentencepiece_model.pb.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ #include "trainer_interface.h"
+ #include "unigram_model.h"
+ #include "util.h"
+@@ -68,7 +69,7 @@ class Trainer : public TrainerInterface {
+       : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
+                                            denormalizer_spec) {}
+-  util::Status Train() override;
++  absl::Status Train() override;
+  private:
+   FRIEND_TEST(TrainerTest, IsValidSentencePieceTest);
+diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc
+index ffe515e..fdb25f6 100644
+--- a/src/unigram_model_trainer_test.cc
++++ b/src/unigram_model_trainer_test.cc
+@@ -16,8 +16,8 @@
+ #include "sentencepiece_processor.h"
+ #include "sentencepiece_trainer.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
+ #include "unigram_model_trainer.h"
+ #include "util.h"
+diff --git a/src/util.h b/src/util.h
+index 0d15863..7122c7c 100644
+--- a/src/util.h
++++ b/src/util.h
+@@ -30,7 +30,8 @@
+ #include "common.h"
+ #include "sentencepiece_processor.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ #ifdef SPM_NO_THREADLOCAL
+ #include <pthread.h>
+@@ -359,14 +360,14 @@ std::string StrError(int errnum);
+ std::vector<std::string> StrSplitAsCSV(absl::string_view text);
+-inline Status OkStatus() { return Status(); }
++inline absl::Status OkStatus() { return absl::Status(); }
+ #define DECLARE_ERROR(FUNC)                                \
+-  inline util::Status FUNC##Error(absl::string_view str) { \
+-    return util::Status(StatusCode::k##FUNC, str.data());  \
++  inline absl::Status FUNC##Error(absl::string_view str) { \
++    return absl::Status(absl::StatusCode::k##FUNC, str.data());  \
+   }                                                        \
+-  inline bool Is##FUNC(const util::Status &status) {       \
+-    return status.code() == StatusCode::k##FUNC;           \
++  inline bool Is##FUNC(const absl::Status &status) {       \
++    return status.code() ==absl::StatusCode::k##FUNC;           \
+   }
+ DECLARE_ERROR(Cancelled)
+@@ -390,8 +391,8 @@ DECLARE_ERROR(Unauthenticated)
+ class StatusBuilder {
+  public:
+-  explicit StatusBuilder(StatusCode code) : code_(code) {}
+-  explicit StatusBuilder(StatusCode code, int loc) : code_(code) {}
++  explicit StatusBuilder(absl::StatusCode code) : code_(code) {}
++  explicit StatusBuilder(absl::StatusCode code, int loc) : code_(code) {}
+   template <typename T>
+   StatusBuilder &operator<<(const T &value) {
+@@ -399,10 +400,10 @@ class StatusBuilder {
+     return *this;
+   }
+-  operator Status() const { return Status(code_, os_.str()); }
++  operator absl::Status() const { return absl::Status(code_, os_.str()); }
+  private:
+-  StatusCode code_;
++  absl::StatusCode code_;
+   std::ostringstream os_;
+ };
+@@ -410,7 +411,7 @@ class StatusBuilder {
+   if (condition) {                                           \
+   } else /* NOLINT */                                        \
+     return ::sentencepiece::util::StatusBuilder(             \
+-               ::sentencepiece::util::StatusCode::kInternal) \
++               ::absl::StatusCode::kInternal) \
+            << __FILE__ << "(" << __LINE__ << ") [" << #condition << "] "
+ #define CHECK_EQ_OR_RETURN(a, b) CHECK_OR_RETURN((a) == (b))
+diff --git a/src/util_test.cc b/src/util_test.cc
+index 71d006f..67290dc 100644
+--- a/src/util_test.cc
++++ b/src/util_test.cc
+@@ -16,7 +16,8 @@
+ #include "filesystem.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
++#include "absl/strings/str_cat.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ namespace sentencepiece {
+@@ -376,27 +377,27 @@ TEST(UtilTest, STLDeleteELementsTest) {
+ }
+ TEST(UtilTest, StatusTest) {
+-  const util::Status ok;
++  const absl::Status ok;
+   EXPECT_TRUE(ok.ok());
+-  EXPECT_EQ(util::StatusCode::kOk, ok.code());
++  EXPECT_EQ(absl::StatusCode::kOk, ok.code());
+   EXPECT_EQ(std::string(""), ok.message());
+-  const util::Status s1(util::StatusCode::kUnknown, "unknown");
+-  const util::Status s2(util::StatusCode::kUnknown, std::string("unknown"));
++  const absl::Status s1(absl::StatusCode::kUnknown, "unknown");
++  const absl::Status s2(absl::StatusCode::kUnknown, std::string("unknown"));
+-  EXPECT_EQ(util::StatusCode::kUnknown, s1.code());
+-  EXPECT_EQ(util::StatusCode::kUnknown, s2.code());
++  EXPECT_EQ(absl::StatusCode::kUnknown, s1.code());
++  EXPECT_EQ(absl::StatusCode::kUnknown, s2.code());
+   EXPECT_EQ(std::string("unknown"), s1.message());
+   EXPECT_EQ(std::string("unknown"), s2.message());
+   auto ok2 = util::OkStatus();
+   EXPECT_TRUE(ok2.ok());
+-  EXPECT_EQ(util::StatusCode::kOk, ok2.code());
++  EXPECT_EQ(absl::StatusCode::kOk, ok2.code());
+   EXPECT_EQ(std::string(""), ok2.message());
+   util::OkStatus().IgnoreError();
+   for (int i = 1; i <= 16; ++i) {
+-    util::Status s(static_cast<util::StatusCode>(i), "message");
++    absl::Status s(static_cast<absl::StatusCode>(i), "message");
+     EXPECT_TRUE(s.ToString().find("message") != std::string::npos)
+         << s.ToString();
+   }
+diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc
+index 0b8b062..bc1f86b 100644
+--- a/src/word_model_trainer.cc
++++ b/src/word_model_trainer.cc
+@@ -15,8 +15,9 @@
+ #include <cmath>
+ #include <string>
+-#include "third_party/absl/container/flat_hash_map.h"
+-#include "third_party/absl/strings/string_view.h"
++#include "absl/container/flat_hash_map.h"
++#include "absl/strings/string_view.h"
++#include "absl/status/status.h"
+ #include "util.h"
+ #include "word_model.h"
+ #include "word_model_trainer.h"
+@@ -24,7 +25,7 @@
+ namespace sentencepiece {
+ namespace word {
+-util::Status Trainer::Train() {
++absl::Status Trainer::Train() {
+   RETURN_IF_ERROR(status());
+   CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
+diff --git a/src/word_model_trainer.h b/src/word_model_trainer.h
+index 76f8f32..436e595 100644
+--- a/src/word_model_trainer.h
++++ b/src/word_model_trainer.h
+@@ -17,6 +17,7 @@
+ #include "sentencepiece_model.pb.h"
+ #include "trainer_interface.h"
++#include "absl/status/status.h"
+ namespace sentencepiece {
+ namespace word {
+@@ -34,7 +35,7 @@ class Trainer : public TrainerInterface {
+       : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
+                                            denormalizer_spec) {}
+-  util::Status Train() override;
++  absl::Status Train() override;
+ };
+ }  // namespace word
+ }  // namespace sentencepiece
+diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc
+index c4a8bc6..366810f 100644
+--- a/src/word_model_trainer_test.cc
++++ b/src/word_model_trainer_test.cc
+@@ -18,8 +18,8 @@
+ #include "filesystem.h"
+ #include "sentencepiece_processor.h"
+ #include "testharness.h"
+-#include "third_party/absl/strings/str_cat.h"
+-#include "third_party/absl/strings/str_join.h"
++#include "absl/strings/str_cat.h"
++#include "absl/strings/str_join.h"
+ #include "util.h"
+ #include "word_model_trainer.h"

gemma.cpp/build/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+*
+!.gitignore
+!.hgignore

gemma.cpp/build/CMakeCache.txt ADDED Viewed

	@@ -0,0 +1,982 @@

+# This is the CMakeCache file.
+# For build in directory: /content/gemma.cpp/build
+# It was generated by CMake: /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake
+# You can edit this file to change values found and used by cmake.
+# If you do not want to change any of the values, simply exit the editor.
+# If you do want to change a value, simply edit, save, and exit the editor.
+# The syntax for the file is as follows:
+# KEY:TYPE=VALUE
+# KEY is the name of a variable in the cache.
+# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
+# VALUE is the current value for the KEY.
+########################
+# EXTERNAL cache entries
+########################
+//Build a 32 bit version of the library.
+BENCHMARK_BUILD_32_BITS:BOOL=OFF
+//Flags used by the C++ compiler during coverage builds.
+BENCHMARK_CXX_FLAGS_COVERAGE:STRING=-g
+//Allow the downloading and in-tree building of unmet dependencies
+BENCHMARK_DOWNLOAD_DEPENDENCIES:BOOL=OFF
+//Enable building and running the assembly tests
+BENCHMARK_ENABLE_ASSEMBLY_TESTS:BOOL=OFF
+//Build documentation with Doxygen.
+BENCHMARK_ENABLE_DOXYGEN:BOOL=OFF
+//Enable the use of exceptions in the benchmark library.
+BENCHMARK_ENABLE_EXCEPTIONS:BOOL=ON
+//Enable building the unit tests which depend on gtest
+BENCHMARK_ENABLE_GTEST_TESTS:BOOL=ON
+//Enable installation of benchmark. (Projects embedding benchmark
+// may want to turn this OFF.)
+BENCHMARK_ENABLE_INSTALL:BOOL=ON
+//Enable performance counters provided by libpfm
+BENCHMARK_ENABLE_LIBPFM:BOOL=OFF
+//Enable link time optimisation of the benchmark library.
+BENCHMARK_ENABLE_LTO:BOOL=OFF
+//Enable testing of the benchmark library.
+BENCHMARK_ENABLE_TESTING:BOOL=ON
+//Build Release candidates with -Werror.
+BENCHMARK_ENABLE_WERROR:BOOL=ON
+//Flags used for linking binaries during coverage builds.
+BENCHMARK_EXE_LINKER_FLAGS_COVERAGE:STRING=
+//Build Release candidates with -Werror regardless of compiler
+// issues.
+BENCHMARK_FORCE_WERROR:BOOL=OFF
+//Enable installation of documentation.
+BENCHMARK_INSTALL_DOCS:BOOL=ON
+//Flags used by the shared libraries linker during coverage builds.
+BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE:STRING=
+//Use bundled GoogleTest. If disabled, the find_package(GTest)
+// will be used.
+BENCHMARK_USE_BUNDLED_GTEST:BOOL=ON
+//Build and test using libc++ as the standard library.
+BENCHMARK_USE_LIBCXX:BOOL=OFF
+//Builds the googlemock subproject
+BUILD_GMOCK:BOOL=ON
+//Build shared libraries
+BUILD_SHARED_LIBS:BOOL=OFF
+//Build the testing tree.
+BUILD_TESTING:BOOL=ON
+//Path to a program.
+CMAKE_ADDR2LINE:FILEPATH=/usr/bin/addr2line
+//Path to a program.
+CMAKE_AR:FILEPATH=/usr/bin/ar
+//Choose the type of build, options are: None Debug Release RelWithDebInfo
+// MinSizeRel ...
+CMAKE_BUILD_TYPE:STRING=
+//Enable/Disable color output during build.
+CMAKE_COLOR_MAKEFILE:BOOL=ON
+//CXX compiler
+CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++
+//A wrapper around 'ar' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_CXX_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11
+//A wrapper around 'ranlib' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_CXX_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11
+//Flags used by the CXX compiler during all build types.
+CMAKE_CXX_FLAGS:STRING=
+//Flags used by the CXX compiler during DEBUG builds.
+CMAKE_CXX_FLAGS_DEBUG:STRING=-g
+//Flags used by the CXX compiler during MINSIZEREL builds.
+CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+//Flags used by the CXX compiler during RELEASE builds.
+CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+//Flags used by the CXX compiler during RELWITHDEBINFO builds.
+CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
+//C compiler
+CMAKE_C_COMPILER:FILEPATH=/usr/bin/cc
+//A wrapper around 'ar' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_C_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11
+//A wrapper around 'ranlib' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_C_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11
+//Flags used by the C compiler during all build types.
+CMAKE_C_FLAGS:STRING=
+//Flags used by the C compiler during DEBUG builds.
+CMAKE_C_FLAGS_DEBUG:STRING=-g
+//Flags used by the C compiler during MINSIZEREL builds.
+CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+//Flags used by the C compiler during RELEASE builds.
+CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+//Flags used by the C compiler during RELWITHDEBINFO builds.
+CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
+//Path to a program.
+CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND
+//Flags used by the linker during all build types.
+CMAKE_EXE_LINKER_FLAGS:STRING=
+//Flags used by the linker during DEBUG builds.
+CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=
+//Flags used by the linker during MINSIZEREL builds.
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=
+//Flags used by the linker during RELEASE builds.
+CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=
+//Flags used by the linker during RELWITHDEBINFO builds.
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+//Enable/Disable output of compile commands during generation.
+CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=
+//Value Computed by CMake.
+CMAKE_FIND_PACKAGE_REDIRECTS_DIR:STATIC=/content/gemma.cpp/build/CMakeFiles/pkgRedirects
+//User executables (bin)
+CMAKE_INSTALL_BINDIR:PATH=bin
+//Read-only architecture-independent data (DATAROOTDIR)
+CMAKE_INSTALL_DATADIR:PATH=
+//Read-only architecture-independent data root (share)
+CMAKE_INSTALL_DATAROOTDIR:PATH=share
+//Documentation root (DATAROOTDIR/doc/PROJECT_NAME)
+CMAKE_INSTALL_DOCDIR:PATH=
+//C header files (include)
+CMAKE_INSTALL_INCLUDEDIR:PATH=include
+//Info documentation (DATAROOTDIR/info)
+CMAKE_INSTALL_INFODIR:PATH=
+//Object code libraries (lib)
+CMAKE_INSTALL_LIBDIR:PATH=lib
+//Program executables (libexec)
+CMAKE_INSTALL_LIBEXECDIR:PATH=libexec
+//Locale-dependent data (DATAROOTDIR/locale)
+CMAKE_INSTALL_LOCALEDIR:PATH=
+//Modifiable single-machine data (var)
+CMAKE_INSTALL_LOCALSTATEDIR:PATH=var
+//Man documentation (DATAROOTDIR/man)
+CMAKE_INSTALL_MANDIR:PATH=
+//C header files for non-gcc (/usr/include)
+CMAKE_INSTALL_OLDINCLUDEDIR:PATH=/usr/include
+//Install path prefix, prepended onto install directories.
+CMAKE_INSTALL_PREFIX:PATH=/usr/local
+//Run-time variable data (LOCALSTATEDIR/run)
+CMAKE_INSTALL_RUNSTATEDIR:PATH=
+//System admin executables (sbin)
+CMAKE_INSTALL_SBINDIR:PATH=sbin
+//Modifiable architecture-independent data (com)
+CMAKE_INSTALL_SHAREDSTATEDIR:PATH=com
+//Read-only single-machine data (etc)
+CMAKE_INSTALL_SYSCONFDIR:PATH=etc
+//Path to a program.
+CMAKE_LINKER:FILEPATH=/usr/bin/ld
+//Path to a program.
+CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake
+//Flags used by the linker during the creation of modules during
+// all build types.
+CMAKE_MODULE_LINKER_FLAGS:STRING=
+//Flags used by the linker during the creation of modules during
+// DEBUG builds.
+CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=
+//Flags used by the linker during the creation of modules during
+// MINSIZEREL builds.
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=
+//Flags used by the linker during the creation of modules during
+// RELEASE builds.
+CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=
+//Flags used by the linker during the creation of modules during
+// RELWITHDEBINFO builds.
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+//Path to a program.
+CMAKE_NM:FILEPATH=/usr/bin/nm
+//Path to a program.
+CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy
+//Path to a program.
+CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump
+//Value Computed by CMake
+CMAKE_PROJECT_DESCRIPTION:STATIC=
+//Value Computed by CMake
+CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
+//Value Computed by CMake
+CMAKE_PROJECT_NAME:STATIC=gemma
+//Value Computed by CMake
+CMAKE_PROJECT_VERSION:STATIC=1.2.0
+//Value Computed by CMake
+CMAKE_PROJECT_VERSION_MAJOR:STATIC=1
+//Value Computed by CMake
+CMAKE_PROJECT_VERSION_MINOR:STATIC=2
+//Value Computed by CMake
+CMAKE_PROJECT_VERSION_PATCH:STATIC=0
+//Value Computed by CMake
+CMAKE_PROJECT_VERSION_TWEAK:STATIC=
+//Path to a program.
+CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib
+//Path to a program.
+CMAKE_READELF:FILEPATH=/usr/bin/readelf
+//Flags used by the linker during the creation of shared libraries
+// during all build types.
+CMAKE_SHARED_LINKER_FLAGS:STRING=
+//Flags used by the linker during the creation of shared libraries
+// during DEBUG builds.
+CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=
+//Flags used by the linker during the creation of shared libraries
+// during MINSIZEREL builds.
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=
+//Flags used by the linker during the creation of shared libraries
+// during RELEASE builds.
+CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=
+//Flags used by the linker during the creation of shared libraries
+// during RELWITHDEBINFO builds.
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+//If set, runtime paths are not added when installing shared libraries,
+// but are added when building.
+CMAKE_SKIP_INSTALL_RPATH:BOOL=NO
+//If set, runtime paths are not added when using shared libraries.
+CMAKE_SKIP_RPATH:BOOL=NO
+//Flags used by the linker during the creation of static libraries
+// during all build types.
+CMAKE_STATIC_LINKER_FLAGS:STRING=
+//Flags used by the linker during the creation of static libraries
+// during DEBUG builds.
+CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING=
+//Flags used by the linker during the creation of static libraries
+// during MINSIZEREL builds.
+CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING=
+//Flags used by the linker during the creation of static libraries
+// during RELEASE builds.
+CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING=
+//Flags used by the linker during the creation of static libraries
+// during RELWITHDEBINFO builds.
+CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+//Path to a program.
+CMAKE_STRIP:FILEPATH=/usr/bin/strip
+//Path to a program.
+CMAKE_TAPI:FILEPATH=CMAKE_TAPI-NOTFOUND
+//If this value is on, makefiles will be generated without the
+// .SILENT directive, and all commands will be echoed to the console
+// during the make.  This is useful for debugging only. With Visual
+// Studio IDE projects all commands are done without /nologo.
+CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE
+//Path to the coverage program that CTest uses for performing coverage
+// inspection
+COVERAGE_COMMAND:FILEPATH=/usr/bin/gcov
+//Extra command line flags to pass to the coverage tool
+COVERAGE_EXTRA_FLAGS:STRING=-l
+//How many times to retry timed-out CTest submissions.
+CTEST_SUBMIT_RETRY_COUNT:STRING=3
+//How long to wait between timed-out CTest submissions.
+CTEST_SUBMIT_RETRY_DELAY:STRING=5
+//OFF
+CXXFEATURECHECK_DEBUG:BOOL=OFF
+//Maximum time allowed before CTest will kill the test.
+DART_TESTING_TIMEOUT:STRING=1500
+//Directory under which to collect all populated content
+FETCHCONTENT_BASE_DIR:PATH=/content/gemma.cpp/build/_deps
+//Disables all attempts to download or update content and assumes
+// source dirs already exist
+FETCHCONTENT_FULLY_DISCONNECTED:BOOL=OFF
+//Enables QUIET option for all content population
+FETCHCONTENT_QUIET:BOOL=ON
+//When not empty, overrides where to find pre-populated content
+// for benchmark
+FETCHCONTENT_SOURCE_DIR_BENCHMARK:PATH=
+//When not empty, overrides where to find pre-populated content
+// for highway
+FETCHCONTENT_SOURCE_DIR_HIGHWAY:PATH=
+//When not empty, overrides where to find pre-populated content
+// for json
+FETCHCONTENT_SOURCE_DIR_JSON:PATH=
+//When not empty, overrides where to find pre-populated content
+// for sentencepiece
+FETCHCONTENT_SOURCE_DIR_SENTENCEPIECE:PATH=
+//Enables UPDATE_DISCONNECTED behavior for all content population
+FETCHCONTENT_UPDATES_DISCONNECTED:BOOL=OFF
+//Enables UPDATE_DISCONNECTED behavior just for population of benchmark
+FETCHCONTENT_UPDATES_DISCONNECTED_BENCHMARK:BOOL=OFF
+//Enables UPDATE_DISCONNECTED behavior just for population of highway
+FETCHCONTENT_UPDATES_DISCONNECTED_HIGHWAY:BOOL=OFF
+//Enables UPDATE_DISCONNECTED behavior just for population of json
+FETCHCONTENT_UPDATES_DISCONNECTED_JSON:BOOL=OFF
+//Enables UPDATE_DISCONNECTED behavior just for population of sentencepiece
+FETCHCONTENT_UPDATES_DISCONNECTED_SENTENCEPIECE:BOOL=OFF
+//Enable Gemma tests
+GEMMA_ENABLE_TESTS:BOOL=OFF
+//Path to a program.
+GITCOMMAND:FILEPATH=/usr/bin/git
+//Git command line client
+GIT_EXECUTABLE:FILEPATH=/usr/bin/git
+//Set copts for Armv7 with NEON (requires vfpv4)?
+HWY_CMAKE_ARM7:BOOL=OFF
+//Change to header-only?
+HWY_CMAKE_HEADER_ONLY:BOOL=OFF
+//Set copts for RISCV with RVV?
+HWY_CMAKE_RVV:BOOL=ON
+//Set SSE2 as baseline for 32-bit x86?
+HWY_CMAKE_SSE2:BOOL=OFF
+//Include contrib/
+HWY_ENABLE_CONTRIB:BOOL=ON
+//Build examples
+HWY_ENABLE_EXAMPLES:BOOL=ON
+//Install library
+HWY_ENABLE_INSTALL:BOOL=ON
+//Enable HWY tests
+HWY_ENABLE_TESTS:BOOL=ON
+//Ignore BUILD_SHARED_LIBS
+HWY_FORCE_STATIC_LIBS:BOOL=OFF
+//Use pre-installed googletest?
+HWY_SYSTEM_GTEST:BOOL=OFF
+//Add -Werror flag?
+HWY_WARNINGS_ARE_ERRORS:BOOL=OFF
+//Enable installation of googletest. (Projects embedding googletest
+// may want to turn this OFF.)
+INSTALL_GTEST:BOOL=ON
+//Build the unit tests when BUILD_TESTING is enabled.
+JSON_BuildTests:BOOL=OFF
+//Enable CI build targets.
+JSON_CI:BOOL=OFF
+//Use extended diagnostic messages.
+JSON_Diagnostics:BOOL=OFF
+//Disable default integer enum serialization.
+JSON_DisableEnumSerialization:BOOL=OFF
+//Place user-defined string literals in the global namespace.
+JSON_GlobalUDLs:BOOL=ON
+//Enable implicit conversions.
+JSON_ImplicitConversions:BOOL=ON
+//Install CMake targets during install step.
+JSON_Install:BOOL=OFF
+//Enable legacy discarded value comparison.
+JSON_LegacyDiscardedValueComparison:BOOL=OFF
+//Use non-amalgamated version of the library.
+JSON_MultipleHeaders:BOOL=ON
+//Include as system headers (skip for clang-tidy).
+JSON_SystemInclude:BOOL=OFF
+//Path to a program.
+LLVM_FILECHECK_EXE:FILEPATH=LLVM_FILECHECK_EXE-NOTFOUND
+//Command to build the project
+MAKECOMMAND:STRING=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake --build . --config "${CTEST_CONFIGURATION_TYPE}"
+//Path to the memory checking command, used for memory error detection.
+MEMORYCHECK_COMMAND:FILEPATH=/usr/local/cuda/bin/compute-sanitizer
+//File that contains suppressions for the memory checker
+MEMORYCHECK_SUPPRESSIONS_FILE:FILEPATH=
+//Name of the computer/site where compile is being run
+SITE:STRING=4ac239cc9fe6
+//Provider of absl library
+SPM_ABSL_PROVIDER:STRING=internal
+//Builds test binaries.
+SPM_BUILD_TEST:BOOL=OFF
+//Runs gcov to test coverage.
+SPM_COVERAGE:BOOL=OFF
+//Override system processor
+SPM_CROSS_SYSTEM_PROCESSOR,:BOOL=OFF
+//Use /MT flag in MSVC build
+SPM_ENABLE_MSVC_MT_BUILD,:BOOL=OFF
+//Enables NFKC compile
+SPM_ENABLE_NFKC_COMPILE:BOOL=OFF
+//Builds shared libraries in addition to static libraries.
+SPM_ENABLE_SHARED:BOOL=ON
+//Enable TCMalloc if available.
+SPM_ENABLE_TCMALLOC:BOOL=ON
+//Makes a tensorflow compatible shared file.
+SPM_ENABLE_TENSORFLOW_SHARED:BOOL=OFF
+//Disable thread_local operator
+SPM_NO_THREADLOCAL:BOOL=OFF
+//Provider of protobuf library
+SPM_PROTOBUF_PROVIDER:STRING=internal
+//Link static library of TCMALLOC.
+SPM_TCMALLOC_STATIC:BOOL=OFF
+//Path to a library.
+TCMALLOC_LIB:FILEPATH=TCMALLOC_LIB-NOTFOUND
+//Value Computed by CMake
+benchmark_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/benchmark-build
+//Value Computed by CMake
+benchmark_IS_TOP_LEVEL:STATIC=OFF
+//Value Computed by CMake
+benchmark_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/benchmark-src
+//Value Computed by CMake
+gemma_BINARY_DIR:STATIC=/content/gemma.cpp/build
+//Value Computed by CMake
+gemma_IS_TOP_LEVEL:STATIC=ON
+//Value Computed by CMake
+gemma_SOURCE_DIR:STATIC=/content/gemma.cpp
+//Value Computed by CMake
+gmock_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-build/googlemock
+//Value Computed by CMake
+gmock_IS_TOP_LEVEL:STATIC=OFF
+//Dependencies for the target
+gmock_LIB_DEPENDS:STATIC=general;gtest;
+//Value Computed by CMake
+gmock_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-src/googlemock
+//Build all of Google Mock's own tests.
+gmock_build_tests:BOOL=OFF
+//Dependencies for the target
+gmock_main_LIB_DEPENDS:STATIC=general;gmock;
+//Value Computed by CMake
+googletest-distribution_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-build
+//Value Computed by CMake
+googletest-distribution_IS_TOP_LEVEL:STATIC=OFF
+//Value Computed by CMake
+googletest-distribution_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-src
+//Value Computed by CMake
+gtest_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-build/googletest
+//Value Computed by CMake
+gtest_IS_TOP_LEVEL:STATIC=OFF
+//Value Computed by CMake
+gtest_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-src/googletest
+//Build gtest's sample programs.
+gtest_build_samples:BOOL=OFF
+//Build all of gtest's own tests.
+gtest_build_tests:BOOL=OFF
+//Disable uses of pthreads in gtest.
+gtest_disable_pthreads:BOOL=OFF
+//Use shared (DLL) run-time lib even when Google Test is built
+// as static lib.
+gtest_force_shared_crt:BOOL=ON
+//Build gtest with internal symbols hidden in shared libraries.
+gtest_hide_internal_symbols:BOOL=OFF
+//Dependencies for the target
+gtest_main_LIB_DEPENDS:STATIC=general;gtest;
+//Value Computed by CMake
+hwy_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build
+//Value Computed by CMake
+hwy_IS_TOP_LEVEL:STATIC=OFF
+//Value Computed by CMake
+hwy_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-src
+//Dependencies for the target
+hwy_contrib_LIB_DEPENDS:STATIC=general;hwy;
+//Dependencies for the target
+hwy_test_LIB_DEPENDS:STATIC=general;hwy;
+//Dependencies for the target
+libgemma_LIB_DEPENDS:STATIC=general;hwy;general;hwy_contrib;general;sentencepiece-static;
+//Value Computed by CMake
+nlohmann_json_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/json-build
+//Value Computed by CMake
+nlohmann_json_IS_TOP_LEVEL:STATIC=OFF
+//Value Computed by CMake
+nlohmann_json_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/json-src
+//Value Computed by CMake
+sentencepiece_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/sentencepiece-build
+//Value Computed by CMake
+sentencepiece_IS_TOP_LEVEL:STATIC=OFF
+//Value Computed by CMake
+sentencepiece_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/sentencepiece-src
+//Dependencies for the target
+sentencepiece_train_LIB_DEPENDS:STATIC=general;sentencepiece;
+########################
+# INTERNAL cache entries
+########################
+//Test ATOMICS_LOCK_FREE_INSTRUCTIONS
+ATOMICS_LOCK_FREE_INSTRUCTIONS:INTERNAL=1
+//ADVANCED property for variable: BENCHMARK_CXX_FLAGS_COVERAGE
+BENCHMARK_CXX_FLAGS_COVERAGE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: BENCHMARK_EXE_LINKER_FLAGS_COVERAGE
+BENCHMARK_EXE_LINKER_FLAGS_COVERAGE-ADVANCED:INTERNAL=1
+//Test BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG
+BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG:INTERNAL=1
+//Test BENCHMARK_HAS_O3_FLAG
+BENCHMARK_HAS_O3_FLAG:INTERNAL=1
+//Test BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH
+BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH:INTERNAL=1
+//Test BENCHMARK_HAS_WNO_ODR
+BENCHMARK_HAS_WNO_ODR:INTERNAL=1
+//ADVANCED property for variable: BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE
+BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: BUILD_SHARED_LIBS
+BUILD_SHARED_LIBS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_ADDR2LINE
+CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_AR
+CMAKE_AR-ADVANCED:INTERNAL=1
+//This is the directory where this CMakeCache.txt was created
+CMAKE_CACHEFILE_DIR:INTERNAL=/content/gemma.cpp/build
+//Major version of cmake used to create the current loaded cache
+CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
+//Minor version of cmake used to create the current loaded cache
+CMAKE_CACHE_MINOR_VERSION:INTERNAL=27
+//Patch version of cmake used to create the current loaded cache
+CMAKE_CACHE_PATCH_VERSION:INTERNAL=9
+//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE
+CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1
+//Path to CMake executable.
+CMAKE_COMMAND:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake
+//Path to cpack program executable.
+CMAKE_CPACK_COMMAND:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/cpack
+//ADVANCED property for variable: CMAKE_CTEST_COMMAND
+CMAKE_CTEST_COMMAND-ADVANCED:INTERNAL=1
+//Path to ctest program executable.
+CMAKE_CTEST_COMMAND:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/ctest
+//ADVANCED property for variable: CMAKE_CXX_COMPILER
+CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_COMPILER_AR
+CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB
+CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS
+CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG
+CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL
+CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE
+CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO
+CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Test CMAKE_CXX_LINK_NO_PIE_SUPPORTED
+CMAKE_CXX_LINK_NO_PIE_SUPPORTED:INTERNAL=1
+//Test CMAKE_CXX_LINK_PIE_SUPPORTED
+CMAKE_CXX_LINK_PIE_SUPPORTED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER
+CMAKE_C_COMPILER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER_AR
+CMAKE_C_COMPILER_AR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER_RANLIB
+CMAKE_C_COMPILER_RANLIB-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS
+CMAKE_C_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG
+CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL
+CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE
+CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO
+CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_DLLTOOL
+CMAKE_DLLTOOL-ADVANCED:INTERNAL=1
+//Executable file format
+CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS
+CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG
+CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE
+CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS
+CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1
+//Name of external makefile project generator.
+CMAKE_EXTRA_GENERATOR:INTERNAL=
+//Name of generator.
+CMAKE_GENERATOR:INTERNAL=Unix Makefiles
+//Generator instance identifier.
+CMAKE_GENERATOR_INSTANCE:INTERNAL=
+//Name of generator platform.
+CMAKE_GENERATOR_PLATFORM:INTERNAL=
+//Name of generator toolset.
+CMAKE_GENERATOR_TOOLSET:INTERNAL=
+//Test CMAKE_HAVE_LIBC_PTHREAD
+CMAKE_HAVE_LIBC_PTHREAD:INTERNAL=1
+//Source directory with the top level CMakeLists.txt file for this
+// project
+CMAKE_HOME_DIRECTORY:INTERNAL=/content/gemma.cpp
+//ADVANCED property for variable: CMAKE_INSTALL_BINDIR
+CMAKE_INSTALL_BINDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_DATADIR
+CMAKE_INSTALL_DATADIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_DATAROOTDIR
+CMAKE_INSTALL_DATAROOTDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_DOCDIR
+CMAKE_INSTALL_DOCDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_INCLUDEDIR
+CMAKE_INSTALL_INCLUDEDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_INFODIR
+CMAKE_INSTALL_INFODIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_LIBDIR
+CMAKE_INSTALL_LIBDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_LIBEXECDIR
+CMAKE_INSTALL_LIBEXECDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_LOCALEDIR
+CMAKE_INSTALL_LOCALEDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_LOCALSTATEDIR
+CMAKE_INSTALL_LOCALSTATEDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_MANDIR
+CMAKE_INSTALL_MANDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_OLDINCLUDEDIR
+CMAKE_INSTALL_OLDINCLUDEDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_RUNSTATEDIR
+CMAKE_INSTALL_RUNSTATEDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_SBINDIR
+CMAKE_INSTALL_SBINDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_SHAREDSTATEDIR
+CMAKE_INSTALL_SHAREDSTATEDIR-ADVANCED:INTERNAL=1
+//Install .so files without execute permission.
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1
+//ADVANCED property for variable: CMAKE_INSTALL_SYSCONFDIR
+CMAKE_INSTALL_SYSCONFDIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_LINKER
+CMAKE_LINKER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
+CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS
+CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG
+CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE
+CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_NM
+CMAKE_NM-ADVANCED:INTERNAL=1
+//number of local generators
+CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=12
+//ADVANCED property for variable: CMAKE_OBJCOPY
+CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_OBJDUMP
+CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
+//Platform information initialized
+CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_RANLIB
+CMAKE_RANLIB-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_READELF
+CMAKE_READELF-ADVANCED:INTERNAL=1
+//Path to CMake installation.
+CMAKE_ROOT:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG
+CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE
+CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH
+CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SKIP_RPATH
+CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS
+CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG
+CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL
+CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE
+CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STRIP
+CMAKE_STRIP-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_TAPI
+CMAKE_TAPI-ADVANCED:INTERNAL=1
+//uname command
+CMAKE_UNAME:INTERNAL=/usr/bin/uname
+//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE
+CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
+//Result of TRY_COMPILE
+COMPILE_HAVE_GNU_POSIX_REGEX:INTERNAL=FALSE
+//Result of TRY_COMPILE
+COMPILE_HAVE_POSIX_REGEX:INTERNAL=TRUE
+//Result of TRY_COMPILE
+COMPILE_HAVE_PTHREAD_AFFINITY:INTERNAL=TRUE
+//Result of TRY_COMPILE
+COMPILE_HAVE_STD_REGEX:INTERNAL=TRUE
+//Result of TRY_COMPILE
+COMPILE_HAVE_STEADY_CLOCK:INTERNAL=TRUE
+//ADVANCED property for variable: COVERAGE_COMMAND
+COVERAGE_COMMAND-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: COVERAGE_EXTRA_FLAGS
+COVERAGE_EXTRA_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CTEST_SUBMIT_RETRY_COUNT
+CTEST_SUBMIT_RETRY_COUNT-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CTEST_SUBMIT_RETRY_DELAY
+CTEST_SUBMIT_RETRY_DELAY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: DART_TESTING_TIMEOUT
+DART_TESTING_TIMEOUT-ADVANCED:INTERNAL=1
+//Details about finding Git
+FIND_PACKAGE_MESSAGE_DETAILS_Git:INTERNAL=[/usr/bin/git][v2.34.1()]
+//Details about finding Python
+FIND_PACKAGE_MESSAGE_DETAILS_Python:INTERNAL=[/usr/local/bin/python][cfound components: Interpreter ][v3.10.12()]
+//Details about finding Threads
+FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()]
+//ADVANCED property for variable: GITCOMMAND
+GITCOMMAND-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: GIT_EXECUTABLE
+GIT_EXECUTABLE-ADVANCED:INTERNAL=1
+//Have include asm/hwcap.h
+HAVE_ASM_HWCAP_H:INTERNAL=
+//Test HAVE_CXX_FLAG_COVERAGE
+HAVE_CXX_FLAG_COVERAGE:INTERNAL=1
+//Test HAVE_CXX_FLAG_FSTRICT_ALIASING
+HAVE_CXX_FLAG_FSTRICT_ALIASING:INTERNAL=1
+//Test HAVE_CXX_FLAG_PEDANTIC
+HAVE_CXX_FLAG_PEDANTIC:INTERNAL=1
+//Test HAVE_CXX_FLAG_PEDANTIC_ERRORS
+HAVE_CXX_FLAG_PEDANTIC_ERRORS:INTERNAL=1
+//Test HAVE_CXX_FLAG_WALL
+HAVE_CXX_FLAG_WALL:INTERNAL=1
+//Test HAVE_CXX_FLAG_WD654
+HAVE_CXX_FLAG_WD654:INTERNAL=
+//Test HAVE_CXX_FLAG_WERROR
+HAVE_CXX_FLAG_WERROR:INTERNAL=1
+//Test HAVE_CXX_FLAG_WEXTRA
+HAVE_CXX_FLAG_WEXTRA:INTERNAL=1
+//Test HAVE_CXX_FLAG_WFLOAT_EQUAL
+HAVE_CXX_FLAG_WFLOAT_EQUAL:INTERNAL=1
+//Test HAVE_CXX_FLAG_WNO_DEPRECATED
+HAVE_CXX_FLAG_WNO_DEPRECATED:INTERNAL=1
+//Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS
+HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS:INTERNAL=1
+//Test HAVE_CXX_FLAG_WOLD_STYLE_CAST
+HAVE_CXX_FLAG_WOLD_STYLE_CAST:INTERNAL=1
+//Test HAVE_CXX_FLAG_WSHADOW
+HAVE_CXX_FLAG_WSHADOW:INTERNAL=1
+//Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32
+HAVE_CXX_FLAG_WSHORTEN_64_TO_32:INTERNAL=
+//Test HAVE_CXX_FLAG_WSTRICT_ALIASING
+HAVE_CXX_FLAG_WSTRICT_ALIASING:INTERNAL=1
+//Test HAVE_CXX_FLAG_WTHREAD_SAFETY
+HAVE_CXX_FLAG_WTHREAD_SAFETY:INTERNAL=
+//Have library rt
+HAVE_LIB_RT:INTERNAL=1
+//Have include sys/auxv.h
+HAVE_SYS_AUXV_H:INTERNAL=1
+//Test HWY_EMSCRIPTEN
+HWY_EMSCRIPTEN:INTERNAL=
+//ADVANCED property for variable: HWY_FORCE_STATIC_LIBS
+HWY_FORCE_STATIC_LIBS-ADVANCED:INTERNAL=1
+//Test HWY_RISCV
+HWY_RISCV:INTERNAL=
+//ADVANCED property for variable: MAKECOMMAND
+MAKECOMMAND-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: MEMORYCHECK_COMMAND
+MEMORYCHECK_COMMAND-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: MEMORYCHECK_SUPPRESSIONS_FILE
+MEMORYCHECK_SUPPRESSIONS_FILE-ADVANCED:INTERNAL=1
+NLOHMANN_JSON_CONFIG_INSTALL_DIR:INTERNAL=share/cmake/nlohmann_json
+//Result of try_run()
+RUN_HAVE_POSIX_REGEX:INTERNAL=0
+//Result of try_run()
+RUN_HAVE_PTHREAD_AFFINITY:INTERNAL=0
+//Result of try_run()
+RUN_HAVE_STD_REGEX:INTERNAL=0
+//Result of try_run()
+RUN_HAVE_STEADY_CLOCK:INTERNAL=0
+//ADVANCED property for variable: SITE
+SITE-ADVANCED:INTERNAL=1
+//STRINGS property for variable: SPM_ABSL_PROVIDER
+SPM_ABSL_PROVIDER-STRINGS:INTERNAL=internal;module;package
+//STRINGS property for variable: SPM_PROTOBUF_PROVIDER
+SPM_PROTOBUF_PROVIDER-STRINGS:INTERNAL=internal;package
+//linker supports push/pop state
+_CMAKE_LINKER_PUSHPOP_STATE_SUPPORTED:INTERNAL=TRUE
+//CMAKE_INSTALL_PREFIX during last run
+_GNUInstallDirs_LAST_CMAKE_INSTALL_PREFIX:INTERNAL=/usr/local
+//Compiler reason failure
+_Python_Compiler_REASON_FAILURE:INTERNAL=
+//Development reason failure
+_Python_Development_REASON_FAILURE:INTERNAL=
+//Path to a program.
+_Python_EXECUTABLE:INTERNAL=/usr/local/bin/python
+//Python Properties
+_Python_INTERPRETER_PROPERTIES:INTERNAL=Python;3;10;12;64;32;;cpython-310-x86_64-linux-gnu;abi3;/usr/lib/python3.10;/usr/lib/python3.10;/usr/local/lib/python3.10/dist-packages;/usr/local/lib/python3.10/dist-packages
+_Python_INTERPRETER_SIGNATURE:INTERNAL=fe70fcc9cb393d9a6babd75ef083d11f
+//NumPy reason failure
+_Python_NumPy_REASON_FAILURE:INTERNAL=
+cmake_package_name:INTERNAL=GTest
+generated_dir:INTERNAL=/content/gemma.cpp/build/_deps/highway-build/googletest-build/googletest/generated
+//ADVANCED property for variable: gmock_build_tests
+gmock_build_tests-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: gtest_build_samples
+gtest_build_samples-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: gtest_build_tests
+gtest_build_tests-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: gtest_disable_pthreads
+gtest_disable_pthreads-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: gtest_force_shared_crt
+gtest_force_shared_crt-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: gtest_hide_internal_symbols
+gtest_hide_internal_symbols-ADVANCED:INTERNAL=1
+targets_export_name:INTERNAL=GTestTargets

gemma.cpp/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake ADDED Viewed

	@@ -0,0 +1,74 @@

+set(CMAKE_C_COMPILER "/usr/bin/cc")
+set(CMAKE_C_COMPILER_ARG1 "")
+set(CMAKE_C_COMPILER_ID "GNU")
+set(CMAKE_C_COMPILER_VERSION "11.4.0")
+set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
+set(CMAKE_C_COMPILER_WRAPPER "")
+set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
+set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
+set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
+set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
+set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
+set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
+set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
+set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
+set(CMAKE_C_PLATFORM_ID "Linux")
+set(CMAKE_C_SIMULATE_ID "")
+set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
+set(CMAKE_C_SIMULATE_VERSION "")
+set(CMAKE_AR "/usr/bin/ar")
+set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
+set(CMAKE_RANLIB "/usr/bin/ranlib")
+set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
+set(CMAKE_LINKER "/usr/bin/ld")
+set(CMAKE_MT "")
+set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
+set(CMAKE_COMPILER_IS_GNUCC 1)
+set(CMAKE_C_COMPILER_LOADED 1)
+set(CMAKE_C_COMPILER_WORKS TRUE)
+set(CMAKE_C_ABI_COMPILED TRUE)
+set(CMAKE_C_COMPILER_ENV_VAR "CC")
+set(CMAKE_C_COMPILER_ID_RUN 1)
+set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
+set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
+set(CMAKE_C_LINKER_PREFERENCE 10)
+set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE)
+# Save compiler ABI information.
+set(CMAKE_C_SIZEOF_DATA_PTR "8")
+set(CMAKE_C_COMPILER_ABI "ELF")
+set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN")
+set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+if(CMAKE_C_SIZEOF_DATA_PTR)
+  set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
+endif()
+if(CMAKE_C_COMPILER_ABI)
+  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
+endif()
+if(CMAKE_C_LIBRARY_ARCHITECTURE)
+  set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+endif()
+set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
+if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)
+  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
+endif()
+set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
+set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s")
+set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
+set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")

gemma.cpp/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake ADDED Viewed

	@@ -0,0 +1,85 @@

+set(CMAKE_CXX_COMPILER "/usr/bin/c++")
+set(CMAKE_CXX_COMPILER_ARG1 "")
+set(CMAKE_CXX_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILER_VERSION "11.4.0")
+set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
+set(CMAKE_CXX_COMPILER_WRAPPER "")
+set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
+set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
+set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23")
+set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters")
+set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
+set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
+set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17")
+set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20")
+set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23")
+set(CMAKE_CXX_PLATFORM_ID "Linux")
+set(CMAKE_CXX_SIMULATE_ID "")
+set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU")
+set(CMAKE_CXX_SIMULATE_VERSION "")
+set(CMAKE_AR "/usr/bin/ar")
+set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11")
+set(CMAKE_RANLIB "/usr/bin/ranlib")
+set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
+set(CMAKE_LINKER "/usr/bin/ld")
+set(CMAKE_MT "")
+set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
+set(CMAKE_COMPILER_IS_GNUCXX 1)
+set(CMAKE_CXX_COMPILER_LOADED 1)
+set(CMAKE_CXX_COMPILER_WORKS TRUE)
+set(CMAKE_CXX_ABI_COMPILED TRUE)
+set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
+set(CMAKE_CXX_COMPILER_ID_RUN 1)
+set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m)
+set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
+foreach (lang C OBJC OBJCXX)
+  if (CMAKE_${lang}_COMPILER_ID_RUN)
+    foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
+      list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
+    endforeach()
+  endif()
+endforeach()
+set(CMAKE_CXX_LINKER_PREFERENCE 30)
+set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
+set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE)
+# Save compiler ABI information.
+set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
+set(CMAKE_CXX_COMPILER_ABI "ELF")
+set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN")
+set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+if(CMAKE_CXX_SIZEOF_DATA_PTR)
+  set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
+endif()
+if(CMAKE_CXX_COMPILER_ABI)
+  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
+endif()
+if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
+  set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+endif()
+set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
+if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
+  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
+endif()
+set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
+set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
+set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
+set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")

gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f1901373878efd64fb8d123f266ec93db00a3523087d52afa0fff59401a75ce
+size 15968

gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:706369b9a080132db3bd9f26616f30a752f9376201eb47cfb747ef4b34d7120e
+size 15992

gemma.cpp/build/CMakeFiles/3.27.9/CMakeSystem.cmake ADDED Viewed

	@@ -0,0 +1,15 @@

+set(CMAKE_HOST_SYSTEM "Linux-6.1.85+")
+set(CMAKE_HOST_SYSTEM_NAME "Linux")
+set(CMAKE_HOST_SYSTEM_VERSION "6.1.85+")
+set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
+set(CMAKE_SYSTEM "Linux-6.1.85+")
+set(CMAKE_SYSTEM_NAME "Linux")
+set(CMAKE_SYSTEM_VERSION "6.1.85+")
+set(CMAKE_SYSTEM_PROCESSOR "x86_64")
+set(CMAKE_CROSSCOMPILING "FALSE")
+set(CMAKE_SYSTEM_LOADED 1)

gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/CMakeCCompilerId.c ADDED Viewed

	@@ -0,0 +1,866 @@

+#ifdef __cplusplus
+# error "A C++ compiler has been selected for C."
+#endif
+#if defined(__18CXX)
+# define ID_VOID_MAIN
+#endif
+#if defined(__CLASSIC_C__)
+/* cv-qualifiers did not exist in K&R C */
+# define const
+# define volatile
+#endif
+#if !defined(__has_include)
+/* If the compiler does not have __has_include, pretend the answer is
+   always no.  */
+#  define __has_include(x) 0
+#endif
+/* Version number components: V=Version, R=Revision, P=Patch
+   Version date components:   YYYY=Year, MM=Month,   DD=Day  */
+#if defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# if defined(__GNUC__)
+#  define SIMULATE_ID "GNU"
+# endif
+  /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
+     except that a few beta releases use the old format with V=2021.  */
+# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
+#  if defined(__INTEL_COMPILER_UPDATE)
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
+#  else
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER   % 10)
+#  endif
+# else
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
+   /* The third version component from --version is an update index,
+      but no macro is provided for it.  */
+#  define COMPILER_VERSION_PATCH DEC(0)
+# endif
+# if defined(__INTEL_COMPILER_BUILD_DATE)
+   /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
+#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
+# endif
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# if defined(__GNUC__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+# elif defined(__GNUG__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+# endif
+# if defined(__GNUC_MINOR__)
+#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
+# define COMPILER_ID "IntelLLVM"
+#if defined(_MSC_VER)
+# define SIMULATE_ID "MSVC"
+#endif
+#if defined(__GNUC__)
+# define SIMULATE_ID "GNU"
+#endif
+/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
+ * later.  Look for 6 digit vs. 8 digit version number to decide encoding.
+ * VVVV is no smaller than the current year when a version is released.
+ */
+#if __INTEL_LLVM_COMPILER < 1000000L
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER    % 10)
+#else
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER     % 100)
+#endif
+#if defined(_MSC_VER)
+  /* _MSC_VER = VVRR */
+# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+#endif
+#if defined(__GNUC__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+#elif defined(__GNUG__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+#endif
+#if defined(__GNUC_MINOR__)
+# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+#endif
+#if defined(__GNUC_PATCHLEVEL__)
+# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+#endif
+#elif defined(__PATHCC__)
+# define COMPILER_ID "PathScale"
+# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
+# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
+# if defined(__PATHCC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
+# endif
+#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
+# define COMPILER_ID "Embarcadero"
+# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
+# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__     & 0xFFFF)
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+  /* __BORLANDC__ = 0xVRR */
+# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
+# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
+#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
+# define COMPILER_ID "Watcom"
+   /* __WATCOMC__ = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "OpenWatcom"
+   /* __WATCOMC__ = VVRP + 1100 */
+# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+#elif defined(__SUNPRO_C)
+# define COMPILER_ID "SunPro"
+# if __SUNPRO_C >= 0x5100
+   /* __SUNPRO_C = 0xVRRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_C    & 0xF)
+# else
+   /* __SUNPRO_CC = 0xVRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_C    & 0xF)
+# endif
+#elif defined(__HP_cc)
+# define COMPILER_ID "HP"
+  /* __HP_cc = VVRRPP */
+# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000)
+# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__HP_cc     % 100)
+#elif defined(__DECC)
+# define COMPILER_ID "Compaq"
+  /* __DECC_VER = VVRRTPPPP */
+# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000)
+# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000  % 100)
+# define COMPILER_VERSION_PATCH DEC(__DECC_VER         % 10000)
+#elif defined(__IBMC__) && defined(__COMPILER_VER__)
+# define COMPILER_ID "zOS"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+#elif defined(__open_xl__) && defined(__clang__)
+# define COMPILER_ID "IBMClang"
+# define COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
+# define COMPILER_VERSION_MINOR DEC(__open_xl_release__)
+# define COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
+# define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
+#elif defined(__ibmxl__) && defined(__clang__)
+# define COMPILER_ID "XLClang"
+# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
+# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
+# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
+# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
+#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800
+# define COMPILER_ID "XL"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800
+# define COMPILER_ID "VisualAge"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+#elif defined(__NVCOMPILER)
+# define COMPILER_ID "NVHPC"
+# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
+# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
+# if defined(__NVCOMPILER_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
+# endif
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
+# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
+# if defined(__PGIC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
+# endif
+#elif defined(_CRAYC)
+# define COMPILER_ID "Cray"
+# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
+# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
+#elif defined(__TI_COMPILER_VERSION__)
+# define COMPILER_ID "TI"
+  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
+# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
+# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000   % 1000)
+# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__        % 1000)
+#elif defined(__CLANG_FUJITSU)
+# define COMPILER_ID "FujitsuClang"
+# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# define COMPILER_VERSION_INTERNAL_STR __clang_version__
+#elif defined(__FUJITSU)
+# define COMPILER_ID "Fujitsu"
+# if defined(__FCC_version__)
+#   define COMPILER_VERSION __FCC_version__
+# elif defined(__FCC_major__)
+#   define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+#   define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+#   define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# endif
+# if defined(__fcc_version)
+#   define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
+# elif defined(__FCC_VERSION)
+#   define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
+# endif
+#elif defined(__ghs__)
+# define COMPILER_ID "GHS"
+/* __GHS_VERSION_NUMBER = VVVVRP */
+# ifdef __GHS_VERSION_NUMBER
+# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
+# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER      % 10)
+# endif
+#elif defined(__TASKING__)
+# define COMPILER_ID "Tasking"
+  # define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
+  # define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
+# define COMPILER_VERSION_INTERNAL DEC(__VERSION__)
+#elif defined(__TINYC__)
+# define COMPILER_ID "TinyCC"
+#elif defined(__BCC__)
+# define COMPILER_ID "Bruce"
+#elif defined(__SCO_VERSION__)
+# define COMPILER_ID "SCO"
+#elif defined(__ARMCC_VERSION) && !defined(__clang__)
+# define COMPILER_ID "ARMCC"
+#if __ARMCC_VERSION >= 1000000
+  /* __ARMCC_VERSION = VRRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION     % 10000)
+#else
+  /* __ARMCC_VERSION = VRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION    % 10000)
+#endif
+#elif defined(__clang__) && defined(__apple_build_version__)
+# define COMPILER_ID "AppleClang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
+#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
+# define COMPILER_ID "ARMClang"
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100   % 100)
+# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
+#elif defined(__clang__)
+# define COMPILER_ID "Clang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
+# define COMPILER_ID "LCC"
+# define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
+# define COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
+# if defined(__LCC_MINOR__)
+#  define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
+# endif
+# if defined(__GNUC__) && defined(__GNUC_MINOR__)
+#  define SIMULATE_ID "GNU"
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+#  if defined(__GNUC_PATCHLEVEL__)
+#   define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+#  endif
+# endif
+#elif defined(__GNUC__)
+# define COMPILER_ID "GNU"
+# define COMPILER_VERSION_MAJOR DEC(__GNUC__)
+# if defined(__GNUC_MINOR__)
+#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+  /* _MSC_VER = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
+# if defined(_MSC_FULL_VER)
+#  if _MSC_VER >= 1400
+    /* _MSC_FULL_VER = VVRRPPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
+#  else
+    /* _MSC_FULL_VER = VVRRPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
+#  endif
+# endif
+# if defined(_MSC_BUILD)
+#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
+# endif
+#elif defined(_ADI_COMPILER)
+# define COMPILER_ID "ADSP"
+#if defined(__VERSIONNUM__)
+  /* __VERSIONNUM__ = 0xVVRRPPTT */
+#  define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
+#  define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
+#  define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
+#  define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
+#endif
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# define COMPILER_ID "IAR"
+# if defined(__VER__) && defined(__ICCARM__)
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
+#  define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
+#  define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
+#  define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
+#  define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# endif
+#elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC)
+# define COMPILER_ID "SDCC"
+# if defined(__SDCC_VERSION_MAJOR)
+#  define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR)
+#  define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR)
+#  define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH)
+# else
+  /* SDCC = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(SDCC/100)
+#  define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(SDCC    % 10)
+# endif
+/* These compilers are either not known or too old to define an
+  identification macro.  Try to identify the platform and guess that
+  it is the native compiler.  */
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+#else /* unknown compiler */
+# define COMPILER_ID ""
+#endif
+/* Identification strings: a marker word, a colon, then key[value].
+   Construct the string literal in pieces to prevent the source from
+   getting matched; the adjacent pieces are joined by the compiler, so
+   the complete text never appears verbatim in this file.  Store it in a
+   pointer rather than an array because some compilers will just produce
+   instructions to fill the array rather than assigning a pointer to a
+   static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; /* value is "" for an unknown compiler */
+#ifdef SIMULATE_ID /* defined only by some compiler branches above */
+char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
+#endif
+#ifdef __QNXNTO__ /* flag-style entry: the brackets carry no value */
+char const* qnxnto = "INFO" ":" "qnxnto[]";
+#endif
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) /* fixed wrapper name, emitted only when this macro is predefined */
+char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
+#endif
+#define STRINGIFY_HELPER(X) #X /* turns the argument, exactly as written, into a string literal */
+#define STRINGIFY(X) STRINGIFY_HELPER(X) /* extra level so that a macro argument is expanded before being stringized */
+/* Identify known platforms by name.  PLATFORM_ID becomes a string literal
+   for the first branch of this chain whose predefined macro test
+   succeeds.  The "unknown platform" branches define PLATFORM_ID with an
+   empty expansion, so the string built from it has empty brackets.  */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+#elif defined(__MSYS__)
+# define PLATFORM_ID "MSYS"
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+#elif defined(__HAIKU__)
+# define PLATFORM_ID "Haiku"
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+#elif defined(__WATCOMC__) /* Watcom: the platform is chosen by a nested chain of its own macros */
+# if defined(__LINUX__)
+#  define PLATFORM_ID "Linux"
+# elif defined(__DOS__)
+#  define PLATFORM_ID "DOS"
+# elif defined(__OS2__)
+#  define PLATFORM_ID "OS2"
+# elif defined(__WINDOWS__)
+#  define PLATFORM_ID "Windows3x"
+# elif defined(__VXWORKS__)
+#  define PLATFORM_ID "VxWorks"
+# else /* unknown platform */
+#  define PLATFORM_ID
+# endif
+#elif defined(__INTEGRITY)
+# if defined(INT_178B)
+#  define PLATFORM_ID "Integrity178"
+# else /* regular Integrity */
+#  define PLATFORM_ID "Integrity"
+# endif
+# elif defined(_ADI_COMPILER) /* branch of the outer chain; the extra indentation is only cosmetic */
+#  define PLATFORM_ID "ADSP"
+#else /* unknown platform */
+# define PLATFORM_ID
+#endif
+/* Define ARCHITECTURE_ID for the compiler families handled below
+   (MSVC-compatible compilers on Windows, Watcom, IAR, GHS, TI, the
+   __ADSPSHARC__/__ADSPBLACKFIN__ targets and TASKING).
+   For windows compilers MSVC and Intel we can determine
+   the architecture of the compiler being used.  This is because
+   the compilers do not have flags that can change the architecture,
+   but rather depend on which compiler is being used.
+   Within a handled family an unrecognized target yields "", and every
+   other compiler gets an empty expansion.
+*/
+#if defined(_WIN32) && defined(_MSC_VER)
+# if defined(_M_IA64)
+#  define ARCHITECTURE_ID "IA64"
+# elif defined(_M_ARM64EC)
+#  define ARCHITECTURE_ID "ARM64EC"
+# elif defined(_M_X64) || defined(_M_AMD64)
+#  define ARCHITECTURE_ID "x64"
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+# elif defined(_M_ARM64)
+#  define ARCHITECTURE_ID "ARM64"
+# elif defined(_M_ARM)
+#  if _M_ARM == 4
+#   define ARCHITECTURE_ID "ARMV4I"
+#  elif _M_ARM == 5
+#   define ARCHITECTURE_ID "ARMV5I"
+#  else
+#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) /* appends the numeric value of _M_ARM as text */
+#  endif
+# elif defined(_M_MIPS)
+#  define ARCHITECTURE_ID "MIPS"
+# elif defined(_M_SH)
+#  define ARCHITECTURE_ID "SHx"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__WATCOMC__)
+# if defined(_M_I86)
+#  define ARCHITECTURE_ID "I86"
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# if defined(__ICCARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__ICCRX__)
+#  define ARCHITECTURE_ID "RX"
+# elif defined(__ICCRH850__)
+#  define ARCHITECTURE_ID "RH850"
+# elif defined(__ICCRL78__)
+#  define ARCHITECTURE_ID "RL78"
+# elif defined(__ICCRISCV__)
+#  define ARCHITECTURE_ID "RISCV"
+# elif defined(__ICCAVR__)
+#  define ARCHITECTURE_ID "AVR"
+# elif defined(__ICC430__)
+#  define ARCHITECTURE_ID "MSP430"
+# elif defined(__ICCV850__)
+#  define ARCHITECTURE_ID "V850"
+# elif defined(__ICC8051__)
+#  define ARCHITECTURE_ID "8051"
+# elif defined(__ICCSTM8__)
+#  define ARCHITECTURE_ID "STM8"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__ghs__)
+# if defined(__PPC64__)
+#  define ARCHITECTURE_ID "PPC64"
+# elif defined(__ppc__)
+#  define ARCHITECTURE_ID "PPC"
+# elif defined(__ARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__x86_64__)
+#  define ARCHITECTURE_ID "x64"
+# elif defined(__i386__)
+#  define ARCHITECTURE_ID "X86"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__TI_COMPILER_VERSION__)
+# if defined(__TI_ARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__MSP430__)
+#  define ARCHITECTURE_ID "MSP430"
+# elif defined(__TMS320C28XX__)
+#  define ARCHITECTURE_ID "TMS320C28x"
+# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
+#  define ARCHITECTURE_ID "TMS320C6x"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+# elif defined(__ADSPSHARC__) /* branch of the outer chain (the nested TI chain closed above); indentation is cosmetic */
+#  define ARCHITECTURE_ID "SHARC"
+# elif defined(__ADSPBLACKFIN__) /* also a branch of the outer chain */
+#  define ARCHITECTURE_ID "Blackfin"
+#elif defined(__TASKING__)
+# if defined(__CTC__) || defined(__CPTC__)
+#  define ARCHITECTURE_ID "TriCore"
+# elif defined(__CMCS__)
+#  define ARCHITECTURE_ID "MCS"
+# elif defined(__CARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__CARC__)
+#  define ARCHITECTURE_ID "ARC"
+# elif defined(__C51__)
+#  define ARCHITECTURE_ID "8051"
+# elif defined(__CPCP__)
+#  define ARCHITECTURE_ID "PCP"
+# else
+#  define ARCHITECTURE_ID ""
+# endif
+#else
+#  define ARCHITECTURE_ID
+#endif
+/* Convert integer to decimal digit literals.  Expands to eight
+   comma-separated character expressions, most significant digit first,
+   meant to be dropped into a char-array initializer.  The output is
+   always eight characters wide (zero-padded); any digits above the
+   eighth are not represented.  The argument n is evaluated eight times.  */
+#define DEC(n)                   \
+  ('0' + (((n) / 10000000)%10)), \
+  ('0' + (((n) / 1000000)%10)),  \
+  ('0' + (((n) / 100000)%10)),   \
+  ('0' + (((n) / 10000)%10)),    \
+  ('0' + (((n) / 1000)%10)),     \
+  ('0' + (((n) / 100)%10)),      \
+  ('0' + (((n) / 10)%10)),       \
+  ('0' +  ((n) % 10))
+/* Convert integer to hex digit literals.  Expands to eight
+   comma-separated character expressions, one per 4-bit group of the low
+   32 bits of n, most significant group first.  Each group value is added
+   to '0', so values 10-15 produce the characters that follow '9' rather
+   than the letters 'A'-'F'.
+   NOTE(review): confirm that whatever parses these strings expects this
+   encoding before changing it.  */
+#define HEX(n)             \
+  ('0' + ((n)>>28 & 0xF)), \
+  ('0' + ((n)>>24 & 0xF)), \
+  ('0' + ((n)>>20 & 0xF)), \
+  ('0' + ((n)>>16 & 0xF)), \
+  ('0' + ((n)>>12 & 0xF)), \
+  ('0' + ((n)>>8  & 0xF)), \
+  ('0' + ((n)>>4  & 0xF)), \
+  ('0' + ((n)     & 0xF))
+/* Construct a string literal encoding the version number.  This form is
+   used when a compiler branch supplied the whole version as a string in
+   COMPILER_VERSION; info_version is then a pointer.  */
+#ifdef COMPILER_VERSION
+char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
+/* Construct a string literal encoding the version number components.
+   Here info_version is a char array spelling MAJOR[.MINOR[.PATCH[.TWEAK]]]
+   inside the brackets.  The checks are nested, so a component is emitted
+   only when every component before it is defined.  Each COMPILER_VERSION_*
+   macro expands to a comma-separated list of characters (see DEC/HEX).  */
+#elif defined(COMPILER_VERSION_MAJOR)
+char const info_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
+  COMPILER_VERSION_MAJOR,
+# ifdef COMPILER_VERSION_MINOR
+  '.', COMPILER_VERSION_MINOR,
+#  ifdef COMPILER_VERSION_PATCH
+   '.', COMPILER_VERSION_PATCH,
+#   ifdef COMPILER_VERSION_TWEAK
+    '.', COMPILER_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+/* Construct a string literal encoding the internal version number.
+   COMPILER_VERSION_INTERNAL is a character list (see DEC) and yields a
+   char array; COMPILER_VERSION_INTERNAL_STR is a ready-made string and
+   yields a pointer.  The character-list form takes precedence when both
+   are defined.  */
+#ifdef COMPILER_VERSION_INTERNAL
+char const info_version_internal[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
+  'i','n','t','e','r','n','a','l','[',
+  COMPILER_VERSION_INTERNAL,']','\0'};
+#elif defined(COMPILER_VERSION_INTERNAL_STR)
+char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
+#endif
+/* Construct a string literal encoding the version number components of
+   the compiler named by SIMULATE_ID, spelled MAJOR[.MINOR[.PATCH[.TWEAK]]].
+   As with info_version, the checks are nested, so a component is emitted
+   only when every component before it is defined.  */
+#ifdef SIMULATE_VERSION_MAJOR
+char const info_simulate_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
+  SIMULATE_VERSION_MAJOR,
+# ifdef SIMULATE_VERSION_MINOR
+  '.', SIMULATE_VERSION_MINOR,
+#  ifdef SIMULATE_VERSION_PATCH
+   '.', SIMULATE_VERSION_PATCH,
+#   ifdef SIMULATE_VERSION_TWEAK
+    '.', SIMULATE_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+/* Platform and architecture identification strings.
+   Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.
+   PLATFORM_ID and ARCHITECTURE_ID may expand to nothing, in which case
+   the brackets are left empty.  */
+char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
+char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
+#if !defined(__STDC__) && !defined(__clang__) /* C_VERSION: two-digit name of the C standard level the compiler reports by default */
+# if defined(_MSC_VER) || defined(__ibmxl__) || defined(__IBMC__)
+#  define C_VERSION "90"
+# else
+#  define C_VERSION /* empty: no level can be reported */
+# endif
+#elif __STDC_VERSION__ > 201710L /* anything newer than the C17 value is reported as "23" */
+# define C_VERSION "23"
+#elif __STDC_VERSION__ >= 201710L
+# define C_VERSION "17"
+#elif __STDC_VERSION__ >= 201000L
+# define C_VERSION "11"
+#elif __STDC_VERSION__ >= 199901L
+# define C_VERSION "99"
+#else /* __STDC_VERSION__ is below 199901L, or undefined (an undefined name evaluates as 0 in #if) */
+# define C_VERSION "90"
+#endif
+const char* info_language_standard_default = /* reports C_VERSION; the brackets are empty when C_VERSION is empty */
+  "INFO" ":" "standard_default[" C_VERSION "]";
+const char* info_language_extensions_default = "INFO" ":" "extensions_default[" /* the value piece is selected by the #if below */
+#if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) ||           \
+     defined(__TI_COMPILER_VERSION__)) &&                                     \
+  !defined(__STRICT_ANSI__)
+  "ON" /* one of the listed compiler macros is defined and __STRICT_ANSI__ is not */
+#else
+  "OFF"
+#endif
+"]";
+/*--------------------------------------------------------------------------
+   Program entry point.  When ID_VOID_MAIN is defined, main is an empty
+   function returning void.  Otherwise main reads one character from each
+   identification string, using argc as the index, and returns the sum.
+   Presumably the run-time dependent reads keep the strings from being
+   dropped from the compiled output -- TODO confirm.  The qnxnto string is
+   not read here.
+  --------------------------------------------------------------------------*/
+#ifdef ID_VOID_MAIN
+void main() {}
+#else
+# if defined(__CLASSIC_C__) /* old-style definition: parameter types declared after the parameter list */
+int main(argc, argv) int argc; char *argv[];
+# else
+int main(int argc, char* argv[])
+# endif
+{
+  int require = 0; /* running sum of the characters read below */
+  require += info_compiler[argc];
+  require += info_platform[argc];
+  require += info_arch[argc];
+#ifdef COMPILER_VERSION_MAJOR /* note: info_version is not read when only COMPILER_VERSION is defined */
+  require += info_version[argc];
+#endif
+#ifdef COMPILER_VERSION_INTERNAL /* note: not read when only COMPILER_VERSION_INTERNAL_STR is defined */
+  require += info_version_internal[argc];
+#endif
+#ifdef SIMULATE_ID
+  require += info_simulate[argc];
+#endif
+#ifdef SIMULATE_VERSION_MAJOR
+  require += info_simulate_version[argc];
+#endif
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
+  require += info_cray[argc];
+#endif
+  require += info_language_standard_default[argc];
+  require += info_language_extensions_default[argc];
+  (void)argv; /* argv is deliberately unused */
+  return require;
+}
+#endif

gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/a.out ADDED Viewed

Binary file (16.1 kB). View file

gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/CMakeCXXCompilerId.cpp ADDED Viewed

	@@ -0,0 +1,855 @@

+/* This source file must have a .cpp extension so that all C++ compilers
+   recognize the extension without flags.  Borland does not know .cxx for
+   example.  The check below stops compilation with an explicit error
+   when the file is not being compiled as C++.  */
+#ifndef __cplusplus
+# error "A C compiler has been selected for C++."
+#endif
+#if !defined(__has_include)
+/* If the compiler does not have __has_include, pretend the answer is
+   always no: the fallback macro below evaluates to 0 for any argument,
+   so a "#if __has_include(...)" test still preprocesses and is false.  */
+#  define __has_include(x) 0
+#endif
+/* Version number components: V=Version, R=Revision, P=Patch
+   Version date components:   YYYY=Year, MM=Month,   DD=Day
+   The chain that starts here defines COMPILER_ID and, where the compiler
+   exposes them, the COMPILER_VERSION_* and SIMULATE_* macros.  The first
+   branch whose test succeeds wins, so the order of the branches matters.  */
+#if defined(__COMO__)
+# define COMPILER_ID "Comeau"
+  /* __COMO_VERSION__ = VRR */
+# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100)
+# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100)
+#elif defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# if defined(__GNUC__) /* NOTE(review): separate #if, not #elif -- with _MSC_VER also defined, SIMULATE_ID would be defined twice */
+#  define SIMULATE_ID "GNU"
+# endif
+  /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
+     except that a few beta releases use the old format with V=2021.  */
+# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
+#  if defined(__INTEL_COMPILER_UPDATE)
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
+#  else
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER   % 10)
+#  endif
+# else
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
+   /* The third version component from --version is an update index,
+      but no macro is provided for it.  */
+#  define COMPILER_VERSION_PATCH DEC(0)
+# endif
+# if defined(__INTEL_COMPILER_BUILD_DATE)
+   /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
+#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
+# endif
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# if defined(__GNUC__) /* NOTE(review): also independent of the _MSC_VER block above, so the SIMULATE_VERSION_* macros could be defined twice */
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+# elif defined(__GNUG__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+# endif
+# if defined(__GNUC_MINOR__)
+#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
+# define COMPILER_ID "IntelLLVM"
+#if defined(_MSC_VER)
+# define SIMULATE_ID "MSVC"
+#endif
+#if defined(__GNUC__) /* NOTE(review): separate #if, not #elif -- with _MSC_VER also defined, SIMULATE_ID would be defined twice */
+# define SIMULATE_ID "GNU"
+#endif
+/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
+ * later.  Look for 6 digit vs. 8 digit version number to decide encoding.
+ * VVVV is no smaller than the current year when a version is released.
+ * The test below treats any value under 1000000 as the 6 digit form.
+ */
+#if __INTEL_LLVM_COMPILER < 1000000L
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER    % 10)
+#else
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER     % 100)
+#endif
+#if defined(_MSC_VER)
+  /* _MSC_VER = VVRR */
+# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+#endif
+#if defined(__GNUC__) /* NOTE(review): independent of the _MSC_VER block above, so the SIMULATE_VERSION_* macros could be defined twice */
+# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+#elif defined(__GNUG__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+#endif
+#if defined(__GNUC_MINOR__)
+# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+#endif
+#if defined(__GNUC_PATCHLEVEL__)
+# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+#endif
+#elif defined(__PATHCC__)
+# define COMPILER_ID "PathScale"
+# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
+# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
+# if defined(__PATHCC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
+# endif
+#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) /* must precede the plain __BORLANDC__ branch below */
+# define COMPILER_ID "Embarcadero"
+# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
+# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__     & 0xFFFF)
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+  /* __BORLANDC__ = 0xVRR */
+# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
+# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
+#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
+# define COMPILER_ID "Watcom"
+   /* __WATCOMC__ = VVRR
+      NOTE(review): the code below splits the low two digits into one
+      minor digit and one patch digit, which does not match "RR".  */
+# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+#elif defined(__WATCOMC__) /* reached only when __WATCOMC__ is 1200 or higher */
+# define COMPILER_ID "OpenWatcom"
+   /* __WATCOMC__ = VVRP + 1100 */
+# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+#elif defined(__SUNPRO_CC)
+# define COMPILER_ID "SunPro"
+# if __SUNPRO_CC >= 0x5100
+   /* __SUNPRO_CC = 0xVRRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC    & 0xF)
+# else
+   /* __SUNPRO_CC = 0xVRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC    & 0xF)
+# endif
+#elif defined(__HP_aCC)
+# define COMPILER_ID "HP"
+  /* __HP_aCC = VVRRPP */
+# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
+# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__HP_aCC     % 100)
+#elif defined(__DECCXX)
+# define COMPILER_ID "Compaq"
+  /* __DECCXX_VER = VVRRTPPPP (the T digit is not reported) */
+# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
+# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000  % 100)
+# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER         % 10000)
+#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
+# define COMPILER_ID "zOS"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+#elif defined(__open_xl__) && defined(__clang__)
+# define COMPILER_ID "IBMClang"
+# define COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
+# define COMPILER_VERSION_MINOR DEC(__open_xl_release__)
+# define COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
+# define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
+#elif defined(__ibmxl__) && defined(__clang__)
+# define COMPILER_ID "XLClang"
+# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
+# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
+# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
+# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
+#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
+# define COMPILER_ID "XL"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
+# define COMPILER_ID "VisualAge"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+#elif defined(__NVCOMPILER)
+# define COMPILER_ID "NVHPC"
+# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
+# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
+# if defined(__NVCOMPILER_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
+# endif
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
+# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
+# if defined(__PGIC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
+# endif
+#elif defined(_CRAYC)
+# define COMPILER_ID "Cray"
+# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
+# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
+#elif defined(__TI_COMPILER_VERSION__)
+# define COMPILER_ID "TI"
+  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
+# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
+# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000   % 1000)
+# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__        % 1000)
+#elif defined(__CLANG_FUJITSU)
+# define COMPILER_ID "FujitsuClang"
+# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# define COMPILER_VERSION_INTERNAL_STR __clang_version__ /* string form of the internal version */
+#elif defined(__FUJITSU)
+# define COMPILER_ID "Fujitsu"
+# if defined(__FCC_version__) /* a ready-made version string is preferred over the numeric components */
+#   define COMPILER_VERSION __FCC_version__
+# elif defined(__FCC_major__)
+#   define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+#   define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+#   define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# endif
+# if defined(__fcc_version)
+#   define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
+# elif defined(__FCC_VERSION)
+#   define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
+# endif
+#elif defined(__ghs__)
+# define COMPILER_ID "GHS"
+/* __GHS_VERSION_NUMBER = VVVVRP; no version is reported when it is undefined */
+# ifdef __GHS_VERSION_NUMBER
+# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
+# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER      % 10)
+# endif
+#elif defined(__TASKING__)
+# define COMPILER_ID "Tasking"
+  # define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000) /* NOTE(review): the layout of __VERSION__ is not documented here; major is everything above the low three digits */
+  # define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100) /* minor is the low two digits; the hundreds digit is not reported */
+# define COMPILER_VERSION_INTERNAL DEC(__VERSION__)
+#elif defined(__SCO_VERSION__)
+# define COMPILER_ID "SCO"
+#elif defined(__ARMCC_VERSION) && !defined(__clang__)
+# define COMPILER_ID "ARMCC"
+#if __ARMCC_VERSION >= 1000000
+  /* __ARMCC_VERSION = VRRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION     % 10000)
+#else
+  /* __ARMCC_VERSION = VRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION    % 10000)
+#endif
+#elif defined(__clang__) && defined(__apple_build_version__) /* the more specific clang variants are tested before plain Clang */
+# define COMPILER_ID "AppleClang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
+#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
+# define COMPILER_ID "ARMClang"
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100   % 100)
+# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
+#elif defined(__clang__)
+# define COMPILER_ID "Clang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__)) /* tested before the generic GNU branch */
+# define COMPILER_ID "LCC"
+# define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
+# define COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
+# if defined(__LCC_MINOR__)
+#  define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
+# endif
+# if defined(__GNUC__) && defined(__GNUC_MINOR__)
+#  define SIMULATE_ID "GNU"
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+#  if defined(__GNUC_PATCHLEVEL__)
+#   define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+#  endif
+# endif
+#elif defined(__GNUC__) || defined(__GNUG__)
+# define COMPILER_ID "GNU"
+# if defined(__GNUC__)
+#  define COMPILER_VERSION_MAJOR DEC(__GNUC__)
+# else
+#  define COMPILER_VERSION_MAJOR DEC(__GNUG__)
+# endif
+# if defined(__GNUC_MINOR__)
+#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+  /* _MSC_VER = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
+# if defined(_MSC_FULL_VER)
+#  if _MSC_VER >= 1400
+    /* _MSC_FULL_VER = VVRRPPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
+#  else
+    /* _MSC_FULL_VER = VVRRPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
+#  endif
+# endif
+# if defined(_MSC_BUILD)
+#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
+# endif
+#elif defined(_ADI_COMPILER)
+# define COMPILER_ID "ADSP"
+#if defined(__VERSIONNUM__)
+  /* __VERSIONNUM__ = 0xVVRRPPTT; each byte is reported as a decimal number */
+#  define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
+#  define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
+#  define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
+#  define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
+#endif
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# define COMPILER_ID "IAR"
+# if defined(__VER__) && defined(__ICCARM__)
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
+#  define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
+#  define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
+#  define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) /* same value as (__VER__) % 100 */
+#  define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# endif
+/* These compilers are either not known or too old to define an
+  identification macro.  Try to identify the platform and guess that
+  it is the native compiler.  */
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+#else /* unknown compiler: the identifier is left as an empty string */
+# define COMPILER_ID ""
+#endif
+/* Identification strings: a marker word, a colon, then key[value].
+   Construct the string literal in pieces to prevent the source from
+   getting matched; the adjacent pieces are joined by the compiler, so
+   the complete text never appears verbatim in this file.  Store it in a
+   pointer rather than an array because some compilers will just produce
+   instructions to fill the array rather than assigning a pointer to a
+   static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; /* value is "" for an unknown compiler */
+#ifdef SIMULATE_ID /* defined only by some compiler branches above */
+char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
+#endif
+#ifdef __QNXNTO__ /* flag-style entry: the brackets carry no value */
+char const* qnxnto = "INFO" ":" "qnxnto[]";
+#endif
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) /* fixed wrapper name, emitted only when this macro is predefined */
+char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
+#endif
+#define STRINGIFY_HELPER(X) #X /* stringizes X exactly as written */
+#define STRINGIFY(X) STRINGIFY_HELPER(X) /* extra level so a macro argument is expanded before being stringized */
+/* Identify known platforms by name.  PLATFORM_ID is set by the first
+   matching test below, so the order of the checks is significant.  When
+   nothing matches it is defined empty and the platform string is
+   emitted with an empty value.  */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+#elif defined(__MSYS__)
+# define PLATFORM_ID "MSYS"
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+#elif defined(__HAIKU__)
+# define PLATFORM_ID "Haiku"
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+#elif defined(__WATCOMC__)
+# if defined(__LINUX__)
+#  define PLATFORM_ID "Linux"
+# elif defined(__DOS__)
+#  define PLATFORM_ID "DOS"
+# elif defined(__OS2__)
+#  define PLATFORM_ID "OS2"
+# elif defined(__WINDOWS__)
+#  define PLATFORM_ID "Windows3x"
+# elif defined(__VXWORKS__)
+#  define PLATFORM_ID "VxWorks"
+# else /* unknown platform */
+#  define PLATFORM_ID /* empty: adds nothing between the brackets of the platform string */
+# endif
+#elif defined(__INTEGRITY)
+# if defined(INT_178B)
+#  define PLATFORM_ID "Integrity178"
+# else /* regular Integrity */
+#  define PLATFORM_ID "Integrity"
+# endif
+# elif defined(_ADI_COMPILER) /* despite the indent this continues the outer chain: the nested INT_178B test was closed on the previous line */
+#  define PLATFORM_ID "ADSP"
+#else /* unknown platform */
+# define PLATFORM_ID /* empty: adds nothing between the brackets of the platform string */
+#endif
+/* For the Windows compilers MSVC and Intel we can determine
+   the architecture of the compiler being used.  This is because
+   these compilers do not have flags that can change the architecture;
+   the target is fixed by which compiler binary is being used.
+   The chain below applies the same kind of test to further compilers
+   (__WATCOMC__, IAR, __ghs__, TI, ADSP, __TASKING__), each keyed on its
+   own predefined macros.  Compilers or architectures that are not
+   recognized get an empty ARCHITECTURE_ID.
+*/
+#if defined(_WIN32) && defined(_MSC_VER)
+# if defined(_M_IA64)
+#  define ARCHITECTURE_ID "IA64"
+# elif defined(_M_ARM64EC)
+#  define ARCHITECTURE_ID "ARM64EC"
+# elif defined(_M_X64) || defined(_M_AMD64)
+#  define ARCHITECTURE_ID "x64"
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+# elif defined(_M_ARM64)
+#  define ARCHITECTURE_ID "ARM64"
+# elif defined(_M_ARM)
+#  if _M_ARM == 4
+#   define ARCHITECTURE_ID "ARMV4I"
+#  elif _M_ARM == 5
+#   define ARCHITECTURE_ID "ARMV5I"
+#  else
+#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
+#  endif
+# elif defined(_M_MIPS)
+#  define ARCHITECTURE_ID "MIPS"
+# elif defined(_M_SH)
+#  define ARCHITECTURE_ID "SHx"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__WATCOMC__)
+# if defined(_M_I86)
+#  define ARCHITECTURE_ID "I86"
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# if defined(__ICCARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__ICCRX__)
+#  define ARCHITECTURE_ID "RX"
+# elif defined(__ICCRH850__)
+#  define ARCHITECTURE_ID "RH850"
+# elif defined(__ICCRL78__)
+#  define ARCHITECTURE_ID "RL78"
+# elif defined(__ICCRISCV__)
+#  define ARCHITECTURE_ID "RISCV"
+# elif defined(__ICCAVR__)
+#  define ARCHITECTURE_ID "AVR"
+# elif defined(__ICC430__)
+#  define ARCHITECTURE_ID "MSP430"
+# elif defined(__ICCV850__)
+#  define ARCHITECTURE_ID "V850"
+# elif defined(__ICC8051__)
+#  define ARCHITECTURE_ID "8051"
+# elif defined(__ICCSTM8__)
+#  define ARCHITECTURE_ID "STM8"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__ghs__)
+# if defined(__PPC64__)
+#  define ARCHITECTURE_ID "PPC64"
+# elif defined(__ppc__)
+#  define ARCHITECTURE_ID "PPC"
+# elif defined(__ARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__x86_64__)
+#  define ARCHITECTURE_ID "x64"
+# elif defined(__i386__)
+#  define ARCHITECTURE_ID "X86"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+#elif defined(__TI_COMPILER_VERSION__)
+# if defined(__TI_ARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__MSP430__)
+#  define ARCHITECTURE_ID "MSP430"
+# elif defined(__TMS320C28XX__)
+#  define ARCHITECTURE_ID "TMS320C28x"
+# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
+#  define ARCHITECTURE_ID "TMS320C6x"
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+# elif defined(__ADSPSHARC__) /* despite the indent this and the next test continue the outer chain: the nested TI test was closed on the previous line */
+#  define ARCHITECTURE_ID "SHARC"
+# elif defined(__ADSPBLACKFIN__)
+#  define ARCHITECTURE_ID "Blackfin"
+#elif defined(__TASKING__)
+# if defined(__CTC__) || defined(__CPTC__)
+#  define ARCHITECTURE_ID "TriCore"
+# elif defined(__CMCS__)
+#  define ARCHITECTURE_ID "MCS"
+# elif defined(__CARM__)
+#  define ARCHITECTURE_ID "ARM"
+# elif defined(__CARC__)
+#  define ARCHITECTURE_ID "ARC"
+# elif defined(__C51__)
+#  define ARCHITECTURE_ID "8051"
+# elif defined(__CPCP__)
+#  define ARCHITECTURE_ID "PCP"
+# else
+#  define ARCHITECTURE_ID ""
+# endif
+#else
+#  define ARCHITECTURE_ID /* empty (no token at all), unlike the empty string literal used by the nested fallbacks above */
+#endif
+/* Convert integer to decimal digit literals.  Expands to eight
+   comma-separated character expressions, most significant digit first,
+   so the value is zero-padded to eight digits and is meant to be placed
+   inside a char array initializer.  Digits above the eighth are dropped.  */
+#define DEC(n)                   \
+  ('0' + (((n) / 10000000)%10)), \
+  ('0' + (((n) / 1000000)%10)),  \
+  ('0' + (((n) / 100000)%10)),   \
+  ('0' + (((n) / 10000)%10)),    \
+  ('0' + (((n) / 1000)%10)),     \
+  ('0' + (((n) / 100)%10)),      \
+  ('0' + (((n) / 10)%10)),       \
+  ('0' +  ((n) % 10))
+/* Convert integer to hex digit literals.  Expands to eight
+   comma-separated character expressions, one per 4-bit nibble, highest
+   nibble first.  Each nibble is added to '0', so a nibble value above 9
+   does not become a letter digit.  NOTE(review): presumably only applied
+   to BCD-style version numbers - confirm against the callers.  */
+#define HEX(n)             \
+  ('0' + ((n)>>28 & 0xF)), \
+  ('0' + ((n)>>24 & 0xF)), \
+  ('0' + ((n)>>20 & 0xF)), \
+  ('0' + ((n)>>16 & 0xF)), \
+  ('0' + ((n)>>12 & 0xF)), \
+  ('0' + ((n)>>8  & 0xF)), \
+  ('0' + ((n)>>4  & 0xF)), \
+  ('0' + ((n)     & 0xF))
+/* Construct a string literal encoding the version number.  This form is
+   used when the whole version is available as the single string macro
+   COMPILER_VERSION; info_version is then a pointer to the literal. */
+#ifdef COMPILER_VERSION
+char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
+/* Construct a string literal encoding the version number components.
+   In this form info_version is a char array assembled from the
+   character lists that the COMPILER_VERSION_* macros expand to, joined
+   by '.' characters.  The MINOR, PATCH and TWEAK tests are nested, so a
+   component is only emitted when every preceding one is defined. */
+#elif defined(COMPILER_VERSION_MAJOR)
+char const info_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
+  COMPILER_VERSION_MAJOR,
+# ifdef COMPILER_VERSION_MINOR
+  '.', COMPILER_VERSION_MINOR,
+#  ifdef COMPILER_VERSION_PATCH
+   '.', COMPILER_VERSION_PATCH,
+#   ifdef COMPILER_VERSION_TWEAK
+    '.', COMPILER_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+/* Construct a string literal encoding the internal version number.
+   COMPILER_VERSION_INTERNAL supplies a list of character expressions and
+   yields a char array; COMPILER_VERSION_INTERNAL_STR supplies a string
+   literal and yields a pointer.  main() in this file only reads the
+   array form. */
+#ifdef COMPILER_VERSION_INTERNAL
+char const info_version_internal[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
+  'i','n','t','e','r','n','a','l','[',
+  COMPILER_VERSION_INTERNAL,']','\0'};
+#elif defined(COMPILER_VERSION_INTERNAL_STR)
+char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
+#endif
+/* Construct a string literal encoding the simulated version number
+   components.  Same layout as the compiler_version array: the
+   SIMULATE_VERSION_* character lists joined by '.' characters, with
+   MINOR, PATCH and TWEAK nested so that a component is only emitted
+   when every preceding one is defined. */
+#ifdef SIMULATE_VERSION_MAJOR
+char const info_simulate_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
+  SIMULATE_VERSION_MAJOR,
+# ifdef SIMULATE_VERSION_MINOR
+  '.', SIMULATE_VERSION_MINOR,
+#  ifdef SIMULATE_VERSION_PATCH
+   '.', SIMULATE_VERSION_PATCH,
+#   ifdef SIMULATE_VERSION_TWEAK
+    '.', SIMULATE_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+/* Platform and architecture identification strings.
+   Construct each string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.
+   PLATFORM_ID and ARCHITECTURE_ID may be empty, which leaves the
+   brackets empty.  */
+char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
+char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
+#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L /* Intel with _MSVC_LANG below 201403L: pick CXX_STD from Intel feature macros instead */
+#  if defined(__INTEL_CXX11_MODE__)
+#    if defined(__cpp_aggregate_nsdmi)
+#      define CXX_STD 201402L
+#    else
+#      define CXX_STD 201103L
+#    endif
+#  else
+#    define CXX_STD 199711L
+#  endif
+#elif defined(_MSC_VER) && defined(_MSVC_LANG) /* _MSC_VER and _MSVC_LANG both defined: take the level from _MSVC_LANG */
+#  define CXX_STD _MSVC_LANG
+#else /* everything else: use the __cplusplus value */
+#  define CXX_STD __cplusplus
+#endif
+const char* info_language_standard_default = "INFO" ":" "standard_default[" /* value is the two-digit standard name selected from CXX_STD below */
+#if CXX_STD > 202002L /* anything later than 202002L */
+  "23"
+#elif CXX_STD > 201703L /* later than 201703L, up to and including 202002L */
+  "20"
+#elif CXX_STD >= 201703L /* exactly 201703L, since larger values matched above */
+  "17"
+#elif CXX_STD >= 201402L
+  "14"
+#elif CXX_STD >= 201103L
+  "11"
+#else /* anything below 201103L */
+  "98"
+#endif
+"]";
+const char* info_language_extensions_default = "INFO" ":" "extensions_default[" /* value is ON or OFF, chosen by the test below */
+#if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) ||           \
+     defined(__TI_COMPILER_VERSION__)) &&                                     \
+  !defined(__STRICT_ANSI__) /* one of the listed compiler macros is defined and __STRICT_ANSI__ is not */
+  "ON"
+#else
+  "OFF"
+#endif
+"]";
+/*--------------------------------------------------------------------------
+   main: reads one character from each identification string and returns
+   the sum as the exit status.  The index is argc, a value only known at
+   run time.  NOTE(review): presumably this is what keeps the compiler and
+   linker from discarding the otherwise unused strings - confirm.
+   info_version is read only under COMPILER_VERSION_MAJOR and
+   info_version_internal only under COMPILER_VERSION_INTERNAL, although
+   each is also defined under a second macro earlier in this file.
+   argv is unused.
+  --------------------------------------------------------------------------*/
+int main(int argc, char* argv[])
+{
+  int require = 0; /* running sum of the sampled characters */
+  require += info_compiler[argc];
+  require += info_platform[argc];
+  require += info_arch[argc];
+#ifdef COMPILER_VERSION_MAJOR
+  require += info_version[argc];
+#endif
+#ifdef COMPILER_VERSION_INTERNAL
+  require += info_version_internal[argc];
+#endif
+#ifdef SIMULATE_ID
+  require += info_simulate[argc];
+#endif
+#ifdef SIMULATE_VERSION_MAJOR
+  require += info_simulate_version[argc];
+#endif
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
+  require += info_cray[argc];
+#endif
+  require += info_language_standard_default[argc];
+  require += info_language_extensions_default[argc];
+  (void)argv; /* argv is intentionally unused */
+  return require;
+}

gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out ADDED Viewed

Binary file (16.1 kB). View file

gemma.cpp/build/CMakeFiles/CMakeConfigureLog.yaml ADDED Viewed

The diff for this file is too large to render. See raw diff