ZeroWw committed on
Commit
055eba4
·
verified ·
1 Parent(s): da618d9

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. 9b-it-sfp.sbs +3 -0
  3. gemma.cpp/.bazelrc +1 -0
  4. gemma.cpp/.bazelversion +1 -0
  5. gemma.cpp/.clang-format +2 -0
  6. gemma.cpp/.clang-tidy +211 -0
  7. gemma.cpp/.github/workflows/build.yml +119 -0
  8. gemma.cpp/.gitignore +4 -0
  9. gemma.cpp/.vscode/settings.json +6 -0
  10. gemma.cpp/BUILD.bazel +420 -0
  11. gemma.cpp/CMakeLists.txt +149 -0
  12. gemma.cpp/CMakePresets.json +59 -0
  13. gemma.cpp/DEVELOPERS.md +203 -0
  14. gemma.cpp/LICENSE +202 -0
  15. gemma.cpp/LICENSE-BSD3 +26 -0
  16. gemma.cpp/MODULE.bazel +72 -0
  17. gemma.cpp/README.md +493 -0
  18. gemma.cpp/WORKSPACE +4 -0
  19. gemma.cpp/backprop/backward-inl.h +428 -0
  20. gemma.cpp/backprop/backward.cc +95 -0
  21. gemma.cpp/backprop/backward.h +32 -0
  22. gemma.cpp/backprop/backward_scalar.h +362 -0
  23. gemma.cpp/backprop/backward_scalar_test.cc +614 -0
  24. gemma.cpp/backprop/backward_test.cc +264 -0
  25. gemma.cpp/backprop/common_scalar.h +120 -0
  26. gemma.cpp/backprop/forward-inl.h +289 -0
  27. gemma.cpp/backprop/forward.cc +86 -0
  28. gemma.cpp/backprop/forward.h +33 -0
  29. gemma.cpp/backprop/forward_scalar.h +300 -0
  30. gemma.cpp/backprop/optimize_test.cc +144 -0
  31. gemma.cpp/backprop/optimizer.cc +135 -0
  32. gemma.cpp/backprop/optimizer.h +37 -0
  33. gemma.cpp/backprop/prompt.h +34 -0
  34. gemma.cpp/backprop/sampler.h +87 -0
  35. gemma.cpp/backprop/test_util.h +168 -0
  36. gemma.cpp/bazel/BUILD +5 -0
  37. gemma.cpp/bazel/sentencepiece.bazel +97 -0
  38. gemma.cpp/bazel/sentencepiece.patch +2339 -0
  39. gemma.cpp/build/.gitignore +3 -0
  40. gemma.cpp/build/CMakeCache.txt +982 -0
  41. gemma.cpp/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake +74 -0
  42. gemma.cpp/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake +85 -0
  43. gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin +3 -0
  44. gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin +3 -0
  45. gemma.cpp/build/CMakeFiles/3.27.9/CMakeSystem.cmake +15 -0
  46. gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/CMakeCCompilerId.c +866 -0
  47. gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/a.out +0 -0
  48. gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/CMakeCXXCompilerId.cpp +855 -0
  49. gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out +0 -0
  50. gemma.cpp/build/CMakeFiles/CMakeConfigureLog.yaml +0 -0
.gitattributes CHANGED
@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 9b-it-sfp.sbs filter=lfs diff=lfs merge=lfs -text
37
+ gemma.cpp/build/CMakeFiles/libgemma.dir/gemma/gemma.cc.o filter=lfs diff=lfs merge=lfs -text
38
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_128a.cc.o filter=lfs diff=lfs merge=lfs -text
39
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_128d.cc.o filter=lfs diff=lfs merge=lfs -text
40
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f32a.cc.o filter=lfs diff=lfs merge=lfs -text
41
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f32d.cc.o filter=lfs diff=lfs merge=lfs -text
42
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f64a.cc.o filter=lfs diff=lfs merge=lfs -text
43
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_f64d.cc.o filter=lfs diff=lfs merge=lfs -text
44
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i16a.cc.o filter=lfs diff=lfs merge=lfs -text
45
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i16d.cc.o filter=lfs diff=lfs merge=lfs -text
46
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i32a.cc.o filter=lfs diff=lfs merge=lfs -text
47
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i32d.cc.o filter=lfs diff=lfs merge=lfs -text
48
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i64a.cc.o filter=lfs diff=lfs merge=lfs -text
49
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_i64d.cc.o filter=lfs diff=lfs merge=lfs -text
50
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv128a.cc.o filter=lfs diff=lfs merge=lfs -text
51
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv128d.cc.o filter=lfs diff=lfs merge=lfs -text
52
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv64a.cc.o filter=lfs diff=lfs merge=lfs -text
53
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_kv64d.cc.o filter=lfs diff=lfs merge=lfs -text
54
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u16a.cc.o filter=lfs diff=lfs merge=lfs -text
55
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u16d.cc.o filter=lfs diff=lfs merge=lfs -text
56
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u32a.cc.o filter=lfs diff=lfs merge=lfs -text
57
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u32d.cc.o filter=lfs diff=lfs merge=lfs -text
58
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u64a.cc.o filter=lfs diff=lfs merge=lfs -text
59
+ gemma.cpp/build/_deps/highway-build/CMakeFiles/hwy_contrib.dir/hwy/contrib/sort/vqsort_u64d.cc.o filter=lfs diff=lfs merge=lfs -text
60
+ gemma.cpp/build/_deps/highway-build/libhwy.a filter=lfs diff=lfs merge=lfs -text
61
+ gemma.cpp/build/_deps/highway-build/libhwy_contrib.a filter=lfs diff=lfs merge=lfs -text
62
+ gemma.cpp/build/_deps/highway-src/g3doc/highway_intro.pdf filter=lfs diff=lfs merge=lfs -text
63
+ gemma.cpp/build/_deps/json-src/docs/avatars.png filter=lfs diff=lfs merge=lfs -text
64
+ gemma.cpp/build/_deps/json-src/docs/json.gif filter=lfs diff=lfs merge=lfs -text
65
+ gemma.cpp/build/_deps/json-src/docs/usages/macos.png filter=lfs diff=lfs merge=lfs -text
66
+ gemma.cpp/build/_deps/sentencepiece-build/src/libsentencepiece.a filter=lfs diff=lfs merge=lfs -text
67
+ gemma.cpp/build/gemma filter=lfs diff=lfs merge=lfs -text
68
+ gemma.cpp/build/libgemma.a filter=lfs diff=lfs merge=lfs -text
69
+ tokenizer.spm filter=lfs diff=lfs merge=lfs -text
9b-it-sfp.sbs ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aad1b51e34090d4f4ed0a392470c47bd34acb10301fd12e7b81160f8b6d2d41
3
+ size 10159826688
gemma.cpp/.bazelrc ADDED
@@ -0,0 +1 @@
 
 
1
+ common --enable_bzlmod
gemma.cpp/.bazelversion ADDED
@@ -0,0 +1 @@
 
 
1
+ 7.1.1
gemma.cpp/.clang-format ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Language: Cpp
2
+ BasedOnStyle: Google
gemma.cpp/.clang-tidy ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FormatStyle: file
2
+ WarningsAsErrors: "*"
3
+ Checks: "-*,\
4
+ abseil-*,\
5
+ -abseil-string-find-startswith,\
6
+ -abseil-string-find-str-contains,\
7
+ bugprone-*,\
8
+ -bugprone-argument-comment,\
9
+ -bugprone-assert-side-effect,\
10
+ -bugprone-bad-signal-to-kill-thread,\
11
+ -bugprone-bool-pointer-implicit-conversion,\
12
+ -bugprone-branch-clone,\
13
+ -bugprone-copy-constructor-init,\
14
+ -bugprone-dangling-handle,\
15
+ -bugprone-dynamic-static-initializers,\
16
+ -bugprone-easily-swappable-parameters,\
17
+ -bugprone-exception-escape,\
18
+ -bugprone-fold-init-type,\
19
+ -bugprone-forward-declaration-namespace,\
20
+ -bugprone-forwarding-reference-overload,\
21
+ -bugprone-implicit-widening-of-multiplication-result,\
22
+ -bugprone-inaccurate-erase,\
23
+ -bugprone-incorrect-roundings,\
24
+ -bugprone-infinite-loop,\
25
+ -bugprone-integer-division,\
26
+ -bugprone-lambda-function-name,\
27
+ -bugprone-macro-parentheses,\
28
+ -bugprone-macro-repeated-side-effects,\
29
+ -bugprone-misplaced-operator-in-strlen-in-alloc,\
30
+ -bugprone-misplaced-widening-cast,\
31
+ -bugprone-move-forwarding-reference,\
32
+ -bugprone-multiple-statement-macro,\
33
+ -bugprone-narrowing-conversions,\
34
+ -bugprone-no-escape,\
35
+ -bugprone-not-null-terminated-result,\
36
+ -bugprone-parent-virtual-call,\
37
+ -bugprone-posix-return,\
38
+ -bugprone-redundant-branch-condition,\
39
+ -bugprone-reserved-identifier,\
40
+ -bugprone-signal-handler,\
41
+ -bugprone-signed-char-misuse,\
42
+ -bugprone-sizeof-container,\
43
+ -bugprone-sizeof-expression,\
44
+ -bugprone-spuriously-wake-up-functions,\
45
+ -bugprone-string-constructor,\
46
+ -bugprone-string-integer-assignment,\
47
+ -bugprone-string-literal-with-embedded-nul,\
48
+ -bugprone-stringview-nullptr,\
49
+ -bugprone-suspicious-enum-usage,\
50
+ -bugprone-suspicious-include,\
51
+ -bugprone-suspicious-memory-comparison,\
52
+ -bugprone-suspicious-memset-usage,\
53
+ -bugprone-suspicious-missing-comma,\
54
+ -bugprone-suspicious-semicolon,\
55
+ -bugprone-suspicious-string-compare,\
56
+ -bugprone-swapped-arguments,\
57
+ -bugprone-terminating-continue,\
58
+ -bugprone-throw-keyword-missing,\
59
+ -bugprone-too-small-loop-variable,\
60
+ -bugprone-undefined-memory-manipulation,\
61
+ -bugprone-undelegated-constructor,\
62
+ -bugprone-unhandled-exception-at-new,\
63
+ -bugprone-unhandled-self-assignment,\
64
+ -bugprone-unused-raii,\
65
+ -bugprone-unused-return-value,\
66
+ -bugprone-use-after-move,\
67
+ -bugprone-virtual-near-miss,\
68
+ cert-*,\
69
+ -cert-dcl16-c,\
70
+ -cert-dcl21-cpp,\
71
+ -cert-dcl37-c,\
72
+ -cert-dcl50-cpp,\
73
+ -cert-dcl51-cpp,\
74
+ -cert-dcl54-cpp,\
75
+ -cert-dcl58-cpp,\
76
+ -cert-err33-c,\
77
+ -cert-msc30-c,\
78
+ -cert-msc32-c,\
79
+ -cert-msc50-cpp,\
80
+ -cert-msc51-cpp,\
81
+ -cert-oop54-cpp,\
82
+ -cert-str34-c,\
83
+ -cert-str34-c,\
84
+ -cert-str34-c,\
85
+ -cert-str34-c,\
86
+ -clang-analyzer-*,\
87
+ concurrency-*,\
88
+ -concurrency-mt-unsafe,\
89
+ cppcoreguidelines-*,\
90
+ -concurrency-mt-unsafe,\
91
+ -cppcoreguidelines-avoid-c-arrays,\
92
+ -cppcoreguidelines-avoid-const-or-ref-data-members,\
93
+ -cppcoreguidelines-avoid-do-while,\
94
+ -cppcoreguidelines-avoid-goto,\
95
+ -cppcoreguidelines-avoid-magic-numbers,\
96
+ -cppcoreguidelines-avoid-non-const-global-variables,\
97
+ -cppcoreguidelines-c-copy-assignment-signature,\
98
+ -cppcoreguidelines-explicit-virtual-functions,\
99
+ -cppcoreguidelines-init-variables,\
100
+ -cppcoreguidelines-interfaces-global-init,\
101
+ -cppcoreguidelines-macro-usage,\
102
+ -cppcoreguidelines-narrowing-conversions,\
103
+ -cppcoreguidelines-no-malloc,\
104
+ -cppcoreguidelines-non-private-member-variables-in-classes,\
105
+ -cppcoreguidelines-owning-memory,\
106
+ -cppcoreguidelines-prefer-member-initializer,\
107
+ -cppcoreguidelines-pro-bounds-array-to-pointer-decay,\
108
+ -cppcoreguidelines-pro-bounds-constant-array-index,\
109
+ -cppcoreguidelines-pro-bounds-pointer-arithmetic,\
110
+ -cppcoreguidelines-pro-type-const-cast,\
111
+ -cppcoreguidelines-pro-type-member-init,\
112
+ -cppcoreguidelines-pro-type-reinterpret-cast,\
113
+ -cppcoreguidelines-pro-type-static-cast-downcast,\
114
+ -cppcoreguidelines-pro-type-union-access,\
115
+ -cppcoreguidelines-pro-type-vararg,\
116
+ -cppcoreguidelines-slicing,\
117
+ -cppcoreguidelines-special-member-functions,\
118
+ -cppcoreguidelines-virtual-class-destructor,\
119
+ google-*,\
120
+ -google-default-arguments,\
121
+ -google-explicit-constructor,\
122
+ -google-readability-avoid-underscore-in-googletest-name,\
123
+ -google-readability-braces-around-statements,\
124
+ -google-readability-casting,\
125
+ -google-readability-namespace-comments,\
126
+ -google-readability-todo,\
127
+ -google-runtime-int,\
128
+ -google-upgrade-googletest-case,\
129
+ misc-*,\
130
+ -misc-misplaced-const,\
131
+ -misc-new-delete-overloads,\
132
+ -misc-non-private-member-variables-in-classes,\
133
+ -misc-no-recursion,\
134
+ -misc-redundant-expression,\
135
+ -misc-uniqueptr-reset-release,\
136
+ -misc-unconventional-assign-operator,\
137
+ -misc-unused-parameters,\
138
+ -misc-unused-using-decls,\
139
+ modernize-*,\
140
+ -modernize-avoid-c-arrays,\
141
+ -modernize-concat-nested-namespaces,\
142
+ -modernize-deprecated-headers,\
143
+ -modernize-loop-convert,\
144
+ -modernize-macro-to-enum,\
145
+ -modernize-make-unique,\
146
+ -modernize-pass-by-value,\
147
+ -modernize-raw-string-literal,\
148
+ -modernize-redundant-void-arg,\
149
+ -modernize-return-braced-init-list,\
150
+ -modernize-unary-static-assert,\
151
+ -modernize-use-auto,\
152
+ -modernize-use-bool-literals,\
153
+ -modernize-use-default-member-init,\
154
+ -modernize-use-emplace,\
155
+ -modernize-use-equals-default,\
156
+ -modernize-use-equals-delete,\
157
+ -modernize-use-nodiscard,\
158
+ -modernize-use-nullptr,\
159
+ -modernize-use-override,\
160
+ -modernize-use-trailing-return-type,\
161
+ -modernize-use-transparent-functors,\
162
+ -modernize-use-using,\
163
+ performance-*,\
164
+ -performance-faster-string-find,\
165
+ -performance-for-range-copy,\
166
+ -performance-inefficient-algorithm,\
167
+ -performance-inefficient-string-concatenation,\
168
+ -performance-inefficient-vector-operation,\
169
+ -performance-move-const-arg,\
170
+ -performance-no-automatic-move,\
171
+ -performance-noexcept-move-constructor,\
172
+ -performance-no-int-to-ptr,\
173
+ -performance-trivially-destructible,\
174
+ -performance-unnecessary-copy-initialization,\
175
+ -performance-unnecessary-value-param,\
176
+ portability-*,\
177
+ readability-*,\
178
+ -readability-avoid-const-params-in-decls,\
179
+ -readability-braces-around-statements,\
180
+ -readability-const-return-type,\
181
+ -readability-container-data-pointer,\
182
+ -readability-container-size-empty,\
183
+ -readability-convert-member-functions-to-static,\
184
+ -readability-else-after-return,\
185
+ -readability-function-cognitive-complexity,\
186
+ -readability-identifier-length,\
187
+ -readability-implicit-bool-conversion,\
188
+ -readability-inconsistent-declaration-parameter-name,\
189
+ -readability-isolate-declaration,\
190
+ -readability-magic-numbers,\
191
+ -readability-make-member-function-const,\
192
+ -readability-named-parameter,\
193
+ -readability-non-const-parameter,\
194
+ -readability-qualified-auto,\
195
+ -readability-redundant-access-specifiers,\
196
+ -readability-redundant-control-flow,\
197
+ -readability-redundant-declaration,\
198
+ -readability-redundant-member-init,\
199
+ -readability-redundant-smartptr-get,\
200
+ -readability-redundant-string-cstr,\
201
+ -readability-redundant-string-init,\
202
+ -readability-simplify-boolean-expr,\
203
+ -readability-static-accessed-through-instance,\
204
+ -readability-static-definition-in-anonymous-namespace,\
205
+ -readability-suspicious-call-argument,\
206
+ -readability-uppercase-literal-suffix,\
207
+ -readability-use-anyofallof
208
+ "
209
+ CheckOptions:
210
+ - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase }
211
+ - { key: readability-identifier-naming.ConstexprVariablePrefix, value: k }
gemma.cpp/.github/workflows/build.yml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: build
2
+
3
+ # Trigger on push, pull request, or via manual dispatch.
4
+ on:
5
+ push:
6
+ pull_request:
7
+ types: [opened, reopened, labeled, unlabeled, synchronize]
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ build:
12
+ runs-on: ${{ matrix.os }}
13
+ name: ${{ matrix.os }} (${{ matrix.preset }}) ${{ matrix.build_type }}
14
+ timeout-minutes: 30
15
+
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ # When adding another, also add to copybara's github_check_runs.
20
+ os: ['ubuntu-latest', 'macos-latest', 'windows-latest', 'ubuntu-20.04']
21
+ build_type: ['Release']
22
+ preset: ['make', 'windows']
23
+ exclude:
24
+ - os: ubuntu-20.04
25
+ preset: windows
26
+ - os: ubuntu-latest
27
+ preset: windows
28
+ - os: macos-latest
29
+ preset: windows
30
+ - os: windows-latest
31
+ preset: make
32
+
33
+ concurrency:
34
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.preset }}-${{ matrix.build_type }}
35
+ cancel-in-progress: true
36
+
37
+ steps:
38
+ - uses: actions/checkout@v4
39
+
40
+ # Set up ccache
41
+ - name: ccache
42
+ uses: hendrikmuhs/ccache-action@v1.2
43
+
44
+ - name: Configure CMake
45
+ run: >
46
+ cmake --preset ${{ matrix.preset }}
47
+ -S ${{ github.workspace }} -B ${{ github.workspace }}/build
48
+ -D CMAKE_BUILD_TYPE=${{ matrix.build_type }}
49
+ -D CMAKE_C_COMPILER_LAUNCHER=ccache
50
+ -D CMAKE_CXX_COMPILER_LAUNCHER=ccache
51
+
52
+ - name: Build
53
+ run: cmake --build ${{ github.workspace }}/build --preset ${{ matrix.preset }} --config ${{ matrix.build_type }} -j 4
54
+
55
+ - name: Archive production artifacts
56
+ uses: actions/upload-artifact@v4
57
+ with:
58
+ name: gemma-${{ matrix.os }}-${{ matrix.preset }}-${{ matrix.build_type }}
59
+ path: |
60
+ ${{ github.workspace }}/build/${{ matrix.build_type }}/gemma.exe
61
+ ${{ github.workspace }}/build/${{ matrix.build_type }}/libgemma.lib
62
+ ${{ github.workspace }}/build/gemma
63
+ ${{ github.workspace }}/build/libgemma.a
64
+
65
+ - if: matrix.os == 'ubuntu-20.04'
66
+ name: Upload build artifacts to Kaggle
67
+ uses: pculliton/push-kaggle-dataset@v1.0.0
68
+ env:
69
+ KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
70
+ KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
71
+ with:
72
+ id: "phillipculliton/gemma-build-artifacts"
73
+ files: |
74
+ build/gemma
75
+ build/_deps/sentencepiece-build/src/libsentencepiece.so.0
76
+
77
+ - if: matrix.os == 'ubuntu-20.04'
78
+ name: Create code for new test notebook version
79
+ run: |
80
+ cat > runner.py << EOF
81
+ import subprocess
82
+ subprocess.run(["cp", "/kaggle/input/gemma-build-artifacts/gemma", "/kaggle/working"])
83
+ subprocess.run(["chmod", "700", "/kaggle/working/gemma"])
84
+ subprocess.run(["cp", "/kaggle/input/gemma-build-artifacts/_deps/sentencepiece-build/src/libsentencepiece.so.0", "/kaggle/working"])
85
+ output = subprocess.run(["/kaggle/working/gemma", "--tokenizer", "/kaggle/input/gemma/gemmacpp/2b-it-sfp/4/tokenizer.spm", "--compressed_weights", "/kaggle/input/gemma/gemmacpp/2b-it-sfp/4/2b-it-sfp.sbs", "--model", "2b-it", "--verbosity", "0", "--max_generated_tokens", "128"], stdout=subprocess.PIPE, input='Write an email to the moon.', encoding='ascii').stdout
86
+ assert("write an email to the moon." not in output.lower());
87
+ assert("moon" in output.lower());
88
+ EOF
89
+
90
+ - if: matrix.os == 'ubuntu-20.04'
91
+ name: Run kaggle test notebook
92
+ uses: pculliton/kaggle-action@v1.0.28
93
+ with:
94
+ username: ${{ secrets.KAGGLE_USERNAME }}
95
+ key: ${{ secrets.KAGGLE_KEY }}
96
+ title: GemmaCPP-CI-2
97
+ code_file: runner.py
98
+ dataset_sources: "phillipculliton/gemma-build-artifacts"
99
+ model_sources: "google/gemma/gemmaCpp/2b-it-sfp/4"
100
+ enable_gpu: False
101
+ kernel_type: script
102
+
103
+ bazel:
104
+ runs-on: ubuntu-latest
105
+ steps:
106
+ - name: Harden Runner
107
+ uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0
108
+ with:
109
+ egress-policy: audit # cannot be block - runner does git checkout
110
+
111
+ - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.0.0
112
+
113
+ - uses: bazelbuild/setup-bazelisk@b39c379c82683a5f25d34f0d062761f62693e0b2 # v3.0.0
114
+
115
+ - uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1
116
+ with:
117
+ path: ~/.cache/bazel
118
+ key: bazel-${{ runner.os }}
119
+ - run: bazel build --cxxopt=-std=c++20 //:all
gemma.cpp/.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .cache/
2
+ bazel-*/
3
+ build-*/
4
+ python/*/__pycache__
gemma.cpp/.vscode/settings.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cmake.configureOnOpen": false,
3
+ "files.associations": {
4
+ "array": "cpp"
5
+ }
6
+ }
gemma.cpp/BUILD.bazel ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gemma.cpp is a lightweight, standalone C++ inference engine for the Gemma
2
+ # foundation models from Google.
3
+
4
+ load("@rules_license//rules:license.bzl", "license")
5
+
6
+ package(
7
+ default_applicable_licenses = [
8
+ "//:license", # Placeholder comment, do not modify
9
+ ],
10
+ default_visibility = ["//visibility:public"],
11
+ )
12
+
13
+ license(
14
+ name = "license",
15
+ package_name = "gemma_cpp",
16
+ )
17
+
18
+ # Dual-licensed Apache 2 and 3-clause BSD.
19
+ licenses(["notice"])
20
+
21
+ exports_files(["LICENSE"])
22
+
23
+ cc_library(
24
+ name = "ops",
25
+ hdrs = ["gemma/ops.h"],
26
+ deps = [
27
+ "//compression:compress",
28
+ "@hwy//:algo",
29
+ "@hwy//:dot",
30
+ "@hwy//:hwy",
31
+ "@hwy//:math",
32
+ "@hwy//:matvec",
33
+ "@hwy//:profiler",
34
+ "@hwy//:thread_pool",
35
+ ],
36
+ )
37
+
38
+ cc_test(
39
+ name = "ops_test",
40
+ size = "small",
41
+ timeout = "long",
42
+ srcs = ["gemma/ops_test.cc"],
43
+ local_defines = ["HWY_IS_TEST"],
44
+ # for test_suite.
45
+ tags = ["hwy_ops_test"],
46
+ deps = [
47
+ ":ops",
48
+ "@googletest//:gtest_main", # buildcleaner: keep
49
+ "//compression:compress",
50
+ "@hwy//:hwy",
51
+ "@hwy//:hwy_test_util",
52
+ "@hwy//:thread_pool",
53
+ ],
54
+ )
55
+
56
+ cc_library(
57
+ name = "common",
58
+ srcs = ["gemma/common.cc"],
59
+ hdrs = [
60
+ "gemma/common.h",
61
+ "gemma/configs.h",
62
+ ],
63
+ deps = [
64
+ "//compression:compress",
65
+ "@hwy//:hwy", # base.h
66
+ "@hwy//:thread_pool",
67
+ ],
68
+ )
69
+
70
+ cc_library(
71
+ name = "weights",
72
+ srcs = ["gemma/weights.cc"],
73
+ hdrs = ["gemma/weights.h"],
74
+ deps = [
75
+ ":common",
76
+ "//compression:compress",
77
+ "//compression:io",
78
+ "@hwy//:hwy",
79
+ "@hwy//:profiler",
80
+ "@hwy//:stats",
81
+ "@hwy//:thread_pool",
82
+ ],
83
+ )
84
+
85
+ cc_library(
86
+ name = "weights_raw",
87
+ hdrs = ["gemma/weights_raw.h"],
88
+ deps = [
89
+ ":common",
90
+ ":weights",
91
+ "//compression:compress",
92
+ "@hwy//:hwy",
93
+ "@hwy//:thread_pool",
94
+ ],
95
+ )
96
+
97
+ cc_library(
98
+ name = "gemma_lib",
99
+ srcs = [
100
+ "gemma/gemma.cc",
101
+ ],
102
+ hdrs = [
103
+ "gemma/activations.h",
104
+ "gemma/gemma.h",
105
+ ],
106
+ textual_hdrs = [
107
+ # Placeholder for internal file1, do not remove,
108
+ # Placeholder for internal file2, do not remove,
109
+ ],
110
+ deps = [
111
+ ":common",
112
+ ":ops",
113
+ ":weights",
114
+ "//compression:compress",
115
+ "//compression:io",
116
+ "@hwy//:hwy",
117
+ "@hwy//:matvec",
118
+ "@hwy//:nanobenchmark", # timer
119
+ "@hwy//:profiler",
120
+ "@hwy//:thread_pool",
121
+ "@com_google_sentencepiece//:sentencepiece_processor",
122
+ ],
123
+ )
124
+
125
+ cc_library(
126
+ name = "cross_entropy",
127
+ srcs = ["gemma/cross_entropy.cc"],
128
+ hdrs = ["gemma/cross_entropy.h"],
129
+ deps = [
130
+ ":common",
131
+ ":gemma_lib",
132
+ "@hwy//:hwy",
133
+ ],
134
+ )
135
+
136
+ cc_library(
137
+ name = "args",
138
+ hdrs = ["util/args.h"],
139
+ deps = [
140
+ "//compression:io",
141
+ "@hwy//:hwy",
142
+ ],
143
+ )
144
+
145
+ cc_library(
146
+ name = "app",
147
+ hdrs = ["util/app.h"],
148
+ deps = [
149
+ ":args",
150
+ ":common",
151
+ ":gemma_lib",
152
+ "//compression:io",
153
+ "@hwy//:hwy",
154
+ "@hwy//:thread_pool",
155
+ "@hwy//:topology",
156
+ ],
157
+ )
158
+
159
+ cc_library(
160
+ name = "benchmark_helper",
161
+ srcs = ["gemma/benchmark_helper.cc"],
162
+ hdrs = ["gemma/benchmark_helper.h"],
163
+ deps = [
164
+ ":app",
165
+ ":args",
166
+ ":common",
167
+ ":cross_entropy",
168
+ ":gemma_lib",
169
+ # Placeholder for internal dep, do not remove.,
170
+ "@benchmark//:benchmark",
171
+ "//compression:compress",
172
+ "@hwy//:hwy",
173
+ "@hwy//:nanobenchmark",
174
+ "@hwy//:thread_pool",
175
+ ],
176
+ )
177
+
178
+ cc_test(
179
+ name = "gemma_test",
180
+ srcs = ["gemma/gemma_test.cc"],
181
+ # Requires model files
182
+ tags = [
183
+ "local",
184
+ "manual",
185
+ "no_tap",
186
+ ],
187
+ deps = [
188
+ ":app",
189
+ ":args",
190
+ ":benchmark_helper",
191
+ ":common",
192
+ ":cross_entropy",
193
+ ":gemma_lib",
194
+ ":ops",
195
+ "@googletest//:gtest_main",
196
+ "//compression:io",
197
+ "@hwy//:hwy_test_util",
198
+ "@hwy//:thread_pool",
199
+ ],
200
+ )
201
+
202
+ cc_binary(
203
+ name = "gemma",
204
+ srcs = ["gemma/run.cc"],
205
+ deps = [
206
+ ":app",
207
+ ":args",
208
+ ":benchmark_helper",
209
+ ":common",
210
+ ":gemma_lib",
211
+ # Placeholder for internal dep, do not remove.,
212
+ "//compression:compress",
213
+ "@hwy//:hwy",
214
+ "@hwy//:nanobenchmark",
215
+ "@hwy//:profiler",
216
+ "@hwy//:thread_pool",
217
+ ],
218
+ )
219
+
220
+ cc_binary(
221
+ name = "compress_weights",
222
+ srcs = ["gemma/compress_weights.cc"],
223
+ deps = [
224
+ ":args",
225
+ ":common",
226
+ ":gemma_lib",
227
+ ":weights",
228
+ ":weights_raw",
229
+ # Placeholder for internal dep, do not remove.,
230
+ "//compression:compress",
231
+ "@hwy//:hwy",
232
+ "@hwy//:nanobenchmark",
233
+ "@hwy//:profiler",
234
+ "@hwy//:thread_pool",
235
+ ],
236
+ )
237
+
238
+ cc_binary(
239
+ name = "single_benchmark",
240
+ srcs = ["gemma/benchmark.cc"],
241
+ deps = [
242
+ ":app",
243
+ ":args",
244
+ ":benchmark_helper",
245
+ ":common",
246
+ ":cross_entropy",
247
+ ":gemma_lib",
248
+ "//compression:io",
249
+ "@hwy//:hwy",
250
+ "@hwy//:nanobenchmark",
251
+ "@hwy//:thread_pool",
252
+ "@nlohmann_json//:json",
253
+ ],
254
+ )
255
+
256
+ cc_binary(
257
+ name = "benchmarks",
258
+ srcs = ["gemma/benchmarks.cc"],
259
+ deps = [
260
+ ":benchmark_helper",
261
+ "@benchmark//:benchmark",
262
+ ],
263
+ )
264
+
265
+ cc_binary(
266
+ name = "debug_prompt",
267
+ srcs = [
268
+ "debug_prompt.cc",
269
+ ],
270
+ deps = [
271
+ ":app",
272
+ ":args",
273
+ ":benchmark_helper",
274
+ ":gemma_lib",
275
+ "//compression:io",
276
+ "@hwy//:hwy",
277
+ "@hwy//:thread_pool",
278
+ "@nlohmann_json//:json",
279
+ ],
280
+ )
281
+
282
+ cc_binary(
283
+ name = "gemma_mmlu",
284
+ srcs = ["gemma/run_mmlu.cc"],
285
+ deps = [
286
+ ":app",
287
+ ":args",
288
+ ":benchmark_helper",
289
+ ":gemma_lib",
290
+ "//compression:io",
291
+ "@hwy//:hwy",
292
+ "@hwy//:profiler",
293
+ "@hwy//:thread_pool",
294
+ "@nlohmann_json//:json",
295
+ ],
296
+ )
297
+
298
+ cc_library(
299
+ name = "prompt",
300
+ hdrs = ["backprop/prompt.h"],
301
+ deps = [],
302
+ )
303
+
304
+ cc_library(
305
+ name = "sampler",
306
+ hdrs = ["backprop/sampler.h"],
307
+ deps = [
308
+ ":prompt",
309
+ ],
310
+ )
311
+
312
+ cc_library(
313
+ name = "backprop",
314
+ srcs = [
315
+ "backprop/backward.cc",
316
+ "backprop/forward.cc",
317
+ ],
318
+ hdrs = [
319
+ "backprop/backward.h",
320
+ "backprop/backward-inl.h",
321
+ "backprop/forward.h",
322
+ "backprop/forward-inl.h",
323
+ ],
324
+ deps = [
325
+ ":common",
326
+ ":gemma_lib",
327
+ ":ops",
328
+ ":prompt",
329
+ ":weights",
330
+ "@hwy//:hwy", # base.h
331
+ "@hwy//:thread_pool",
332
+ ],
333
+ )
334
+
335
+ cc_library(
336
+ name = "backprop_scalar",
337
+ hdrs = [
338
+ "backprop/backward_scalar.h",
339
+ "backprop/common_scalar.h",
340
+ "backprop/forward_scalar.h",
341
+ ],
342
+ deps = [
343
+ ":common",
344
+ ":gemma_lib",
345
+ ":prompt",
346
+ ":weights_raw",
347
+ ],
348
+ )
349
+
350
+ cc_test(
351
+ name = "backward_scalar_test",
352
+ size = "large",
353
+ srcs = [
354
+ "backprop/backward_scalar_test.cc",
355
+ "backprop/test_util.h",
356
+ ],
357
+ deps = [
358
+ ":backprop_scalar",
359
+ ":prompt",
360
+ ":sampler",
361
+ ":weights_raw",
362
+ "@googletest//:gtest_main",
363
+ ],
364
+ )
365
+
366
+ cc_test(
367
+ name = "backward_test",
368
+ size = "large",
369
+ srcs = [
370
+ "backprop/backward_test.cc",
371
+ "backprop/test_util.h",
372
+ ],
373
+ deps = [
374
+ ":backprop",
375
+ ":backprop_scalar",
376
+ ":gemma_lib",
377
+ ":ops",
378
+ ":sampler",
379
+ ":weights_raw",
380
+ "@googletest//:gtest_main",
381
+ "@hwy//:hwy",
382
+ "@hwy//:hwy_test_util",
383
+ "@hwy//:thread_pool",
384
+ ],
385
+ )
386
+
387
+ cc_library(
388
+ name = "optimizer",
389
+ srcs = [
390
+ "backprop/optimizer.cc",
391
+ ],
392
+ hdrs = [
393
+ "backprop/optimizer.h",
394
+ ],
395
+ deps = [
396
+ ":common",
397
+ ":weights",
398
+ "//compression:compress",
399
+ "@hwy//:hwy",
400
+ "@hwy//:thread_pool",
401
+ ],
402
+ )
403
+
404
+ cc_test(
405
+ name = "optimize_test",
406
+ srcs = [
407
+ "backprop/optimize_test.cc",
408
+ ],
409
+ deps = [
410
+ ":backprop",
411
+ ":common",
412
+ ":gemma_lib",
413
+ ":optimizer",
414
+ ":prompt",
415
+ ":sampler",
416
+ ":weights",
417
+ "@googletest//:gtest_main",
418
+ "@hwy//:thread_pool",
419
+ ],
420
+ )
gemma.cpp/CMakeLists.txt ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2019 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ cmake_minimum_required(VERSION 3.11)
16
+
17
+ include(FetchContent)
18
+
19
+ project(gemma)
20
+
21
+ set(CMAKE_CXX_STANDARD 17)
22
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
23
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
24
+
25
+ FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 457c891775a7397bdb0376bb1031e6e027af1c48 EXCLUDE_FROM_ALL)
26
+ FetchContent_MakeAvailable(highway)
27
+
28
+ ## Note: absl needs to be installed by sentencepiece. This will only happen if
29
+ ## cmake is invoked with -DSPM_ENABLE_SHARED=OFF and -DSPM_ABSL_PROVIDER=module
30
+ FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c EXCLUDE_FROM_ALL)
31
+ FetchContent_MakeAvailable(sentencepiece)
32
+
33
+ FetchContent_Declare(json GIT_REPOSITORY https://github.com/nlohmann/json.git GIT_TAG 9cca280a4d0ccf0c08f47a99aa71d1b0e52f8d03 EXCLUDE_FROM_ALL)
34
+ FetchContent_MakeAvailable(json)
35
+
36
+ FetchContent_Declare(benchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG v1.8.2 EXCLUDE_FROM_ALL)
37
+ FetchContent_MakeAvailable(benchmark)
38
+
39
+ set(SOURCES
40
+ compression/blob_store.cc
41
+ compression/blob_store.h
42
+ compression/compress.h
43
+ compression/compress-inl.h
44
+ compression/io_win.cc
45
+ compression/io.cc
46
+ compression/io.h
47
+ compression/nuq.h
48
+ compression/nuq-inl.h
49
+ compression/sfp.h
50
+ compression/sfp-inl.h
51
+ compression/test_util.h
52
+ backprop/backward.cc
53
+ backprop/backward.h
54
+ backprop/backward-inl.h
55
+ backprop/backward_scalar.h
56
+ backprop/common_scalar.h
57
+ backprop/forward.cc
58
+ backprop/forward.h
59
+ backprop/forward-inl.h
60
+ backprop/forward_scalar.h
61
+ backprop/optimizer.cc
62
+ backprop/optimizer.h
63
+ gemma/configs.h
64
+ gemma/activations.h
65
+ gemma/benchmark_helper.cc
66
+ gemma/benchmark_helper.h
67
+ gemma/common.cc
68
+ gemma/common.h
69
+ gemma/cross_entropy.cc
70
+ gemma/cross_entropy.h
71
+ gemma/gemma.cc
72
+ gemma/gemma.h
73
+ gemma/ops.h
74
+ gemma/weights.cc
75
+ gemma/weights.h
76
+ gemma/weights_raw.h
77
+ util/app.h
78
+ util/args.h
79
+ )
80
+
81
+ if(NOT CMAKE_BUILD_TYPE)
82
+ set(CMAKE_BUILD_TYPE "Release")
83
+ endif()
84
+
85
+ FetchContent_GetProperties(sentencepiece)
86
+
87
+ ## Library Target
88
+
89
+ add_library(libgemma ${SOURCES})
90
+ set_property(TARGET libgemma PROPERTY CXX_STANDARD 17)
91
+ set_target_properties(libgemma PROPERTIES PREFIX "")
92
+ set_property(TARGET libgemma PROPERTY POSITION_INDEPENDENT_CODE ON)
93
+ target_include_directories(libgemma PUBLIC ./)
94
+ target_link_libraries(libgemma hwy hwy_contrib sentencepiece-static)
95
+ target_include_directories(libgemma PUBLIC ${sentencepiece_SOURCE_DIR})
96
+ target_compile_definitions(libgemma PRIVATE $<$<PLATFORM_ID:Windows>:_CRT_SECURE_NO_WARNINGS NOMINMAX>)
97
+ target_compile_options(libgemma PRIVATE $<$<PLATFORM_ID:Windows>:-Wno-deprecated-declarations>)
98
+ install(TARGETS libgemma DESTINATION lib)
99
+
100
+ # Executable Target
101
+
102
+ add_executable(gemma gemma/run.cc)
103
+ target_link_libraries(gemma libgemma hwy hwy_contrib)
104
+ install(TARGETS gemma DESTINATION bin)
105
+
106
+ add_executable(single_benchmark gemma/benchmark.cc)
107
+ target_link_libraries(single_benchmark libgemma hwy hwy_contrib nlohmann_json::nlohmann_json)
108
+
109
+ add_executable(benchmarks gemma/benchmarks.cc)
110
+ target_link_libraries(benchmarks libgemma hwy hwy_contrib nlohmann_json::nlohmann_json benchmark)
111
+
112
+ add_executable(debug_prompt debug_prompt.cc)
113
+ target_link_libraries(debug_prompt libgemma hwy hwy_contrib nlohmann_json::nlohmann_json)
114
+
115
+ ## Tests
116
+ set(GEMMA_ENABLE_TESTS OFF CACHE BOOL "Enable Gemma tests")
117
+ if (GEMMA_ENABLE_TESTS)
118
+
119
+ enable_testing()
120
+ include(GoogleTest)
121
+
122
+ set(GEMMA_TEST_FILES
123
+ backprop/backward_test.cc
124
+ backprop/backward_scalar_test.cc
125
+ backprop/optimize_test.cc
126
+ gemma/ops_test.cc
127
+ gemma/gemma_test.cc
128
+ )
129
+
130
+ foreach (TESTFILE IN LISTS GEMMA_TEST_FILES)
131
+ # The TESTNAME is the name without the extension or directory.
132
+ get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
133
+ add_executable(${TESTNAME} ${TESTFILE})
134
+ # Test all targets, not just the best/baseline. This changes the default
135
+ # policy to all-attainable; note that setting -DHWY_COMPILE_* directly can
136
+ # cause compile errors because only one may be set, and other CMakeLists.txt
137
+ # that include us may set them.
138
+ target_compile_options(${TESTNAME} PRIVATE -DHWY_IS_TEST=1)
139
+
140
+ target_link_libraries(${TESTNAME} PRIVATE libgemma GTest::gtest_main hwy hwy_contrib hwy_test)
141
+
142
+ gtest_discover_tests(${TESTNAME})
143
+ endforeach ()
144
+ endif() # GEMMA_ENABLE_TESTS
145
+
146
+ ## Tools
147
+
148
+ add_executable(compress_weights gemma/compress_weights.cc)
149
+ target_link_libraries(compress_weights libgemma hwy hwy_contrib)
gemma.cpp/CMakePresets.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 3,
3
+ "cmakeMinimumRequired": {
4
+ "major": 3,
5
+ "minor": 11,
6
+ "patch": 0
7
+ },
8
+ "configurePresets": [
9
+ {
10
+ "name": "__defaults__",
11
+ "hidden": true,
12
+ "binaryDir": "${sourceDir}/build"
13
+ },
14
+ {
15
+ "name": "make",
16
+ "inherits": "__defaults__",
17
+ "displayName": "Make",
18
+ "description": "Unix Makefiles",
19
+ "generator": "Unix Makefiles",
20
+ "binaryDir": "${sourceDir}/build"
21
+ },
22
+ {
23
+ "name": "windows",
24
+ "inherits": "__defaults__",
25
+ "displayName": "Windows",
26
+ "description": "Visual Studio 2022 with Clang/LLVM frontend",
27
+ "generator": "Visual Studio 17 2022",
28
+ "toolset": "ClangCL",
29
+ "condition": {
30
+ "type": "equals",
31
+ "lhs": "${hostSystemName}",
32
+ "rhs": "Windows"
33
+ }
34
+ }
35
+ ],
36
+ "buildPresets": [
37
+ {
38
+ "name": "__defaults__",
39
+ "hidden": true,
40
+ "targets": [
41
+ "gemma",
42
+ "libgemma"
43
+ ]
44
+ },
45
+ {
46
+ "name": "make",
47
+ "inherits": "__defaults__",
48
+ "displayName": "Unix Makefiles",
49
+ "configurePreset": "make"
50
+ },
51
+ {
52
+ "name": "windows",
53
+ "inherits": "__defaults__",
54
+ "displayName": "Windows",
55
+ "configuration": "Release",
56
+ "configurePreset": "windows"
57
+ }
58
+ ]
59
+ }
gemma.cpp/DEVELOPERS.md ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Developer Notes
2
+
3
+ ## Motivation: A Minimalist C++ LLM Runtime for Research and Experimentation
4
+
5
+ In the past, neural network inference has been similar to a simple, opaque,
6
+ stateless function with a single input and output. By contrast,
7
+ foundation model runtimes are better considered as systems with multiple forms
8
+ of state, subsystems, and heterogeneous inputs and outputs. They are often
9
+ integrated with a wide variety of other systems that have their own resources
10
+ (e.g. RAG and tools) and potentially interact with an external environment. They
11
+ have become compute engines to embed proximal tasks and goals within expansively
12
+ broad, general-purpose world models.
13
+
14
+ With this in mind, we believe that developing an experimental runtime that is
15
+ flexible and approachable will allow us to explore the design space of co-design
16
+ between high level model concerns and low-level runtime computation.
17
+
18
+ ## Design Priorities
19
+
20
+ Given these motivations, we propose the following priorities for
21
+ making decisions regarding the direction and design of the codebase.
22
+
23
+ **Maximize Leverage with a Narrow Scope.** We focus on direct implementations of
24
+ foundation models like Gemma. This allows us to focus effort on bottlenecks of
25
+ specific models. We are willing to trade off generality to keep implementation
26
+ code relatively simple and readable at all layers of the stack, achieve good
27
+ performance, and maintain the velocity of a small team.
28
+
29
+ **Data Oriented Design.** Follow data oriented design principles where possible
30
+ to minimize unnecessary performance pessimization. It's best to apply these
31
+ optimizations during the initial design, or when refactoring a subcomponent. The
32
+ first step is to think in terms of batches or tuples of plain old data (POD)
33
+ types: separate arrays, instead of an array of structs. The second is to
34
+ de-emphasize control flow (if statements, virtual functions and class
35
+ hierarchies). The third step is to know intrinsic properties of data and bake
36
+ that into the layout and algorithm.
37
+
38
+ **Prioritize Small Batch Latency.** Since production serving solutions are
39
+ available for large-scale serving powered by accelerators and optimizing for
40
+ throughput, this project focuses on the possibilities of local, interactive use
41
+ of foundation models. Although throughput remains important, low latency and
42
+ small batch sizes are prioritized, other things being equal.
43
+
44
+ **Maintain a Portable Baseline.** Our starting point is a portable CPU SIMD implementation (via
45
+ [highway](https://github.com/google/highway)). We expect to add accelerator and
46
+ hybrid CPU/GPU support in the future, but the project should continue to allow
47
+ builds using this portable baseline. This ensures that research-oriented and
48
+ experimental runtimes and hardware platforms will have a minimum viable option
49
+ to run Gemma even if specialized production-ready deployment paths are not
50
+ available.
51
+
52
+ ## Code Organization
53
+
54
+ The implementation code is roughly split into 4 layers, from high to low level:
55
+
56
+ 1. Frontends (`run.cc`) - Either interactive interfaces or automation
57
+ orchestration that interacts. Frontend code implements a use case objective
58
+ in terms of invocations to model inference and generation (2). Projects that
59
+ use gemma.cpp as a library are considered alternative frontends to `run.cc`.
60
+ We will add examples of additional frontends in the future.
61
+
62
+ 2. Models (`gemma.cc`, `gemma.h`, `configs.h`) - Implements the compute graph
63
+ of the model including supporting functions such as loading and compressing
64
+ weights using transformer operations provided by layer (3).
65
+
66
+ 3. Operations (`ops.h`) - A minimal set of transformer and supporting
67
+ mathematical operations implementations using compute backends (4). This
68
+ code should be agnostic to the specifics of the compute graph of the model
69
+ implementation (2).
70
+
71
+ 4. Backend (`highway`) - Low-level hardware interface (SIMD in the case of
72
+ highway) supporting the implementations in (3).
73
+
74
+ Besides these layers, supporting utilities are:
75
+
76
+ - `compression/` - model compression operations. The 8-bit switched floating
77
+ point model conversion is here.
78
+ - `util/` - command line argument handling and any other utilities.
79
+
80
+ ## Style and Formatting
81
+
82
+ A `.clang-format` configuration is provided with our defaults, please run source
83
+ files through `clang-format` (or a formatter that produces equivalent behavior)
84
+ before finalizing a PR for submission.
85
+
86
+ ## Converting weights
87
+
88
+ We use a stripped down binary blob (.sbs) artifact to accelerate weight loading
89
+ in C++. These files can be downloaded directly from Kaggle and HuggingFace. You
90
+ can also convert PyTorch or Keras checkpoints to .sbs, but most end users should
91
+ not have to do this.
92
+
93
+ If starting with Keras, first run this script to convert to PyTorch:
94
+ https://github.com/keras-team/keras-nlp/blob/master/tools/gemma/export_gemma_to_torch_xla.py
95
+
96
+ From PyTorch, use the following script to generate uncompressed weights:
97
+ https://github.com/google/gemma.cpp/blob/dev/util/convert_weights.py
98
+
99
+ Then run gemma/compress_weights.cc (Bazel target :compress_weights), specifying
100
+ the resulting file as `--weights` and the desired .sbs name as the
101
+ `--compressed_weights`.
102
+
103
+ ## Compile-Time Flags (Advanced)
104
+
105
+ There are several compile-time flags to be aware of (note these may or may not
106
+ be exposed to the build system):
107
+
108
+ - `GEMMA_MAX_SEQ_LEN` : Sets maximum sequence length to preallocate for the KV
109
+ Cache. The default is 4096 tokens but can be overridden. This is not exposed
110
+ through `CMakeLists.txt` yet.
111
+
112
+ In the medium term this will likely be deprecated in favor of handling options
113
+ at runtime - dynamically resizing the KV cache as needed.
114
+
115
+ ## Using gemma.cpp as a Library (Advanced)
116
+
117
+ Unless you are doing lower level implementations or research, from an
118
+ application standpoint you can think of gemma.h and gemma.cc as the "core" of
119
+ the library.
120
+
121
+ You can regard `run.cc` as an example application that your own application is
122
+ substituting for, so the invocations into gemma.h and gemma.cc you see in
123
+ `run.cc` are probably the functions you'll be invoking. You can find examples of
124
+ the invocations to tokenizer methods and `Generate()` in `run.cc`.
125
+
126
+ Keep in mind gemma.cpp is oriented at more experimental / prototype / research
127
+ applications. If you're targeting production, there are more standard paths via
128
+ jax / pytorch / keras / XNNPACK for NN deployments.
129
+
130
+ ### Gemma struct contains all the state of the inference engine - tokenizer, weights, and activations
131
+
132
+ `Gemma(...)` - constructor, creates a gemma model object.
133
+
134
+ In a standard LLM chat app, you'll probably use a Gemma object directly; in
135
+ more exotic data processing or research applications, you might decompose
136
+ working with weights, kv cache and activations (e.g. you might have multiple kv
137
+ caches and activations for a single set of weights) more directly rather than
138
+ only using a Gemma object.
139
+
140
+ ### Use the tokenizer in the Gemma object (or interact with the Tokenizer object directly)
141
+
142
+ The Gemma object contains a pointer to a Tokenizer object. The main
143
+ operations performed on the tokenizer are to load the tokenizer model from a
144
+ file (usually `tokenizer.spm`), call `Encode()` to go from string prompts to
145
+ token id vectors, or `Decode()` to go from token id vector outputs from the
146
+ model back to strings. `benchmark_helper.h` provides wrapper functions that make
147
+ them easier to use.
148
+
149
+ ### `model.Generate()` is the entrypoint for token generation
150
+
151
+ Calling into `model.Generate` with a tokenized prompt will
152
+
153
+ 1. mutate the activation values in `model` and
154
+ 2. invoke `StreamFunc` - a lambda callback for each generated token.
155
+
156
+ Your application defines its own `StreamFunc` as a lambda callback to do
157
+ something every time a token string is streamed from the engine (e.g., print to
158
+ the screen, write data to the disk, send the string to a server, etc.). You can
159
+ see in `run.cc` the `StreamFunc` lambda takes care of printing each token to the
160
+ screen as it arrives.
161
+
162
+ Optionally you can define `accept_token` as another lambda - this is mostly for
163
+ constrained decoding type of use cases where you want to force the generation to
164
+ fit a grammar. If you're not doing this, you can send an empty lambda or
165
+ `std::function` as a no-op which is what `run.cc` does.
166
+
167
+ ### `Transformer()` implements the inference (i.e. `forward()` method in PyTorch or Jax) computation of the neural network
168
+
169
+ For high-level applications, you might only call `model.Generate()` and never
170
+ interact directly with the neural network, but if you're doing something a bit
171
+ more custom you can call `Transformer()`, which performs a single inference operation
172
+ on a single token and mutates the Activations and the KVCache through the neural
173
+ network computation.
174
+
175
+ Note that an experimental backward pass is available in backprop/, which may be
176
+ useful for fine tuning.
177
+
178
+ ### For low level operations, defining new architectures, call `ops.h` functions directly
179
+
180
+ You use `ops.h` if you're writing other NN architectures or modifying the
181
+ inference path of the Gemma model.
182
+
183
+ ## Building with Bazel
184
+
185
+ The sentencepiece library we depend on requires some additional work to build
186
+ with the Bazel build system. First, it does not export its BUILD file, so we
187
+ provide `bazel/sentencepiece.bazel`. Second, it ships with a vendored subset of
188
+ the Abseil library. `bazel/sentencepiece.patch` changes the code to support
189
+ Abseil as a standalone dependency without third_party/ prefixes, similar to the
190
+ transforms we apply to Gemma via Copybara.
191
+
192
+ ## Debugging
193
+
194
+ At the first sign of incorrect or unexpected results, we recommend running with
195
+ ASan/MSan enabled. When using blaze/bazel, you can add `--config=asan` or
196
+ `--config=msan-track-origins` to the build command. In addition to their checks
197
+ for memory overruns or uninitialized memory, we also enable debug-only asserts
198
+ in Gemma.cpp for those build configurations.
199
+
200
+ ## Discord
201
+
202
+ We're also trying out a discord server for discussion here -
203
+ https://discord.gg/H5jCBAWxAe
gemma.cpp/LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
gemma.cpp/LICENSE-BSD3 ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) The gemma.cpp Project Authors. All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without modification,
4
+ are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this
7
+ list of conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+
13
+ 3. Neither the name of the copyright holder nor the names of its
14
+ contributors may be used to endorse or promote products derived from
15
+ this software without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
gemma.cpp/MODULE.bazel ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module(
2
+ name = "gemma",
3
+ version = "0.1.0",
4
+ )
5
+
6
+ bazel_dep(name = "rules_license", version = "0.0.7")
7
+ bazel_dep(name = "googletest", version = "1.14.0")
8
+
9
+ # Copied from Highway because Bazel does not load them transitively
10
+ bazel_dep(name = "bazel_skylib", version = "1.4.1")
11
+ bazel_dep(name = "rules_cc", version = "0.0.9")
12
+ bazel_dep(name = "platforms", version = "0.0.7")
13
+
14
+ http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
15
+
16
+ http_archive(
17
+ name = "hwy",
18
+ urls = ["https://github.com/google/highway/archive/refs/tags/1.2.0.zip"],
19
+ integrity = "sha256-fbtKAGj5hhhBr5Bggtsrj4aIodC2OHb1njB8LGfom8A=", strip_prefix = "highway-1.2.0",
20
+ )
21
+
22
+ http_archive(
23
+ name = "nlohmann_json",
24
+ urls = ["https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.zip"],
25
+ integrity = "sha256-BAIrBdgG61/3MCPCgLaGl9Erk+G3JnoLIqGjnsdXgGk=",
26
+ strip_prefix = "json-3.11.3",
27
+ )
28
+
29
+ http_archive(
30
+ name = "com_google_sentencepiece",
31
+ sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
32
+ strip_prefix = "sentencepiece-0.1.96",
33
+ urls = ["https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip"],
34
+ build_file = "@//bazel:sentencepiece.bazel",
35
+ patches = ["@//bazel:sentencepiece.patch"],
36
+ patch_args = ["-p1"],
37
+ )
38
+
39
+ # For sentencepiece
40
+ http_archive(
41
+ name = "darts_clone",
42
+ build_file_content = """
43
+ licenses(["notice"])
44
+ exports_files(["LICENSE"])
45
+ package(default_visibility = ["//visibility:public"])
46
+ cc_library(
47
+ name = "darts_clone",
48
+ hdrs = [
49
+ "include/darts.h",
50
+ ],
51
+ )
52
+ """,
53
+ sha256 = "c97f55d05c98da6fcaf7f9ecc6a6dc6bc5b18b8564465f77abff8879d446491c",
54
+ strip_prefix = "darts-clone-e40ce4627526985a7767444b6ed6893ab6ff8983",
55
+ urls = [
56
+ "https://github.com/s-yata/darts-clone/archive/e40ce4627526985a7767444b6ed6893ab6ff8983.zip",
57
+ ],
58
+ )
59
+ # ABSL on 2023-10-18
60
+ http_archive(
61
+ name = "com_google_absl",
62
+ sha256 = "f841f78243f179326f2a80b719f2887c38fe226d288ecdc46e2aa091e6aa43bc",
63
+ strip_prefix = "abseil-cpp-9687a8ea750bfcddf790372093245a1d041b21a3",
64
+ urls = ["https://github.com/abseil/abseil-cpp/archive//9687a8ea750bfcddf790372093245a1d041b21a3.tar.gz"],
65
+ )
66
+ # Benchmark
67
+ http_archive(
68
+ name = "benchmark",
69
+ urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.8.2.tar.gz"],
70
+ integrity = "sha256-KqspgNA3YTf5adkoSPu2gharsHYzA0U0/IxlzE56DpM=",
71
+ strip_prefix = "benchmark-1.8.2",
72
+ )
gemma.cpp/README.md ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gemma.cpp
2
+
3
+ gemma.cpp is a lightweight, standalone C++ inference engine for the Gemma
4
+ foundation models from Google.
5
+
6
+ For additional information about Gemma, see
7
+ [ai.google.dev/gemma](https://ai.google.dev/gemma). Model weights, including gemma.cpp
8
+ specific artifacts, are [available on
9
+ kaggle](https://www.kaggle.com/models/google/gemma).
10
+
11
+ NOTE: 2024-04-04: if using 2B models, please re-download weights from Kaggle and
12
+ ensure you have the latest version (-mqa or version 3). We are changing the code
13
+ to match the new weights. If you wish to use old weights, change `ConfigGemma2B`
14
+ in `configs.h` back to `kVocabSize = 256128` and `kKVHeads = 8`.
15
+
16
+ ## Who is this project for?
17
+
18
+ Modern LLM inference engines are sophisticated systems, often with bespoke
19
+ capabilities extending beyond traditional neural network runtimes. With this
20
+ comes opportunities for research and innovation through co-design of high level
21
+ algorithms and low-level computation. However, there is a gap between
22
+ deployment-oriented C++ inference runtimes, which are not designed for
23
+ experimentation, and Python-centric ML research frameworks, which abstract away
24
+ low-level computation through compilation.
25
+
26
+ gemma.cpp provides a minimalist implementation of Gemma 2B and 7B models,
27
+ focusing on simplicity and directness rather than full generality. This is
28
+ inspired by vertically-integrated model implementations such as
29
+ [ggml](https://github.com/ggerganov/ggml),
30
+ [llama2.c](https://github.com/karpathy/llama2.c), and
31
+ [llama2.rs](https://github.com/srush/llama2.rs).
32
+
33
+ gemma.cpp targets experimentation and research use cases. It is intended to be
34
+ straightforward to embed in other projects with minimal dependencies and also
35
+ easily modifiable with a small ~2K LoC core implementation (along with ~4K LoC
36
+ of supporting utilities). We use the [Google
37
+ Highway](https://github.com/google/highway) Library to take advantage of
38
+ portable SIMD for CPU inference.
39
+
40
+ For production-oriented edge deployments we recommend standard deployment
41
+ pathways using Python frameworks like JAX, Keras, PyTorch, and Transformers
42
+ ([all model variations here](https://www.kaggle.com/models/google/gemma)).
43
+
44
+ ## Contributing
45
+
46
+ Community contributions large and small are welcome. See
47
+ [DEVELOPERS.md](https://github.com/google/gemma.cpp/blob/main/DEVELOPERS.md)
48
+ for additional notes for contributing developers and [join the discord by following
49
+ this invite link](https://discord.gg/H5jCBAWxAe). This project follows
50
+ [Google's Open Source Community
51
+ Guidelines](https://opensource.google.com/conduct/).
52
+
53
+ *Active development is currently done on the `dev` branch. Please open pull
54
+ requests targeting `dev` branch instead of `main`, which is intended to be more
55
+ stable.*
56
+
57
+ ## Quick Start
58
+
59
+ ### System requirements
60
+
61
+ Before starting, you should have installed:
62
+
63
+ - [CMake](https://cmake.org/)
64
+ - [Clang C++ compiler](https://clang.llvm.org/get_started.html), supporting at
65
+ least C++17.
66
+ - `tar` for extracting archives from Kaggle.
67
+
68
+ Building natively on Windows requires the Visual Studio 2022 Build Tools with the
69
+ optional Clang/LLVM C++ frontend (`clang-cl`). This can be installed from the
70
+ command line with
71
+ [`winget`](https://learn.microsoft.com/en-us/windows/package-manager/winget/):
72
+
73
+ ```sh
74
+ winget install --id Kitware.CMake
75
+ winget install --id Microsoft.VisualStudio.2022.BuildTools --force --override "--passive --wait --add Microsoft.VisualStudio.Workload.VCTools;installRecommended --add Microsoft.VisualStudio.Component.VC.Llvm.Clang --add Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset"
76
+ ```
77
+
78
+ ### Step 1: Obtain model weights and tokenizer from Kaggle or Hugging Face Hub
79
+
80
+ Visit [the Gemma model page on
81
+ Kaggle](https://www.kaggle.com/models/google/gemma/frameworks/gemmaCpp) and select `Model Variations
82
+ |> Gemma C++`. On this tab, the `Variation` dropdown includes the options below.
83
+ Note bfloat16 weights are higher fidelity, while 8-bit switched floating point
84
+ weights enable faster inference. In general, we recommend starting with the
85
+ `-sfp` checkpoints.
86
+
87
+ Alternatively, visit the [gemma.cpp](https://huggingface.co/models?other=gemma.cpp)
88
+ models on the Hugging Face Hub. First go to the model repository of the model of interest
89
+ (see recommendations below). Then, click the `Files and versions` tab and download the
90
+ model and tokenizer files. For programmatic downloading, if you have `huggingface_hub`
91
+ installed, you can also download by running:
92
+
93
+ ```
94
+ huggingface-cli login # Just the first time
95
+ huggingface-cli download google/gemma-2b-sfp-cpp --local-dir build/
96
+ ```
97
+
98
+ 2B instruction-tuned (`it`) and pre-trained (`pt`) models:
99
+
100
+ | Model name | Description |
101
+ | ----------- | ----------- |
102
+ | `2b-it` | 2 billion parameter instruction-tuned model, bfloat16 |
103
+ | `2b-it-sfp` | 2 billion parameter instruction-tuned model, 8-bit switched floating point |
104
+ | `2b-pt` | 2 billion parameter pre-trained model, bfloat16 |
105
+ | `2b-pt-sfp` | 2 billion parameter pre-trained model, 8-bit switched floating point |
106
+
107
+ 7B instruction-tuned (`it`) and pre-trained (`pt`) models:
108
+
109
+ | Model name | Description |
110
+ | ----------- | ----------- |
111
+ | `7b-it` | 7 billion parameter instruction-tuned model, bfloat16 |
112
+ | `7b-it-sfp` | 7 billion parameter instruction-tuned model, 8-bit switched floating point |
113
+ | `7b-pt` | 7 billion parameter pre-trained model, bfloat16 |
114
+ | `7b-pt-sfp` | 7 billion parameter pre-trained model, 8-bit switched floating point |
115
+
116
+ > [!NOTE]
117
+ > **Important**: We strongly recommend starting off with the `2b-it-sfp` model to
118
+ > get up and running.
119
+
120
+ ### Step 2: Extract Files
121
+
122
+ If you downloaded the models from Hugging Face, skip to step 3.
123
+
124
+ After filling out the consent form, the download should proceed to retrieve a
125
+ tar archive file `archive.tar.gz`. Extract files from `archive.tar.gz` (this can
126
+ take a few minutes):
127
+
128
+ ```
129
+ tar -xf archive.tar.gz
130
+ ```
131
+
132
+ This should produce a file containing model weights such as `2b-it-sfp.sbs` and
133
+ a tokenizer file (`tokenizer.spm`). You may want to move these files to a
134
+ convenient directory location (e.g. the `build/` directory in this repo).
135
+
136
+ ### Step 3: Build
137
+
138
+ The build system uses [CMake](https://cmake.org/). To build the gemma inference
139
+ runtime, create a build directory and generate the build files using `cmake`
140
+ from the top-level project directory. Note that if you previously ran `cmake` and are
141
+ re-running with a different setting, be sure to delete all files in the `build/`
142
+ directory with `rm -rf build/*`.
143
+
144
+ #### Unix-like Platforms
145
+ ```sh
146
+ cmake -B build
147
+ ```
148
+
149
+ After running `cmake`, you can enter the `build/` directory and run `make` to
150
+ build the `./gemma` executable:
151
+
152
+ ```sh
153
+ # Configure `build` directory
154
+ cmake --preset make
155
+
156
+ # Build project using make
157
+ cmake --build --preset make -j [number of parallel threads to use]
158
+ ```
159
+
160
+ Replace `[number of parallel threads to use]` with a number - the number of
161
+ cores available on your system is a reasonable heuristic. For example,
162
+ `make -j4 gemma` will build using 4 threads. If the `nproc` command is
163
+ available, you can use `make -j$(nproc) gemma` as a reasonable default
164
+ for the number of threads.
165
+
166
+ If you aren't sure of the right value for the `-j` flag, you can simply run
167
+ `make gemma` instead and it should still build the `./gemma` executable.
168
+
169
+ > [!NOTE]
170
+ > Windows Subsystem for Linux (WSL) users should set the number of
171
+ > parallel threads to 1. Using a larger number may result in errors.
172
+
173
+ If the build is successful, you should now have a `gemma` executable in the `build/` directory.
174
+
175
+ #### Windows
176
+
177
+ ```sh
178
+ # Configure `build` directory
179
+ cmake --preset windows
180
+
181
+ # Build project using Visual Studio Build Tools
182
+ cmake --build --preset windows -j [number of parallel threads to use]
183
+ ```
184
+
185
+ If the build is successful, you should now have a `gemma.exe` executable in the `build/` directory.
186
+
187
+ #### Bazel
188
+
189
+ ```sh
190
+ bazel build -c opt --cxxopt=-std=c++20 :gemma
191
+ ```
192
+
193
+ If the build is successful, you should now have a `gemma` executable in the `bazel-bin/` directory.
194
+
195
+ #### Make
196
+
197
+ If you prefer Makefiles, @jart has made one available here:
198
+
199
+ https://github.com/jart/gemma3/blob/main/Makefile
200
+
201
+ ### Step 4: Run
202
+
203
+ You can now run `gemma` from inside the `build/` directory.
204
+
205
+ `gemma` has the following required arguments:
206
+
207
+ Argument | Description | Example value
208
+ --------------- | ---------------------------- | -----------------------
209
+ `--model` | The model type. | `2b-it` ... (see below)
210
+ `--weights` | The compressed weights file. | `2b-it-sfp.sbs`
211
+ `--weight_type` | The compressed weight type. | `sfp`
212
+ `--tokenizer` | The tokenizer file. | `tokenizer.spm`
213
+
214
+ `gemma` is invoked as:
215
+
216
+ ```sh
217
+ ./gemma \
218
+ --tokenizer [tokenizer file] \
219
+ --weights [compressed weights file] \
220
+ --weight_type [f32 or bf16 or sfp] \
221
+ --model [2b-it or 2b-pt or 7b-it or 7b-pt or ...]
222
+ ```
223
+
224
+ Example invocation for the following configuration:
225
+
226
+ - Compressed weights file `2b-it-sfp.sbs` (2B instruction-tuned model, 8-bit
227
+ switched floating point).
228
+ - Tokenizer file `tokenizer.spm`.
229
+
230
+ ```sh
231
+ ./gemma \
232
+ --tokenizer tokenizer.spm \
233
+ --weights 2b-it-sfp.sbs --weight_type sfp --model 2b-it
234
+ ```
235
+
236
+ ### RecurrentGemma
237
+
238
+ This repository includes a version of Gemma based on Griffin
239
+ ([paper](https://arxiv.org/abs/2402.19427),
240
+ [code](https://github.com/google-deepmind/recurrentgemma)). Its architecture
241
+ includes both recurrent layers and local attention, thus it is more efficient
242
+ for longer sequences and has a smaller memory footprint than standard Gemma. We
243
+ here provide a C++ implementation of this model based on the paper.
244
+
245
+ To use the recurrent version of Gemma included in this repository, build the
246
+ gemma binary as noted above in Step 3. Download the compressed weights and
247
+ tokenizer from the RecurrentGemma
248
+ [Kaggle](https://www.kaggle.com/models/google/recurrentgemma/gemmaCpp) as in
249
+ Step 1, and run the binary as follows:
250
+
251
+ `./gemma --tokenizer tokenizer.spm --model gr2b-it --weights 2b-it-sfp.sbs`
252
+
253
+ ### Troubleshooting and FAQs
254
+
255
+ **Running `./gemma` fails with "Failed to read cache gating_ein_0 (error 294) ..."**
256
+
257
+ The most common problem is that the `--weight_type` argument does not match that
258
+ of the model file. Revisit step #1 and check which weights you downloaded.
259
+
260
+ Note that we have already moved weight type from a compile-time decision to a
261
+ runtime argument. In a subsequent step, we plan to bake this information into
262
+ the weights.
263
+
264
+ **Problems building in Windows / Visual Studio**
265
+
266
+ Currently if you're using Windows, we recommend building in WSL (Windows
267
+ Subsystem for Linux). We are exploring options to enable other build
268
+ configurations, see issues for active discussion.
269
+
270
+ **Model does not respond to instructions and produces strange output**
271
+
272
+ A common issue is that you are using a pre-trained model, which is not
273
+ instruction-tuned and thus does not respond to instructions. Make sure you are
274
+ using an instruction-tuned model (`2b-it-sfp`, `2b-it`, `7b-it-sfp`, `7b-it`)
275
+ and not a pre-trained model (any model with a `-pt` suffix).
276
+
277
+ **How do I convert my fine-tune to a `.sbs` compressed model file?**
278
+
279
+ We're working on a python script to convert a standard model format to `.sbs`,
280
+ and hope to have it available in the next week or so. Follow [this
281
+ issue](https://github.com/google/gemma.cpp/issues/11) for updates.
282
+
283
+ **What are some easy ways to make the model run faster?**
284
+
285
+ 1. Make sure you are using the 8-bit switched floating point `-sfp` models.
286
+ 2. If you're on a laptop, make sure power mode is set to maximize performance
287
+ and saving mode is **off**. For most laptops, the power saving modes get
288
+ activated automatically if the computer is not plugged in.
289
+ 3. Close other unused cpu-intensive applications.
290
+ 4. On macs, anecdotally we observe a "warm-up" ramp-up in speed as performance
291
+ cores get engaged.
292
+ 5. Experiment with the `--num_threads` argument value. Depending on the device,
293
+ larger numbers don't always mean better performance.
294
+
295
+ We're also working on algorithmic and optimization approaches for faster
296
+ inference, stay tuned.
297
+
298
+ ## Usage
299
+
300
+ `gemma` has different usage modes, controlled by the verbosity flag.
301
+
302
+ All usage modes are currently interactive, triggering text generation upon
303
+ newline input.
304
+
305
+ | Verbosity | Usage mode | Details |
306
+ | --------------- | ---------- | --------------------------------------------- |
307
+ | `--verbosity 0` | Minimal | Only prints generation output. Suitable as a CLI tool. |
308
+ | `--verbosity 1` | Default | Standard user-facing terminal UI. |
309
+ | `--verbosity 2` | Detailed | Shows additional developer and debug info. |
310
+
311
+ ### Interactive Terminal App
312
+
313
+ By default, verbosity is set to 1, bringing up a terminal-based interactive
314
+ interface when `gemma` is invoked:
315
+
316
+ ```console
317
+ $ ./gemma [...]
318
+ __ _ ___ _ __ ___ _ __ ___ __ _ ___ _ __ _ __
319
+ / _` |/ _ \ '_ ` _ \| '_ ` _ \ / _` | / __| '_ \| '_ \
320
+ | (_| | __/ | | | | | | | | | | (_| || (__| |_) | |_) |
321
+ \__, |\___|_| |_| |_|_| |_| |_|\__,_(_)___| .__/| .__/
322
+ __/ | | | | |
323
+ |___/ |_| |_|
324
+
325
+ tokenizer : tokenizer.spm
326
+ compressed_weights : 2b-it-sfp.sbs
327
+ model : 2b-it
328
+ weights : [no path specified]
329
+ max_tokens : 3072
330
+ max_generated_tokens : 2048
331
+
332
+ *Usage*
333
+ Enter an instruction and press enter (%C reset conversation, %Q quits).
334
+
335
+ *Examples*
336
+ - Write an email to grandma thanking her for the cookies.
337
+ - What are some historical attractions to visit around Massachusetts?
338
+ - Compute the nth fibonacci number in javascript.
339
+ - Write a standup comedy bit about WebGPU programming.
340
+
341
+ > What are some outdoorsy places to visit around Boston?
342
+
343
+ [ Reading prompt ] .....................
344
+
345
+
346
+ **Boston Harbor and Islands:**
347
+
348
+ * **Boston Harbor Islands National and State Park:** Explore pristine beaches, wildlife, and maritime history.
349
+ * **Charles River Esplanade:** Enjoy scenic views of the harbor and city skyline.
350
+ * **Boston Harbor Cruise Company:** Take a relaxing harbor cruise and admire the city from a different perspective.
351
+ * **Seaport Village:** Visit a charming waterfront area with shops, restaurants, and a seaport museum.
352
+
353
+ **Forest and Nature:**
354
+
355
+ * **Forest Park:** Hike through a scenic forest with diverse wildlife.
356
+ * **Quabbin Reservoir:** Enjoy boating, fishing, and hiking in a scenic setting.
357
+ * **Mount Forest:** Explore a mountain with breathtaking views of the city and surrounding landscape.
358
+
359
+ ...
360
+ ```
361
+
362
+ ### Usage as a Command Line Tool
363
+
364
+ For using the `gemma` executable as a command line tool, it may be useful to
365
+ create an alias for gemma.cpp with arguments fully specified:
366
+
367
+ ```sh
368
+ alias gemma2b="~/gemma.cpp/build/gemma -- --tokenizer ~/gemma.cpp/build/tokenizer.spm --weights ~/gemma.cpp/build/2b-it-sfp.sbs --model 2b-it --verbosity 0"
369
+ ```
370
+
371
+ Replace the above paths with your own paths to the model and tokenizer files
372
+ from the download.
373
+
374
+ Here is an example of prompting `gemma` with a truncated input
375
+ file (using a `gemma2b` alias as defined above):
376
+
377
+ ```sh
378
+ cat configs.h | tail -35 | tr '\n' ' ' | xargs -0 echo "What does this C++ code do: " | gemma2b
379
+ ```
380
+
381
+ > [!NOTE]
382
+ > CLI usage of gemma.cpp is experimental and should take context length
383
+ > limitations into account.
384
+
385
+ The output of the above command should look like:
386
+
387
+ ```console
388
+ $ cat configs.h | tail -35 | tr '\n' ' ' | xargs -0 echo "What does this C++ code do: " | gemma2b
389
+ [ Reading prompt ] ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
390
+ The code defines two C++ structs, `ConfigGemma7B` and `ConfigGemma2B`, which are used for configuring a deep learning model.
391
+
392
+ **ConfigGemma7B**:
393
+
394
+ * `kSeqLen`: Stores the length of the sequence to be processed. It's set to 7168.
395
+ * `kVocabSize`: Stores the size of the vocabulary, which is 256128.
396
+ * `kLayers`: Number of layers in the deep learning model. It's set to 28.
397
+ * `kModelDim`: Dimension of the model's internal representation. It's set to 3072.
398
+ * `kFFHiddenDim`: Dimension of the feedforward and recurrent layers' hidden representations. It's set to 16 * 3072 / 2.
399
+
400
+ **ConfigGemma2B**:
401
+
402
+ * `kSeqLen`: Stores the length of the sequence to be processed. It's also set to 7168.
403
+ * `kVocabSize`: Size of the vocabulary, which is 256128.
404
+ * `kLayers`: Number of layers in the deep learning model. It's set to 18.
405
+ * `kModelDim`: Dimension of the model's internal representation. It's set to 2048.
406
+ * `kFFHiddenDim`: Dimension of the feedforward and recurrent layers' hidden representations. It's set to 16 * 2048 / 2.
407
+
408
+ These structs are used to configure a deep learning model with specific parameters for either Gemma7B or Gemma2B architecture.
409
+ ```
410
+
411
+ ### Incorporating gemma.cpp as a Library in your Project
412
+
413
+ The easiest way to incorporate gemma.cpp in your own project is to pull in
414
+ gemma.cpp and dependencies using `FetchContent`. You can add the following to your
415
+ CMakeLists.txt:
416
+
417
+ ```
418
+ include(FetchContent)
419
+
420
+ FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c)
421
+ FetchContent_MakeAvailable(sentencepiece)
422
+
423
+ FetchContent_Declare(gemma GIT_REPOSITORY https://github.com/google/gemma.cpp GIT_TAG origin/main)
424
+ FetchContent_MakeAvailable(gemma)
425
+
426
+ FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG da250571a45826b21eebbddc1e50d0c1137dee5f)
427
+ FetchContent_MakeAvailable(highway)
428
+ ```
429
+
430
+ Note for the gemma.cpp `GIT_TAG`, you may replace `origin/main` with a specific
431
+ commit hash if you would like to pin the library version.
432
+
433
+ After your executable is defined (substitute your executable name for
434
+ `[Executable Name]` below):
435
+
436
+ ```
437
+ target_link_libraries([Executable Name] libgemma hwy hwy_contrib sentencepiece)
438
+ FetchContent_GetProperties(gemma)
439
+ FetchContent_GetProperties(sentencepiece)
440
+ target_include_directories([Executable Name] PRIVATE ${gemma_SOURCE_DIR})
441
+ target_include_directories([Executable Name] PRIVATE ${sentencepiece_SOURCE_DIR})
442
+ ```
443
+
444
+ ### Building gemma.cpp as a Library
445
+
446
+ gemma.cpp can also be used as a library dependency in your own project. The
447
+ library artifact can be built by modifying the make invocation to build
448
+ the `libgemma` target instead of `gemma`.
449
+
450
+ > [!NOTE]
451
+ > If you are using gemma.cpp in your own project with the `FetchContent` steps
452
+ > in the previous section, building the library is done automatically by `cmake`
453
+ > and this section can be skipped.
454
+
455
+ First, run `cmake`:
456
+
457
+ ```sh
458
+ cmake -B build
459
+ ```
460
+
461
+ Then, run `make` with the `libgemma` target:
462
+
463
+ ```sh
464
+ cd build
465
+ make -j [number of parallel threads to use] libgemma
466
+ ```
467
+
468
+ If this is successful, you should now have a `libgemma` library file in the
469
+ `build/` directory. On Unix platforms, the filename is `libgemma.a`.
470
+
471
+ ## Independent Projects Using gemma.cpp
472
+
473
+ Some independent projects using gemma.cpp:
474
+
475
+ - [gemma-cpp-python - Python bindings](https://github.com/namtranase/gemma-cpp-python)
476
+ - [lua-cgemma - Lua bindings](https://github.com/ufownl/lua-cgemma)
477
+ - [Godot engine demo project](https://github.com/Rliop913/Gemma-godot-demo-project)
478
+
479
+ If you would like to have your project included, feel free to get in touch or
480
+ submit a PR with a `README.md` edit.
481
+
482
+ ## Acknowledgements and Contacts
483
+
484
+ gemma.cpp was started in fall 2023 by [Austin Huang](mailto:austinvhuang@google.com)
485
+ and [Jan Wassenberg](mailto:janwas@google.com), and subsequently released February 2024
486
+ thanks to contributions from Phil Culliton, Paul Chang, and Dan Zheng.
487
+
488
+ Griffin support was implemented in April 2024 thanks to contributions by Andrey
489
+ Mikhaylov, Eugene Kliuchnikov, Jan Wassenberg, Jyrki Alakuijala, Lode
490
+ Vandevenne, Luca Versari, Martin Bruse, Phil Culliton, Sami Boukortt, Thomas
491
+ Fischbacher and Zoltan Szabadka.
492
+
493
+ This is not an officially supported Google product.
gemma.cpp/WORKSPACE ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ workspace(name = "gemma")
2
+
3
+ # This file marks the root of the Bazel workspace.
4
+ # See MODULE.bazel for external dependencies setup.
gemma.cpp/backprop/backward-inl.h ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ // Implementation of the Vector-Jacobian Products (VJP) of the individual
17
+ // operations of the forward pass.
18
+
19
+ // Include guard for non-SIMD code.
20
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_INL_H_
21
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_INL_H_
22
+
23
+ #include <stddef.h>
24
+
25
+ #include <algorithm>
26
+ #include <cmath>
27
+
28
+ #include "backprop/prompt.h"
29
+ #include "gemma/activations.h"
30
+ #include "gemma/common.h"
31
+ #include "hwy/base.h"
32
+ #include "hwy/contrib/thread_pool/thread_pool.h"
33
+
34
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_INL_H_
35
+
36
+ // Include guard for (potentially) SIMD code.
37
+ #if defined(THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE) == defined(HWY_TARGET_TOGGLE)
38
+ #ifdef THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE
39
+ #undef THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE
40
+ #else
41
+ #define THIRD_PARTY_GEMMA_CPP_BACKWARD_TOGGLE
42
+ #endif
43
+
44
+ #include "gemma/ops.h"
45
+ #include "hwy/highway.h"
46
+
47
+ HWY_BEFORE_NAMESPACE();
48
+ namespace gcpp {
49
+ namespace HWY_NAMESPACE {
50
+ namespace hn = hwy::HWY_NAMESPACE;
51
+ // VJP of a matrix multiply: for every token and output row j, feeds v[pos][j] into grad_w row j and grad_x[pos].
52
+ template <size_t kCols, size_t kRows>
53
+ void MatMulVJP(const float* HWY_RESTRICT weights, // kRows * kCols, read-only
54
+ const float* HWY_RESTRICT x, // num_tokens * kCols, read-only
55
+ const float* HWY_RESTRICT v, // num_tokens * kRows, read-only
56
+ size_t num_tokens,
57
+ float* HWY_RESTRICT grad_w, // kRows * kCols; NOT zeroed here, only updated
58
+ float* HWY_RESTRICT grad_x, // num_tokens * kCols; zeroed below, then updated
59
+ hwy::ThreadPool& pool) { // pool is not referenced in this body
60
+ hwy::ZeroBytes(grad_x, num_tokens * kCols * sizeof(grad_x[0]));
61
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
62
+ const size_t voffs = pos * kRows; // start of token pos in v
63
+ const size_t xoffs = pos * kCols; // start of token pos in x and grad_x
64
+ for (size_t j = 0; j < kRows; ++j) {
65
+ MulByConstAndAdd(v[voffs + j], &x[xoffs], &grad_w[j * kCols], kCols); // presumably grad_w[j] += v_j * x[pos]; helper defined elsewhere - confirm
66
+ MulByConstAndAdd(v[voffs + j], &weights[j * kCols], &grad_x[xoffs], // presumably grad_x[pos] += v_j * weights[j] - confirm
67
+ kCols);
68
+ }
69
+ }
70
+ }
71
+ // Multi-head variant of MatMulVJP: the same v[pos][j] scalar is applied to the per-head slices of x, weights, grad_w and grad_x.
72
+ template <size_t kHeads, size_t kCols, size_t kRows>
73
+ void MultiHeadMatMulVJP(
74
+ const float* HWY_RESTRICT weights, // kHeads * kRows * kCols, read-only
75
+ const float* HWY_RESTRICT x, // num_tokens * kHeads * kCols, read-only
76
+ const float* HWY_RESTRICT v, // num_tokens * kRows, read-only (shared by all heads)
77
+ size_t num_tokens,
78
+ float* HWY_RESTRICT grad_w, // kHeads * kRows * kCols; NOT zeroed here, only updated
79
+ float* HWY_RESTRICT grad_x, // num_tokens * kHeads * kCols; zeroed below, then updated
80
+ hwy::ThreadPool& pool) { // pool is not referenced in this body
81
+ hwy::ZeroBytes(grad_x, num_tokens * kHeads * kCols * sizeof(grad_x[0]));
82
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
83
+ for (size_t j = 0; j < kRows; ++j) {
84
+ for (size_t h = 0; h < kHeads; ++h) {
85
+ MulByConstAndAdd(v[pos * kRows + j], // presumably grad_w[h][j] += v_j * x[pos][h]; helper defined elsewhere - confirm
86
+ &x[pos * kHeads * kCols + h * kCols],
87
+ &grad_w[h * kRows * kCols + j * kCols], kCols);
88
+ MulByConstAndAdd(v[pos * kRows + j], // presumably grad_x[pos][h] += v_j * weights[h][j] - confirm
89
+ &weights[h * kRows * kCols + j * kCols],
90
+ &grad_x[pos * kHeads * kCols + h * kCols], kCols);
91
+ }
92
+ }
93
+ }
94
+ }
95
+ // Lane-wise derivative of the tanh-based GELU approximation 0.5 * v * (1 + tanh(arg)), for float vectors.
96
+ template <class D, HWY_IF_F32_D(D)>
97
+ static HWY_INLINE hn::Vec<D> DGelu(D d, hn::Vec<D> v) {
98
+ const hn::Vec<D> kMul = hn::Set(d, 0.044715f);
99
+ const hn::Vec<D> kSqrt2OverPi = hn::Set(d, 0.797884560804236f);
100
+ const hn::Vec<D> kHalf = hn::Set(d, 0.5f);
101
+ const hn::Vec<D> kOne = hn::Set(d, 1.0f);
102
+ // kSqrt2OverPi * 3 * kMul, i.e. the v^2 coefficient of d(arg)/dv
103
+ const hn::Vec<D> kMulv2 = hn::Set(d, 0.1070322244f);
104
+
105
+ const hn::Vec<D> v2 = hn::Mul(v, v); // v^2
106
+ const hn::Vec<D> v3 = hn::Mul(v2, v); // v^3
107
+ const hn::Vec<D> arg = hn::Mul(kSqrt2OverPi, hn::MulAdd(kMul, v3, v)); // kSqrt2OverPi * (kMul * v^3 + v)
108
+ const hn::Vec<D> tanh = hn::Tanh(d, arg);
109
+ const hn::Vec<D> cdf = hn::MulAdd(kHalf, tanh, kHalf); // 0.5 * (1 + tanh(arg))
110
+ const hn::Vec<D> dtanh = hn::Sub(kOne, hn::Mul(tanh, tanh)); // 1 - tanh^2 = d tanh(arg) / d arg
111
+ const hn::Vec<D> darg = hn::MulAdd(kMulv2, v2, kSqrt2OverPi); // d arg / d v
112
+ return hn::MulAdd(kHalf, hn::Mul(v, hn::Mul(dtanh, darg)), cdf); // product rule: 0.5 * v * dtanh * darg + cdf
113
+ }
114
+ // Softmax VJP, in place: rewrites backward[i] from forward[i], backward[i] and the dot product of the two arrays.
115
+ static HWY_NOINLINE void SoftmaxVJP(const float* HWY_RESTRICT forward, // read-only, `size` floats
116
+ float* HWY_RESTRICT backward, // read and overwritten, `size` floats
117
+ const size_t size) {
118
+ namespace hn = hwy::HWY_NAMESPACE;
119
+ using D = hn::ScalableTag<float>;
120
+ const D d;
121
+
122
+ const auto offset = // dot(forward, backward) broadcast to all lanes; computed before backward is modified
123
+ hn::Set(d, hn::Dot::Compute<0>(d, forward, backward, size));
124
+ hn::Transform1( // NOTE(review): per Highway docs v is the backward element and y the forward element - confirm
125
+ d, backward, size, forward,
126
+ [&offset](const auto d, const auto v, const auto y)
127
+ HWY_ATTR { return hn::Mul(y, hn::Sub(v, offset)); }); // y * (v - offset)
128
+ }
129
+
130
+ static HWY_NOINLINE void RMSNormVJP(
131
+ const float* HWY_RESTRICT weights, const float* HWY_RESTRICT x,
132
+ const float* HWY_RESTRICT v, size_t model_dim, size_t num_tokens,
133
+ float* HWY_RESTRICT grad_w, float* HWY_RESTRICT grad_x,
134
+ hwy::ThreadPool& pool) {
135
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
136
+ const size_t offset = pos * model_dim;
137
+ constexpr float eps = 1e-6f;
138
+ float ss = SquaredL2(x + offset, model_dim);
139
+ ss = 1.0f / sqrtf(ss / StaticCast<float>(model_dim) + eps);
140
+ for (size_t i = 0; i < model_dim; ++i) {
141
+ grad_w[i] += v[offset + i] * x[offset + i] * ss;
142
+ }
143
+ const float ss3 = ss * ss * ss / StaticCast<float>(model_dim);
144
+ float tmp = 0.0f;
145
+ for (size_t i = 0; i < model_dim; ++i) {
146
+ tmp += (1.0f + weights[i]) * v[offset + i] * x[offset + i];
147
+ }
148
+ tmp *= ss3;
149
+ for (size_t i = 0; i < model_dim; ++i) {
150
+ grad_x[offset + i] = ss * (1.0f + weights[i]) * v[offset + i] -
151
+ tmp * x[offset + i];
152
+ }
153
+ }
154
+ }
155
+
156
+ static HWY_NOINLINE void InputEmbeddingVJP(
157
+ const float* weights, const std::vector<int>& prompt,
158
+ const float scaling, const float* HWY_RESTRICT v,
159
+ float* HWY_RESTRICT grad, size_t model_dim) {
160
+ HWY_ASSERT(!prompt.empty());
161
+ for (size_t pos = 0; pos < prompt.size() - 1; ++pos) {
162
+ int token = prompt[pos];
163
+ MulByConstAndAdd(scaling, v + pos * model_dim,
164
+ grad + token * model_dim, model_dim);
165
+ }
166
+ }
167
+
168
// Backward pass of one transformer layer.
//
// `forward` holds the activations recorded by the forward pass, `backward`
// receives the gradients with respect to those activations, and `grad`
// receives the weight gradients via the *VJP helpers. `next_layer_grad` is the
// gradient arriving from the layer above (num_tokens * kModelDim floats).
// On return, `backward.input` holds the gradient with respect to this layer's
// input.
//
// The steps run in the reverse order of the forward computation:
// FFW output matmul -> gated GELU -> gating matmul -> pre-FFW RMSNorm ->
// attention output matmul -> attention mixing -> softmax -> Q.K scores ->
// RoPE / query scaling -> QKV matmul -> pre-attention RMSNorm.
//
// Per-token layout of `qkv` as used below: (kHeads + 2) blocks of kQKVDim
// floats; blocks [0, kHeads) are the per-head queries, block kHeads is the
// key and block kHeads + 1 is the value.
template <typename TConfig, template<typename> typename LayerT>
void LayerVJP(const LayerT<TConfig>& weights,
              const ForwardLayer<float, TConfig>& forward,
              const float* HWY_RESTRICT next_layer_grad,
              size_t num_tokens,
              LayerT<TConfig>& grad,
              ForwardLayer<float, TConfig>& backward,
              hwy::ThreadPool& pool) {
  static constexpr size_t kModelDim = TConfig::kModelDim;
  static constexpr size_t kQKVDim = TConfig::kQKVDim;
  static constexpr size_t kHeads = TConfig::kHeads;
  static constexpr size_t kSeqLen = TConfig::kSeqLen;
  static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
  // 1 / sqrt(kQKVDim), applied to the query gradients further below.
  static const float kQueryScale =
      static_cast<float>(1.0 / sqrt(static_cast<double>(kQKVDim)));
  HWY_ASSERT(num_tokens <= kSeqLen);

  // FFW output projection: produces grad.linear_w and the gradient of the
  // gated hidden activations.
  MatMulVJP<kFFHiddenDim, kModelDim>(
      weights.linear_w.data(), forward.ffw_hidden_gated.data(), next_layer_grad,
      num_tokens, grad.linear_w.data(), backward.ffw_hidden_gated.data(),
      pool);

  // Gated GELU. Each token's ffw_hidden row has 2 * kFFHiddenDim floats: the
  // first half (y) goes through Gelu, the second half (x) is the multiplier.
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    const size_t hidden_offset = pos * kFFHiddenDim * 2;
    const float* HWY_RESTRICT f_out = forward.ffw_hidden.data() + hidden_offset;
    const float* HWY_RESTRICT f_out_mul = f_out + kFFHiddenDim;
    const float* HWY_RESTRICT b_out_gated =
        backward.ffw_hidden_gated.data() + pos * kFFHiddenDim;
    float* HWY_RESTRICT b_out = backward.ffw_hidden.data() + hidden_offset;
    float* HWY_RESTRICT b_out_mul = b_out + kFFHiddenDim;
    namespace hn = hwy::HWY_NAMESPACE;
    using DF = hn::ScalableTag<float>;
    using VF = hn::Vec<DF>;
    DF df;
    // NOTE(review): whole vectors are loaded and stored with a stride of
    // Lanes(df) and there is no remainder handling, so this appears to assume
    // kFFHiddenDim is a multiple of Lanes(df) - confirm.
    for (size_t i = 0; i < kFFHiddenDim; i += Lanes(df)) {
      const auto y = Load(df, f_out + i);
      const auto x = Load(df, f_out_mul + i);
      const auto v = Load(df, b_out_gated + i);
      // Gradient w.r.t. the multiplier half: v * Gelu(y).
      hn::Store(hn::Mul(v, Gelu(df, y)), df, b_out_mul + i);
      // Gradient w.r.t. the gated half: v * x * Gelu'(y).
      hn::Store(hn::Mul(v, hn::Mul(x, DGelu(df, y))), df, b_out + i);
    }
  }

  // Gating projection, then the RMSNorm that feeds the FFW block.
  MatMulVJP<kModelDim, kFFHiddenDim * 2>(
      weights.gating_einsum_w.data(),
      forward.bf_pre_ffw_rms_out.data(), backward.ffw_hidden.data(),
      num_tokens, grad.gating_einsum_w.data(),
      backward.bf_pre_ffw_rms_out.data(), pool);
  RMSNormVJP(weights.pre_ffw_norm_scale.data(),
             forward.attention_out.data(),
             backward.bf_pre_ffw_rms_out.data(),
             kModelDim, num_tokens,
             grad.pre_ffw_norm_scale.data(),
             backward.attention_out.data(), pool);

  // Combine the incoming gradient with backward.attention_out, row by row
  // (assuming AddFrom(src, dst, n) accumulates src into dst - presumably the
  // skip connection around the FFW block).
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    AddFrom(next_layer_grad + pos * kModelDim,
            backward.attention_out.data() + pos * kModelDim, kModelDim);
  }

  // The attention steps below accumulate into backward.qkv, so clear it first.
  hwy::ZeroBytes(backward.qkv.data(),
                 num_tokens * (kHeads + 2) * kQKVDim * sizeof(backward.qkv[0]));

  // Attention output projection.
  MultiHeadMatMulVJP<kHeads, kQKVDim, kModelDim>(
      weights.attn_vec_einsum_w.data(), forward.att_out.data(),
      backward.attention_out.data(), num_tokens,
      grad.attn_vec_einsum_w.data(), backward.att_out.data(), pool);

  // Attention-weighted mix of values: for each (head, pos) and each visible
  // position pos2 <= pos, compute the gradient of the attention weight and
  // accumulate the gradient of the value vector at pos2.
  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      const size_t aoffset = head * kSeqLen + pos * kHeads * kSeqLen;
      const float* HWY_RESTRICT f_head_att = forward.att.data() + aoffset;
      const float* HWY_RESTRICT b_att_out =
          backward.att_out.data() + (pos * kHeads + head) * kQKVDim;
      float* HWY_RESTRICT b_head_att = backward.att.data() + aoffset;
      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
        const size_t v2offs = (pos2 * (kHeads + 2) + kHeads + 1) * kQKVDim;
        const float* HWY_RESTRICT f_v2 = forward.qkv.data() + v2offs;
        float* HWY_RESTRICT b_v2 = backward.qkv.data() + v2offs;
        b_head_att[pos2] = Dot(b_att_out, f_v2, kQKVDim);
        MulByConstAndAdd(f_head_att[pos2], b_att_out, b_v2, kQKVDim);
      }
    }
  }

  // Softmax over the pos + 1 visible positions of every (head, pos) row.
  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      const size_t aoffset = head * kSeqLen + pos * kHeads * kSeqLen;
      const float* HWY_RESTRICT f_head_att = forward.att.data() + aoffset;
      float* HWY_RESTRICT b_head_att = backward.att.data() + aoffset;
      SoftmaxVJP(f_head_att, b_head_att, pos + 1);
    }
  }

  // Q.K scores: each score gradient contributes the key to the query gradient
  // and the query to the key gradient.
  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      const size_t qoffs = (pos * (kHeads + 2) + head) * kQKVDim;
      const size_t aoffs = head * kSeqLen + pos * kHeads * kSeqLen;
      const float* HWY_RESTRICT f_q = forward.qkv.data() + qoffs;
      const float* HWY_RESTRICT b_head_att = backward.att.data() + aoffs;
      float* HWY_RESTRICT b_q = backward.qkv.data() + qoffs;
      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
        const size_t k2offs = (pos2 * (kHeads + 2) + kHeads) * kQKVDim;
        const float* HWY_RESTRICT f_k2 = forward.qkv.data() + k2offs;
        float* HWY_RESTRICT b_k2 = backward.qkv.data() + k2offs;
        MulByConstAndAdd(b_head_att[pos2], f_k2, b_q, kQKVDim);
        MulByConstAndAdd(b_head_att[pos2], f_q, b_k2, kQKVDim);
      }
    }
  }

  // Apply Rope with the negated position to the key gradients (presumably the
  // inverse of the rotation used in the forward pass). `pos` is a signed int
  // so that -pos is representable.
  for (int pos = 0; pos < static_cast<int>(num_tokens); ++pos) {
    float* HWY_RESTRICT b_kv =
        backward.qkv.data() + (pos * (kHeads + 2) + kHeads) * kQKVDim;
    Rope(b_kv, kQKVDim, -pos);
  }

  // Query gradients: apply the query scale, then Rope with the negated
  // position. NOTE(review): `pos` is size_t here, so `-pos` wraps around
  // before being converted to Rope's position parameter; the loop above uses
  // a signed int instead - confirm this behaves as intended for Rope's
  // parameter type.
  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      float* HWY_RESTRICT b_q =
          backward.qkv.data() + (pos * (kHeads + 2) + head) * kQKVDim;
      MulByConst(kQueryScale, b_q, kQKVDim);
      Rope(b_q, kQKVDim, -pos);
    }
  }

  // QKV projection, then the pre-attention RMSNorm.
  MatMulVJP<kModelDim, (kHeads + 2) * kQKVDim>(
      weights.qkv_einsum_w.data(), forward.pre_att_rms_out.data(),
      backward.qkv.data(), num_tokens,
      grad.qkv_einsum_w.data(), backward.pre_att_rms_out.data(), pool);
  RMSNormVJP(weights.pre_attention_norm_scale.data(),
             forward.input.data(),
             backward.pre_att_rms_out.data(),
             kModelDim, num_tokens,
             grad.pre_attention_norm_scale.data(),
             backward.input.data(), pool);
  // Combine backward.attention_out with the input gradient, row by row
  // (presumably the skip connection around the attention block).
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    AddFrom(backward.attention_out.data() + pos * kModelDim,
            backward.input.data() + pos * kModelDim, kModelDim);
  }
}
309
+
310
+ static HWY_NOINLINE void SoftcapVJP(const float* HWY_RESTRICT forward,
311
+ float* HWY_RESTRICT backward,
312
+ const float cap,
313
+ const size_t size) {
314
+ namespace hn = hwy::HWY_NAMESPACE;
315
+ using D = hn::ScalableTag<float>;
316
+ const D d;
317
+
318
+ const auto one = hn::Set(d, 1.0f);
319
+ const auto vcap = hn::Set(d, cap);
320
+ const auto vinv_cap = hn::Div(hn::Set(d, 1.0f), vcap);
321
+
322
+ // TODO(szabadka): Investigate what to do when the argmax is not unique.
323
+ // TODO(szabadka): Use IndexOfMax from hwy when it is available.
324
+ size_t imax = std::max_element(forward, forward + size) - forward;
325
+
326
+ hn::Transform1(
327
+ d, backward, size, forward,
328
+ [&](const auto d, const auto v, const auto y) HWY_ATTR {
329
+ const auto scaled = hn::Mul(vinv_cap, y);
330
+ return hn::Mul(v, hn::Sub(one, hn::Mul(scaled, scaled)));
331
+ });
332
+
333
+ backward[imax] = 0;
334
+ auto sum = hn::Zero(d);
335
+ Foreach(d, backward, size, sum,
336
+ [&sum](const auto d, const auto value) HWY_ATTR {
337
+ sum = hn::Add(sum, value);
338
+ });
339
+ backward[imax] = -hn::ReduceSum(d, sum);
340
+ }
341
+
342
+ static HWY_NOINLINE void CrossEntropyLossGrad(
343
+ const float* HWY_RESTRICT x, float* HWY_RESTRICT grad,
344
+ const Prompt& prompt, size_t vocab_size) {
345
+ HWY_ASSERT(!prompt.tokens.empty());
346
+ const float scaling = -1.0 / std::log(2.0);
347
+ size_t num_tokens = prompt.tokens.size() - 1;
348
+ hwy::ZeroBytes(grad, num_tokens * vocab_size * sizeof(grad[0]));
349
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
350
+ if (pos + 1 < prompt.context_size) {
351
+ continue;
352
+ }
353
+ const int next_token = prompt.tokens[pos + 1];
354
+ grad[pos * vocab_size + next_token] =
355
+ scaling / x[pos * vocab_size + next_token];
356
+ }
357
+ }
358
+
359
+ template <typename TConfig, template<typename...> typename WeightsT,
360
+ template<typename> typename LayerT>
361
+ void CrossEntropyLossBackwardPass(const Prompt& prompt,
362
+ const WeightsT<TConfig>& weights,
363
+ const ForwardPass<float, TConfig>& forward,
364
+ WeightsT<TConfig>& grad,
365
+ ForwardPass<float, TConfig>& backward,
366
+ hwy::ThreadPool& pool) {
367
+ static constexpr size_t kVocabSize = TConfig::kVocabSize;
368
+ static constexpr size_t kModelDim = TConfig::kModelDim;
369
+ static constexpr size_t kLayers = TConfig::kLayers;
370
+ const float kEmbScaling = EmbeddingScaling<TConfig>();
371
+ static_assert(!TConfig::kAbsolutePE);
372
+ static_assert(!TConfig::kPostNormScale);
373
+ static_assert(TConfig::kKVHeads == 1);
374
+
375
+ HWY_DASSERT(prompt.context_size > 0);
376
+ HWY_DASSERT(prompt.context_size < prompt.tokens.size());
377
+ const size_t num_tokens = prompt.tokens.size() - 1;
378
+
379
+ CrossEntropyLossGrad(forward.probs.data(), backward.logits.data(), prompt,
380
+ kVocabSize);
381
+
382
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
383
+ SoftmaxVJP(forward.probs.data() + pos * kVocabSize,
384
+ backward.logits.data() + pos * kVocabSize,
385
+ kVocabSize);
386
+ }
387
+
388
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
389
+ SoftcapVJP(forward.logits.data() + pos * kVocabSize,
390
+ backward.logits.data() + pos * kVocabSize, 30.0f, kVocabSize);
391
+ }
392
+
393
+ MatMulVJP<kModelDim, kVocabSize>(
394
+ weights.embedder_input_embedding.data(), forward.final_norm_output.data(),
395
+ backward.logits.data(), num_tokens,
396
+ grad.embedder_input_embedding.data(), backward.final_norm_output.data(),
397
+ pool);
398
+
399
+ RMSNormVJP(weights.final_norm_scale.data(),
400
+ forward.final_layer_output.data(),
401
+ backward.final_norm_output.data(),
402
+ kModelDim, num_tokens,
403
+ grad.final_norm_scale.data(),
404
+ backward.final_layer_output.data(), pool);
405
+
406
+ for (int layer = static_cast<int>(kLayers) - 1; layer >= 0; --layer) {
407
+ auto type = TConfig::kLayerConfig[layer];
408
+ // TODO(szabadka) Implement Griffin layer vjp.
409
+ HWY_ASSERT(type == LayerAttentionType::kGemma);
410
+ float* next_layer_grad = layer + 1 < kLayers
411
+ ? backward.layers[layer + 1].input.data()
412
+ : backward.final_layer_output.data();
413
+ LayerVJP<TConfig, LayerT>(
414
+ *weights.GetLayer(layer), forward.layers[layer], next_layer_grad,
415
+ num_tokens, *grad.GetLayer(layer), backward.layers[layer], pool);
416
+ }
417
+
418
+ InputEmbeddingVJP(weights.embedder_input_embedding.data(), prompt.tokens,
419
+ kEmbScaling, backward.layers[0].input.data(),
420
+ grad.embedder_input_embedding.data(), kModelDim);
421
+ }
422
+
423
+ // NOLINTNEXTLINE(google-readability-namespace-comments)
424
+ } // namespace HWY_NAMESPACE
425
+ } // namespace gcpp
426
+ HWY_AFTER_NAMESPACE();
427
+
428
+ #endif // NOLINT
gemma.cpp/backprop/backward.cc ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #include "backprop/backward.h"
17
+
18
+ #include "backprop/prompt.h"
19
+ #include "gemma/activations.h"
20
+ #include "gemma/common.h"
21
+ #include "hwy/contrib/thread_pool/thread_pool.h"
22
+
23
+ // Compiles this file for multiple architectures via "foreach_target.h", to
24
+ // which we pass the filename via macro 'argument'.
25
+ #undef HWY_TARGET_INCLUDE
26
+ #define HWY_TARGET_INCLUDE "backprop/backward.cc" // NOLINT
27
+ #include "hwy/foreach_target.h" // IWYU pragma: keep
28
+
29
+ #include "hwy/highway.h"
30
+ // After highway.h
31
+ #include "backprop/backward-inl.h"
32
+ #include "gemma/weights.h"
33
+
34
+ HWY_BEFORE_NAMESPACE();
35
+ namespace gcpp {
36
+ namespace HWY_NAMESPACE {
37
+
38
+ template <typename TConfig>
39
+ void CrossEntropyLossBackwardPass(const Prompt& prompt,
40
+ const ByteStorageT& weights_u8,
41
+ const ByteStorageT& forward_u8,
42
+ ByteStorageT& grad_u8,
43
+ ByteStorageT& backward_u8,
44
+ hwy::ThreadPool& pool) {
45
+ using TWeights = CompressedWeights<TConfig>;
46
+ const auto& weights = *reinterpret_cast<const TWeights*>(weights_u8.get());
47
+ auto& grad = *reinterpret_cast<TWeights*>(grad_u8.get());
48
+ using TAct = ForwardPass<float, TConfig>;
49
+ const auto& forward = *reinterpret_cast<const TAct*>(forward_u8.get());
50
+ auto& backward = *reinterpret_cast<TAct*>(backward_u8.get());
51
+ CrossEntropyLossBackwardPass<TConfig, CompressedWeights, CompressedLayer>(
52
+ prompt, weights, forward, grad, backward, pool);
53
+ }
54
+
55
+ void CrossEntropyLossBackwardPassT(Model model,
56
+ const Prompt& prompt,
57
+ const ByteStorageT& weights,
58
+ const ByteStorageT& forward,
59
+ ByteStorageT& grad,
60
+ ByteStorageT& backward,
61
+ hwy::ThreadPool& pool) {
62
+ // TODO(janwas): use CallFunctorForModel
63
+ switch (model) {
64
+ case Model::GEMMA_2B:
65
+ CrossEntropyLossBackwardPass<ConfigGemma2B<float>>(
66
+ prompt, weights, forward, grad, backward, pool);
67
+ break;
68
+ case Model::GEMMA_TINY:
69
+ CrossEntropyLossBackwardPass<ConfigGemmaTiny<float>>(
70
+ prompt, weights, forward, grad, backward, pool);
71
+ break;
72
+ default:
73
+ HWY_ABORT("Model type %d unknown.", static_cast<int>(model));
74
+ }
75
+ }
76
+
77
+ } // namespace HWY_NAMESPACE
78
+ } // namespace gcpp
79
+ HWY_AFTER_NAMESPACE();
80
+
81
+ #if HWY_ONCE
82
+ namespace gcpp {
83
+
84
+ HWY_EXPORT(CrossEntropyLossBackwardPassT);
85
+
86
+ void CrossEntropyLossBackwardPass(
87
+ const Model& model, const Prompt& prompt,
88
+ const ByteStorageT& weights, const ByteStorageT& forward,
89
+ ByteStorageT& grad, ByteStorageT& backward, hwy::ThreadPool& pool) {
90
+ return HWY_DYNAMIC_DISPATCH(CrossEntropyLossBackwardPassT)(
91
+ model, prompt, weights, forward, grad, backward, pool);
92
+ }
93
+
94
+ } // namespace gcpp
95
+ #endif // HWY_ONCE
gemma.cpp/backprop/backward.h ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_H_
18
+
19
+ #include "backprop/prompt.h"
20
+ #include "gemma/common.h"
21
+ #include "hwy/contrib/thread_pool/thread_pool.h"
22
+
23
+ namespace gcpp {
24
+
25
// Runs the backward pass of the cross-entropy loss for `prompt`.
//
// `model` selects the model configuration. `weights` and `forward` are read
// only; `grad` and `backward` are the outputs. All four are type-erased byte
// buffers.
// NOTE(review): presumably `grad` receives weight gradients and `backward`
// activation gradients, and `forward` must come from a preceding forward pass
// over the same prompt - confirm against the implementation and callers.
void CrossEntropyLossBackwardPass(
    const Model& model, const Prompt& prompt,
    const ByteStorageT& weights, const ByteStorageT& forward,
    ByteStorageT& grad, ByteStorageT& backward, hwy::ThreadPool& pool);
29
+
30
+ } // namespace gcpp
31
+
32
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_H_
gemma.cpp/backprop/backward_scalar.h ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_SCALAR_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_SCALAR_H_
18
+
19
#include <stddef.h>
#include <string.h>

#include <algorithm>  // std::max_element
#include <cmath>
#include <vector>
24
+
25
+ #include "backprop/common_scalar.h"
26
+ #include "backprop/prompt.h"
27
+ #include "gemma/activations.h"
28
+ #include "gemma/common.h" // EmbeddingScaling
29
+ #include "gemma/weights_raw.h"
30
+
31
+ namespace gcpp {
32
// Scalar reference backward pass of a matrix product over K rows.
// `w` is N x M, `x` and `dx` are K x M, `dy` is K x N, `dw` is N x M.
// `dx` is zeroed first; `dw` is accumulated into and must be initialized by
// the caller. Assumes MulByConstAndAddT(c, in, out, n) performs out += c * in.
template<typename T>
void MatMulVJPT(const T* w, const T* x, const T* dy, T* dw, T* dx,
                size_t N, size_t M, size_t K) {
  memset(dx, 0, M * K * sizeof(dx[0]));
  for (size_t row = 0; row < K; ++row) {
    const T* x_row = &x[row * M];
    T* dx_row = &dx[row * M];
    for (size_t out = 0; out < N; ++out) {
      const size_t w_offset = out * M;
      // Weight gradient gets the scaled input row ...
      MulByConstAndAddT(dy[row * N + out], x_row, &dw[w_offset], M);
      // ... and the input gradient gets the scaled weight row.
      MulByConstAndAddT(dy[row * N + out], &w[w_offset], dx_row, M);
    }
  }
}
43
// Scalar reference backward pass of the per-head matrix product.
// `w` and `dw` are H x N x M, `x` and `dx` are K x H x M, `dy` is K x N.
// `dx` is zeroed first; `dw` is accumulated into. Assumes
// MulByConstAndAddT(c, in, out, n) performs out += c * in.
template<typename T>
void MultiHeadMatMulVJPT(const T* w, const T* x, const T* dy, T* dw, T* dx,
                         size_t H, size_t N, size_t M, size_t K) {
  memset(dx, 0, H * M * K * sizeof(dx[0]));
  for (size_t row = 0; row < K; ++row) {
    for (size_t out = 0; out < N; ++out) {
      for (size_t head = 0; head < H; ++head) {
        const size_t x_offset = row * H * M + head * M;
        const size_t w_offset = head * N * M + out * M;
        MulByConstAndAddT(dy[row * N + out], &x[x_offset], &dw[w_offset], M);
        MulByConstAndAddT(dy[row * N + out], &w[w_offset], &dx[x_offset], M);
      }
    }
  }
}
58
+
59
// Scalar reference backward pass of RMS normalization with scale (1 + w[j]),
// applied to K rows of length N.
//
// With r = 1 / sqrt(mean(x^2) + 1e-6) for a row:
//   dw[j]  += dy[j] * x[j] * r                       (accumulated)
//   dx[j]   = r * (1 + w[j]) * dy[j] - t * x[j]       (overwritten)
// where t = (r^3 / N) * sum_j (1 + w[j]) * dy[j] * x[j].
template<typename T>
void RMSNormVJPT(const T* w, const T* x, const T* dy, T* dw, T* dx,
                 size_t N, size_t K) {
  for (size_t row = 0; row < K; ++row) {
    const size_t base = row * N;
    constexpr T eps(1e-6);

    T inv_rms = SquaredL2(x + base, N);
    inv_rms = T(1.0) / std::sqrt(inv_rms / T(N) + eps);

    for (size_t j = 0; j < N; ++j) {
      dw[j] += dy[base + j] * x[base + j] * inv_rms;
    }

    const T inv_rms3 = inv_rms * inv_rms * inv_rms / T(N);
    T proj = 0.0;
    for (size_t j = 0; j < N; ++j) {
      proj += (T(1.0) + w[j]) * dy[base + j] * x[base + j];
    }
    proj *= inv_rms3;

    for (size_t j = 0; j < N; ++j) {
      dx[base + j] =
          inv_rms * (T(1.0) + w[j]) * dy[base + j] - proj * x[base + j];
    }
  }
}
80
// In-place backward pass of softmax for one row of N elements: `y` holds the
// softmax outputs and `dy` the incoming gradient. On return
// dy[i] = y[i] * (dy[i] - sum_j y[j] * dy[j]).
template<typename T>
void SoftmaxVJPT(const T* y, T* dy, size_t N) {
  // Dot product of outputs and incoming gradient; it must be complete before
  // any dy[i] is overwritten.
  T dot = {};
  for (size_t j = 0; j < N; ++j) {
    dot += y[j] * dy[j];
  }
  for (size_t j = 0; j < N; ++j) {
    const T centered = dy[j] - dot;
    dy[j] = y[j] * centered;
  }
}
90
// Batched variant: applies the single-row SoftmaxVJPT to each of K consecutive
// rows of length N in `y` / `dy`.
template<typename T>
void SoftmaxVJPT(const T* y, T* dy, size_t N, size_t K) {
  for (size_t row = 0; row != K; ++row) {
    const size_t base = row * N;
    SoftmaxVJPT(y + base, dy + base, N);
  }
}
96
+
97
// Derivative of f(x) = x * cdf(x), where
//   cdf(x) = 0.5 * (1 + tanh(arg)),  arg = sqrt(2/pi) * (x + 0.044715 * x^3).
// By the product rule: f'(x) = cdf(x) + 0.5 * x * (1 - tanh(arg)^2) * arg'(x).
template<typename T>
T GeluDerivative(T x) {
  static const T kMul = 0.044715;
  static const T kSqrt2OverPi = 0.797884560804236;
  // Coefficient of x^2 in arg'(x).
  static const T kMul2 = kSqrt2OverPi * T(3.0) * kMul;

  const T x_sq = x * x;
  const T x_cube = x_sq * x;

  const T th = std::tanh(kSqrt2OverPi * (kMul * x_cube + x));
  const T cdf = T(0.5) * (T(1.0) + th);

  // Chain-rule factors.
  const T dtanh = T(1.0) - th * th;
  const T darg = kMul2 * x_sq + kSqrt2OverPi;

  return T(0.5) * x * dtanh * darg + cdf;
}
112
+
113
// Backward pass of the gated GELU over K rows. Each row of `in` / `d_in` has
// 2 * N values: the first N (x1) go through Gelu, the last N (x2) multiply the
// result. `d_out` has N values per row.
//   d_in[first half]  = d_out * x2 * Gelu'(x1)
//   d_in[second half] = d_out * Gelu(x1)
template<typename T>
void GatedGeluVJP(const T* in, const T* d_out, T* d_in, size_t N, size_t K) {
  for (size_t row = 0; row < K; ++row) {
    const size_t wide_base = row * 2 * N;
    const T* gate_in = in + wide_base;
    const T* mul_in = gate_in + N;
    const T* upstream = d_out + row * N;
    T* d_gate = d_in + wide_base;
    T* d_mul = d_gate + N;
    for (size_t j = 0; j < N; ++j) {
      d_gate[j] = upstream[j] * mul_in[j] * GeluDerivative(gate_in[j]);
      d_mul[j] = upstream[j] * Gelu(gate_in[j]);
    }
  }
}
127
+
128
+
129
// Backward pass of the causal query-key scores.
//
// Per-token layout of `qkv` / `dqkv`: (kHeads + 2) blocks of kQKVDim values;
// blocks [0, kHeads) are the per-head queries and block kHeads is the key.
// `doutput` holds score gradients, indexed [pos][head][pos2] with row stride
// kSeqLen. The query and key gradient blocks of every token are zeroed first
// (the remaining block is left untouched), then for every pos2 <= pos:
//   dq[pos][head] += doutput[pos][head][pos2] * k[pos2]
//   dk[pos2]      += doutput[pos][head][pos2] * q[pos][head]
// (assuming MulByConstAndAddT(c, in, out, n) performs out += c * in).
template<typename T>
void MaskedAttentionVJP(const T* qkv, const T* doutput, T* dqkv,
                        size_t num_tokens, size_t kHeads, size_t kQKVDim,
                        size_t kSeqLen) {
  const size_t token_stride = (kHeads + 2) * kQKVDim;

  // Clear the kHeads query blocks plus the key block of each token.
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    memset(dqkv + pos * token_stride, 0, (kHeads + 1) * kQKVDim * sizeof(T));
  }

  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      const size_t q_offset = (pos * (kHeads + 2) + head) * kQKVDim;
      const T* scores_grad = doutput + head * kSeqLen + pos * kHeads * kSeqLen;
      const T* query = qkv + q_offset;
      T* d_query = dqkv + q_offset;
      // Causal mask: only positions up to and including `pos` are visible.
      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
        const size_t k_offset = (pos2 * (kHeads + 2) + kHeads) * kQKVDim;
        MulByConstAndAddT(scores_grad[pos2], qkv + k_offset, d_query, kQKVDim);
        MulByConstAndAddT(scores_grad[pos2], query, dqkv + k_offset, kQKVDim);
      }
    }
  }
}
154
+
155
// Backward pass of the causally masked softmax. For every (pos, head) row of
// length kSeqLen, the first pos + 1 entries go through SoftmaxVJPT and the
// remaining kSeqLen - pos - 1 entries of `dy` are set to zero.
template<typename T>
void MaskedSoftmaxVJPT(const T* y, T* dy, size_t num_tokens,
                       size_t kHeads, size_t kSeqLen) {
  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      const size_t visible = pos + 1;
      const size_t row = pos * kHeads * kSeqLen + head * kSeqLen;
      SoftmaxVJPT(y + row, dy + row, visible);
      // Masked positions carry no gradient.
      memset(dy + row + visible, 0, (kSeqLen - pos - 1) * sizeof(T));
    }
  }
}
166
+
167
// Backward pass of the attention-weighted mix of value vectors.
//
// The value vector of a token is block kHeads + 1 of its (kHeads + 2) qkv
// blocks. The value-gradient block of every token is zeroed first, then for
// every head, pos and pos2 <= pos:
//   dattention[pos][head][pos2] = dot(doutput[pos][head], v[pos2])
//   dv[pos2] += attention[pos][head][pos2] * doutput[pos][head]
// (assuming MulByConstAndAddT(c, in, out, n) performs out += c * in).
template<typename T>
void MixByAttentionVJP(const T* qkv, const T* attention, const T* doutput,
                       T* dqkv, T* dattention, size_t num_tokens,
                       size_t kHeads, size_t kQKVDim, size_t kSeqLen) {
  // Offset of the value block of token `pos` inside qkv / dqkv.
  const auto value_offset = [kHeads, kQKVDim](size_t pos) {
    return (pos * (kHeads + 2) + kHeads + 1) * kQKVDim;
  };

  for (size_t pos = 0; pos < num_tokens; ++pos) {
    memset(dqkv + value_offset(pos), 0, kQKVDim * sizeof(T));
  }

  for (size_t head = 0; head < kHeads; ++head) {
    for (size_t pos = 0; pos < num_tokens; ++pos) {
      const size_t out_offset = head * kQKVDim + pos * kHeads * kQKVDim;
      const size_t att_offset = head * kSeqLen + pos * kHeads * kSeqLen;
      const T* weights_row = attention + att_offset;
      const T* upstream = doutput + out_offset;
      T* d_weights_row = dattention + att_offset;
      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
        const size_t v_offset = value_offset(pos2);
        d_weights_row[pos2] = DotT(upstream, qkv + v_offset, kQKVDim);
        MulByConstAndAddT(weights_row[pos2], upstream, dqkv + v_offset,
                          kQKVDim);
      }
    }
  }
}
191
+
192
// Backward pass of the scaled embedding lookup. For every token except the
// last, calls MulByConstAndAddT(scaling, row i of `dy`, row tokens[i] of `dw`,
// N); assuming that helper performs out += c * in, this adds `scaling * dy[i]`
// into the embedding-gradient row of that token. `w` is not read. An empty
// token list is a no-op.
template<typename T>
void InputEmbeddingVJPT(const T* w, const std::vector<int>& tokens, T scaling,
                        const T* dy, T* dw, size_t N) {
  if (tokens.empty()) {
    return;
  }
  const size_t last = tokens.size() - 1;
  for (size_t i = 0; i != last; ++i) {
    const int token = tokens[i];
    const T* upstream_row = dy + i * N;
    T* grad_row = dw + token * N;
    MulByConstAndAddT(scaling, upstream_row, grad_row, N);
  }
}
201
+
202
+ template<typename T, typename TConfig>
203
+ void LayerVJP(const Layer<T, TConfig>& weights,
204
+ const ForwardLayer<T, TConfig>& forward,
205
+ const T* dy,
206
+ Layer<T, TConfig>& grad,
207
+ ForwardLayer<T, TConfig>& backward,
208
+ size_t num_tokens) {
209
+ static constexpr size_t kModelDim = TConfig::kModelDim;
210
+ static constexpr size_t kSeqLen = TConfig::kSeqLen;
211
+ static constexpr size_t kQKVDim = TConfig::kQKVDim;
212
+ static constexpr size_t kHeads = TConfig::kHeads;
213
+ static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
214
+ static const T kQueryScale = 1.0 / std::sqrt(T(kQKVDim));
215
+
216
+ MatMulVJPT(weights.linear_w.data(), forward.ffw_hidden_gated.data(),
217
+ dy, grad.linear_w.data(), backward.ffw_hidden_gated.data(),
218
+ kModelDim, kFFHiddenDim, num_tokens);
219
+
220
+ GatedGeluVJP(forward.ffw_hidden.data(), backward.ffw_hidden_gated.data(),
221
+ backward.ffw_hidden.data(), kFFHiddenDim, num_tokens);
222
+
223
+ MatMulVJPT(weights.gating_einsum_w.data(), forward.bf_pre_ffw_rms_out.data(),
224
+ backward.ffw_hidden.data(), grad.gating_einsum_w.data(),
225
+ backward.bf_pre_ffw_rms_out.data(), kFFHiddenDim * 2, kModelDim,
226
+ num_tokens);
227
+
228
+ RMSNormVJPT(weights.pre_ffw_norm_scale.data(), forward.attention_out.data(),
229
+ backward.bf_pre_ffw_rms_out.data(),
230
+ grad.pre_ffw_norm_scale.data(), backward.attention_out.data(),
231
+ kModelDim, num_tokens);
232
+
233
+ AddFromT(dy, backward.attention_out.data(), num_tokens * kModelDim);
234
+
235
+ MultiHeadMatMulVJPT(weights.attn_vec_einsum_w.data(), forward.att_out.data(),
236
+ backward.attention_out.data(),
237
+ grad.attn_vec_einsum_w.data(),
238
+ backward.att_out.data(),
239
+ kHeads, kModelDim, kQKVDim, num_tokens);
240
+
241
+ MixByAttentionVJP(forward.qkv.data(), forward.att.data(),
242
+ backward.att_out.data(), backward.qkv.data(),
243
+ backward.att.data(), num_tokens, kHeads, kQKVDim,
244
+ kSeqLen);
245
+
246
+ MaskedSoftmaxVJPT(forward.att.data(), backward.att.data(),
247
+ num_tokens, kHeads, kSeqLen);
248
+
249
+ MaskedAttentionVJP(forward.qkv.data(), backward.att.data(),
250
+ backward.qkv.data(), num_tokens, kHeads, kQKVDim, kSeqLen);
251
+
252
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
253
+ T* qkv = backward.qkv.data() + pos * (kHeads + 2) * kQKVDim;
254
+ MulByConstT(kQueryScale, qkv, kHeads * kQKVDim);
255
+ }
256
+
257
+ for (int pos = 0; pos < num_tokens; ++pos) {
258
+ T* qkv = backward.qkv.data() + pos * (kHeads + 2) * kQKVDim;
259
+ for (size_t h = 0; h <= kHeads; ++h) {
260
+ Rope(qkv + h * kQKVDim, kQKVDim, -pos);
261
+ }
262
+ }
263
+
264
+ MatMulVJPT(weights.qkv_einsum_w.data(), forward.pre_att_rms_out.data(),
265
+ backward.qkv.data(), grad.qkv_einsum_w.data(),
266
+ backward.pre_att_rms_out.data(),
267
+ (kHeads + 2) * kQKVDim, kModelDim, num_tokens);
268
+ RMSNormVJPT(weights.pre_attention_norm_scale.data(), forward.input.data(),
269
+ backward.pre_att_rms_out.data(),
270
+ grad.pre_attention_norm_scale.data(),
271
+ backward.input.data(), kModelDim, num_tokens);
272
+
273
+ AddFromT(backward.attention_out.data(), backward.input.data(),
274
+ num_tokens * kModelDim);
275
+ }
276
+
277
// In-place backward pass of the logit soft cap for one row of N values.
// `y` holds the capped values and `dy` the incoming gradient. Every entry
// becomes dy[i] * (1 - (y[i] / cap)^2); afterwards the entry at the position
// of the largest `y` (the first one if there are ties) is replaced by the
// negated sum of all other entries, so the row sums to zero.
//
// `cap` defaults to 30, the value that was previously hard-coded, so existing
// three-argument callers are unaffected.
template<typename T>
void SoftcapVJPT(const T* y, T* dy, size_t N, T cap = T(30.0)) {
  size_t imax = std::max_element(y, y + N) - y;
  T inv_cap = T(1.0) / cap;
  for (size_t i = 0; i < N; ++i) {
    T scaled = y[i] * inv_cap;
    dy[i] *= (T(1.0) - scaled * scaled);
  }
  // Rebuild the argmax entry from the others.
  dy[imax] = T(0.0);
  for (size_t i = 0; i < N; ++i) {
    if (i != imax) {
      dy[imax] -= dy[i];
    }
  }
}
293
+
294
+ template<typename T>
295
+ void CrossEntropyLossGrad(const T* x, T* dx, const Prompt& prompt, size_t V) {
296
+ T scaling = -1.0 / std::log(2.0);
297
+ const std::vector<int> tokens = prompt.tokens;
298
+ const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
299
+ memset(dx, 0, V * num_tokens * sizeof(x[0]));
300
+ for (size_t i = 0; i < num_tokens; ++i) {
301
+ if (i + 1 < prompt.context_size) {
302
+ continue;
303
+ }
304
+ const int next_token = tokens[i + 1];
305
+ dx[i * V + next_token] = scaling / x[i * V + next_token];
306
+ }
307
+ }
308
+
309
+ template<typename T, typename TConfig>
310
+ void CrossEntropyLossBackwardPass(const Prompt& prompt,
311
+ const Weights<T, TConfig>& weights,
312
+ const ForwardPass<T, TConfig>& forward,
313
+ Weights<T, TConfig>& grad,
314
+ ForwardPass<T, TConfig>& backward) {
315
+ static constexpr size_t kModelDim = TConfig::kModelDim;
316
+ static constexpr size_t kVocabSize = TConfig::kVocabSize;
317
+ static constexpr size_t kLayers = TConfig::kLayers;
318
+ const std::vector<int> tokens = prompt.tokens;
319
+ const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
320
+
321
+ CrossEntropyLossGrad(forward.probs.data(), backward.logits.data(), prompt,
322
+ kVocabSize);
323
+
324
+ SoftmaxVJPT(forward.probs.data(), backward.logits.data(),
325
+ kVocabSize, num_tokens);
326
+
327
+ for (size_t i = 0; i < num_tokens; ++i) {
328
+ SoftcapVJPT(forward.logits.data() + i * kVocabSize,
329
+ backward.logits.data() + i * kVocabSize,
330
+ kVocabSize);
331
+ }
332
+
333
+ MatMulVJPT(weights.embedder_input_embedding.data(),
334
+ forward.final_norm_output.data(),
335
+ backward.logits.data(),
336
+ grad.embedder_input_embedding.data(),
337
+ backward.final_norm_output.data(),
338
+ kVocabSize, kModelDim, num_tokens);
339
+
340
+ RMSNormVJPT(weights.final_norm_scale.data(),
341
+ forward.final_layer_output.data(),
342
+ backward.final_norm_output.data(),
343
+ grad.final_norm_scale.data(),
344
+ backward.final_layer_output.data(), kModelDim, num_tokens);
345
+
346
+ for (int layer = static_cast<int>(kLayers) - 1; layer >= 0; --layer) {
347
+ T* next_layer_grad = layer + 1 < kLayers
348
+ ? backward.layers[layer + 1].input.data()
349
+ : backward.final_layer_output.data();
350
+ LayerVJP(*weights.GetLayer(layer), forward.layers[layer], next_layer_grad,
351
+ *grad.GetLayer(layer), backward.layers[layer], num_tokens);
352
+ }
353
+
354
+ const T kEmbScaling = EmbeddingScaling(kModelDim);
355
+ InputEmbeddingVJPT(weights.embedder_input_embedding.data(),
356
+ tokens, kEmbScaling, backward.layers[0].input.data(),
357
+ grad.embedder_input_embedding.data(), kModelDim);
358
+ }
359
+
360
+ } // namespace gcpp
361
+
362
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_BACKWARD_SCALAR_H_
gemma.cpp/backprop/backward_scalar_test.cc ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #include "backprop/backward_scalar.h"
17
+
18
+ #include <stddef.h>
19
+ #include <string.h> // memset
20
+
21
+ #include <array>
22
+ #include <complex>
23
+ #include <random>
24
+
25
+ #include "gtest/gtest.h"
26
+ #include "backprop/forward_scalar.h"
27
+ #include "backprop/sampler.h"
28
+ #include "backprop/test_util.h"
29
+ #include "gemma/weights_raw.h"
30
+
31
+ namespace gcpp {
32
+
33
+ TEST(BackPropTest, MatMulVJP) {
34
+ static const size_t kRows = 8;
35
+ static const size_t kCols = 64;
36
+ static const size_t kTokens = 5;
37
+ std::mt19937 gen(42);
38
+ using T = double;
39
+ using TC = std::complex<T>;
40
+ std::array<T, kRows * kCols> weights;
41
+ std::array<T, kTokens * kCols> x;
42
+ std::array<T, kRows * kCols> grad;
43
+ std::array<T, kTokens * kCols> dx;
44
+ std::array<TC, kRows * kCols> c_weights;
45
+ std::array<TC, kTokens * kCols> c_x;
46
+ std::array<TC, kTokens * kRows> c_y;
47
+ std::array<T, kTokens * kRows> dy;
48
+
49
+ for (int iter = 0; iter < 10; ++iter) {
50
+ RandInit(weights, 1.0 * (1 << iter), gen);
51
+ RandInit(x, 1.0 * (1 << iter), gen);
52
+ RandInit(dy, 1.0, gen);
53
+ Complexify(weights, c_weights);
54
+ Complexify(x, c_x);
55
+ auto func = [&]() {
56
+ MatMulT(c_weights.data(), c_x.data(), c_y.data(), kRows, kCols, kTokens);
57
+ return DotT(dy.data(), c_y.data(), kTokens * kRows);
58
+ };
59
+ memset(&grad, 0, sizeof(grad));
60
+ MatMulVJPT(weights.data(), x.data(), dy.data(), grad.data(), dx.data(),
61
+ kRows, kCols, kTokens);
62
+ TestGradient(dx, c_x, func, 1e-11, 1e-12, __LINE__);
63
+ TestGradient(grad, c_weights, func, 1e-14, 1e-12, __LINE__);
64
+ }
65
+ }
66
+
67
+ TEST(BackPropTest, MultiHeadMatMulVJP) {
68
+ static const size_t kRows = 2;
69
+ static const size_t kCols = 16;
70
+ static const size_t kHeads = 4;
71
+ static const size_t kTokens = 3;
72
+ std::mt19937 gen(42);
73
+ using T = double;
74
+ using TC = std::complex<T>;
75
+ std::array<T, kRows * kCols * kHeads> weights;
76
+ std::array<T, kTokens * kCols * kHeads> x;
77
+ std::array<T, kRows * kCols * kHeads> grad;
78
+ std::array<T, kTokens * kCols * kHeads> dx;
79
+ std::array<TC, kRows * kCols * kHeads> c_weights;
80
+ std::array<TC, kTokens * kCols * kHeads> c_x;
81
+ std::array<TC, kTokens * kRows> c_y;
82
+ std::array<T, kTokens * kRows> dy;
83
+
84
+ for (int iter = 0; iter < 10; ++iter) {
85
+ RandInit(weights, 1.0 * (1 << iter), gen);
86
+ RandInit(x, 1.0 * (1 << iter), gen);
87
+ RandInit(dy, 1.0, gen);
88
+ Complexify(weights, c_weights);
89
+ Complexify(x, c_x);
90
+ auto func = [&]() {
91
+ MultiHeadMatMul(c_weights.data(), c_x.data(), c_y.data(), kHeads, kRows,
92
+ kCols, kTokens);
93
+ return DotT(dy.data(), c_y.data(), kTokens * kRows);
94
+ };
95
+ memset(&grad, 0, sizeof(grad));
96
+ MultiHeadMatMulVJPT(weights.data(), x.data(), dy.data(), grad.data(),
97
+ dx.data(), kHeads, kRows, kCols, kTokens);
98
+ TestGradient(dx, c_x, func, 1e-15, 1e-13, __LINE__);
99
+ TestGradient(grad, c_weights, func, 1e-15, 1e-13, __LINE__);
100
+ }
101
+ }
102
+
103
+ TEST(BackPropTest, RMSNormVJP) {
104
+ static const size_t K = 2;
105
+ static const size_t N = 64;
106
+ std::mt19937 gen(42);
107
+ using T = double;
108
+ using TC = std::complex<T>;
109
+ std::array<T, N> weights;
110
+ std::array<T, N> grad;
111
+ std::array<T, K * N> x;
112
+ std::array<T, K * N> dx;
113
+ std::array<T, K * N> dy;
114
+ std::array<TC, N> c_weights;
115
+ std::array<TC, K * N> c_x;
116
+ std::array<TC, K * N> c_y;
117
+
118
+ for (int iter = 0; iter < 10; ++iter) {
119
+ RandInit(weights, 1.0 * (1 << iter), gen);
120
+ RandInit(x, 1.0 * (1 << iter), gen);
121
+ Complexify(weights, c_weights);
122
+ Complexify(x, c_x);
123
+ RandInit(dy, 1.0, gen);
124
+ auto func = [&]() {
125
+ RMSNormT(c_weights.data(), c_x.data(), c_y.data(), N, K);
126
+ return DotT(dy.data(), c_y.data(), K * N);
127
+ };
128
+ memset(&grad, 0, sizeof(grad));
129
+ RMSNormVJPT(weights.data(), x.data(), dy.data(), grad.data(), dx.data(),
130
+ N, K);
131
+ TestGradient(dx, c_x, func, 1e-15, 1e-14, __LINE__);
132
+ TestGradient(grad, c_weights, func, 1e-15, 1e-14, __LINE__);
133
+ }
134
+ }
135
+
136
+ TEST(BackPropTest, SoftmaxVJP) {
137
+ static const size_t N = 64;
138
+ std::mt19937 gen(42);
139
+ using T = double;
140
+ using TC = std::complex<T>;
141
+ std::array<T, N> x;
142
+ std::array<T, N> dx;
143
+ std::array<T, N> dy;
144
+ std::array<TC, N> c_x;
145
+ std::array<TC, N> c_y;
146
+
147
+ for (int iter = 0; iter < 10; ++iter) {
148
+ RandInit(x, 1.0 * (1 << iter), gen);
149
+ Complexify(x, c_x);
150
+ RandInit(dy, 1.0, gen);
151
+ auto func = [&]() {
152
+ memcpy(c_y.data(), c_x.data(), sizeof(c_x));
153
+ Softmax(c_y.data(), N);
154
+ return DotT(dy.data(), c_y.data(), N);
155
+ };
156
+ Softmax(x.data(), N);
157
+ memcpy(dx.data(), dy.data(), N * sizeof(dx[0]));
158
+ SoftmaxVJPT(x.data(), dx.data(), N);
159
+ TestGradient(dx, c_x, func, 1e-15, 1e-15, __LINE__);
160
+ }
161
+ }
162
+
163
+ TEST(BackPropTest, MaskedSoftmaxVJP) {
164
+ static const size_t kSeqLen = 16;
165
+ static const size_t kHeads = 2;
166
+ static const size_t kTokens = 14;
167
+ static const size_t N = kHeads * kSeqLen * kSeqLen;
168
+ std::mt19937 gen(42);
169
+ using T = double;
170
+ using TC = std::complex<T>;
171
+ std::array<T, N> x;
172
+ std::array<T, N> dy;
173
+ std::array<T, N> dx = {};
174
+ std::array<TC, N> c_x;
175
+ std::array<TC, N> c_y;
176
+
177
+ for (int iter = 0; iter < 10; ++iter) {
178
+ RandInit(x, 1.0 * (1 << iter), gen);
179
+ Complexify(x, c_x);
180
+ RandInit(dy, 1.0, gen);
181
+ auto func = [&]() {
182
+ memcpy(c_y.data(), c_x.data(),
183
+ kTokens * kHeads * kSeqLen * sizeof(c_x[0]));
184
+ MaskedSoftmax(c_y.data(), kTokens, kHeads, kSeqLen);
185
+ return DotT(dy.data(), c_y.data(), N);
186
+ };
187
+ MaskedSoftmax(x.data(), kTokens, kHeads, kSeqLen);
188
+ memcpy(dx.data(), dy.data(), kTokens * kHeads * kSeqLen * sizeof(dx[0]));
189
+ MaskedSoftmaxVJPT(x.data(), dx.data(), kTokens, kHeads, kSeqLen);
190
+ TestGradient(dx, c_x, func, 1e-14, 1e-15, __LINE__);
191
+ }
192
+ }
193
+
194
+ TEST(BackPropTest, SoftcapVJP) {
195
+ static const size_t N = 64;
196
+ std::mt19937 gen(42);
197
+ using T = double;
198
+ using TC = std::complex<T>;
199
+ std::array<T, N> x;
200
+ std::array<T, N> dx;
201
+ std::array<T, N> dy;
202
+ std::array<TC, N> c_x;
203
+ std::array<TC, N> c_y;
204
+
205
+ for (int iter = 0; iter < 10; ++iter) {
206
+ RandInit(x, 1.0 * (1 << iter), gen);
207
+ Complexify(x, c_x);
208
+ RandInit(dy, 1.0, gen);
209
+ auto func = [&]() {
210
+ memcpy(c_y.data(), c_x.data(), N * sizeof(c_x[0]));
211
+ Softcap(c_y.data(), N);
212
+ return DotT(dy.data(), c_y.data(), N);
213
+ };
214
+ Softcap(x.data(), N);
215
+ memcpy(dx.data(), dy.data(), N * sizeof(dx[0]));
216
+ SoftcapVJPT(x.data(), dx.data(), N);
217
+ TestGradient(dx, c_x, func, 1e-15, 1e-14, __LINE__);
218
+ }
219
+ }
220
+
221
+ TEST(BackPropTest, CrossEntropyLossGrad) {
222
+ static const size_t K = 8;
223
+ static const size_t V = 64;
224
+ std::mt19937 gen(42);
225
+ using T = double;
226
+ using TC = std::complex<T>;
227
+ std::array<T, K * V> x;
228
+ std::array<T, K * V> dx;
229
+ std::array<TC, K * V> c_x;
230
+ Prompt prompt;
231
+ prompt.tokens = { 0, 1, 2, 3, 0, 3, 2, 1, 0 };
232
+
233
+ for (int iter = 0; iter < 10; ++iter) {
234
+ prompt.context_size = 1 + (iter % 6);
235
+ RandInit(x, 1.0 * (1 << iter), gen);
236
+ Softcap(x.data(), V * K);
237
+ Softmax(x.data(), V, K);
238
+ CrossEntropyLossGrad(x.data(), dx.data(), prompt, V);
239
+ Complexify(x, c_x);
240
+ auto func = [&]() {
241
+ return CrossEntropyLoss(c_x.data(), prompt, V);
242
+ };
243
+ TestGradient(dx, c_x, func, 1e-100, 1e-15, __LINE__);
244
+ }
245
+ }
246
+
247
+ TEST(BackPropTest, GatedGeluVJP) {
248
+ static const size_t K = 2;
249
+ static const size_t N = 64;
250
+ std::mt19937 gen(42);
251
+ using T = double;
252
+ using TC = std::complex<T>;
253
+ std::array<T, K * 2 * N> x;
254
+ std::array<T, K * 2 * N> dx;
255
+ std::array<T, K * N> dy;
256
+ std::array<TC, K * 2 * N> c_x;
257
+ std::array<TC, K * N> c_y;
258
+
259
+ for (int iter = 0; iter < 10; ++iter) {
260
+ RandInit(x, 1.0, gen);
261
+ Complexify(x, c_x);
262
+ RandInit(dy, 1.0, gen);
263
+ auto func = [&]() {
264
+ GatedGelu(c_x.data(), c_y.data(), N, K);
265
+ return DotT(dy.data(), c_y.data(), N * K);
266
+ };
267
+ GatedGeluVJP(x.data(), dy.data(), dx.data(), N, K);
268
+ TestGradient(dx, c_x, func, 1e-15, 1e-15, __LINE__);
269
+ }
270
+ }
271
+
272
+ TEST(BackPropTest, MaskedAttentionVJP) {
273
+ static const size_t kSeqLen = 16;
274
+ static const size_t kHeads = 2;
275
+ static const size_t kQKVDim = 8;
276
+ static const size_t kTokens = 14;
277
+ static const size_t kQKVSize = kSeqLen * (kHeads + 2) * kQKVDim;
278
+ static const size_t kOutSize = kSeqLen * kHeads * kSeqLen;
279
+ std::mt19937 gen(42);
280
+ using T = double;
281
+ using TC = std::complex<T>;
282
+ std::array<T, kQKVSize> x;
283
+ std::array<T, kQKVSize> dx = {};
284
+ std::array<T, kOutSize> dy;
285
+ std::array<TC, kQKVSize> c_x;
286
+ std::array<TC, kOutSize> c_y;
287
+
288
+ for (int iter = 0; iter < 10; ++iter) {
289
+ RandInit(x, 1.0, gen);
290
+ Complexify(x, c_x);
291
+ RandInit(dy, 1.0, gen);
292
+ auto func = [&]() {
293
+ MaskedAttention(c_x.data(), c_y.data(), kTokens, kHeads, kQKVDim,
294
+ kSeqLen);
295
+ return DotT(dy.data(), c_y.data(), kOutSize);
296
+ };
297
+ MaskedAttentionVJP(x.data(), dy.data(), dx.data(),
298
+ kTokens, kHeads, kQKVDim, kSeqLen);
299
+ TestGradient(dx, c_x, func, 1e-14, 1e-15, __LINE__);
300
+ }
301
+ }
302
+
303
+ TEST(BackPropTest, MixByAttentionVJP) {
304
+ static const size_t kSeqLen = 16;
305
+ static const size_t kHeads = 2;
306
+ static const size_t kQKVDim = 8;
307
+ static const size_t kTokens = 14;
308
+ static const size_t kQKVSize = kSeqLen * (kHeads + 2) * kQKVDim;
309
+ static const size_t kAttnSize = kSeqLen * kHeads * kSeqLen;
310
+ static const size_t kOutSize = kSeqLen * kHeads * kQKVDim;
311
+ std::mt19937 gen(42);
312
+ using T = double;
313
+ using TC = std::complex<T>;
314
+ std::array<T, kQKVSize> qkv;
315
+ std::array<T, kQKVSize> dqkv = {};
316
+ std::array<T, kAttnSize> attn;
317
+ std::array<T, kAttnSize> dattn = {};
318
+ std::array<T, kOutSize> dy;
319
+ std::array<TC, kQKVSize> c_qkv;
320
+ std::array<TC, kAttnSize> c_attn;
321
+ std::array<TC, kOutSize> c_y;
322
+
323
+ for (int iter = 0; iter < 10; ++iter) {
324
+ RandInit(qkv, 1.0, gen);
325
+ RandInit(attn, 1.0, gen);
326
+ Complexify(qkv, c_qkv);
327
+ Complexify(attn, c_attn);
328
+ RandInit(dy, 1.0, gen);
329
+ auto func = [&]() {
330
+ MixByAttention(c_qkv.data(), c_attn.data(), c_y.data(),
331
+ kTokens, kHeads, kQKVDim, kSeqLen);
332
+ return DotT(dy.data(), c_y.data(), kOutSize);
333
+ };
334
+ MixByAttentionVJP(qkv.data(), attn.data(), dy.data(), dqkv.data(),
335
+ dattn.data(), kTokens, kHeads, kQKVDim, kSeqLen);
336
+ TestGradient(dqkv, c_qkv, func, 1e-14, 1e-15, __LINE__);
337
+ TestGradient(dattn, c_attn, func, 1e-14, 1e-15, __LINE__);
338
+ }
339
+ }
340
+
341
+ TEST(BackPropTest, InputEmbeddingVJP) {
342
+ static const size_t kSeqLen = 8;
343
+ static const size_t kVocabSize = 4;
344
+ static const size_t kModelDim = 16;
345
+ std::mt19937 gen(42);
346
+ using T = double;
347
+ using TC = std::complex<T>;
348
+ std::array<T, kVocabSize * kModelDim> weights;
349
+ std::array<T, kVocabSize * kModelDim> grad;
350
+ std::array<T, kSeqLen * kModelDim> dy;
351
+ std::array<TC, kVocabSize * kModelDim> c_weights;
352
+ std::array<TC, kSeqLen * kModelDim> c_y;
353
+ std::vector<int> tokens = { 0, 1, 2, 3, 0, 1, 2 };
354
+ size_t num_tokens = tokens.size() - 1;
355
+
356
+ for (size_t iter = 0; iter < 10; ++iter) {
357
+ RandInit(weights, 1.0, gen);
358
+ RandInit(dy, 1.0, gen);
359
+ Complexify(weights, c_weights);
360
+ auto func = [&]() {
361
+ InputEmbedding(c_weights.data(), tokens, TC(3.0), c_y.data(), kModelDim);
362
+ return DotT(dy.data(), c_y.data(), num_tokens * kModelDim);
363
+ };
364
+ memset(&grad, 0, sizeof(grad));
365
+ InputEmbeddingVJPT(weights.data(), tokens, 3.0, dy.data(), grad.data(),
366
+ kModelDim);
367
+ TestGradient(grad, c_weights, func, 1e-16, 1e-14, __LINE__);
368
+ }
369
+ }
370
+
371
+ struct TestConfig {
372
+ static constexpr int kSeqLen = 18;
373
+ static constexpr int kVocabSize = 12;
374
+ static constexpr int kModelDim = 32;
375
+ static constexpr int kHeads = 3;
376
+ static constexpr int kQKVDim = 12;
377
+ static constexpr int kFFHiddenDim = 48;
378
+ static constexpr std::array<LayerAttentionType, 2> kLayerConfig =
379
+ FixedLayerConfig<2>(LayerAttentionType::kGemma);
380
+ static constexpr int kLayers = kLayerConfig.size();
381
+ static constexpr bool kAbsolutePE = false;
382
+ static constexpr bool kPostNormScale = false;
383
+
384
+ static constexpr int kKVHeads = 1;
385
+ static constexpr int kConv1dWidth = 0;
386
+ static constexpr bool kFFBiases = false;
387
+ static constexpr bool kSoftmaxAttnOutputBiases = false;
388
+ static constexpr int kGemmaLayers = kLayers;
389
+ static constexpr int kGriffinLayers = 0;
390
+ static constexpr int kNumTensorScales = 0;
391
+ };
392
+
393
+ TEST(BackPropTest, LayerVJP) {
394
+ std::mt19937 gen(42);
395
+ using T = double;
396
+ using TC = std::complex<T>;
397
+ const size_t kOutputSize = TestConfig::kSeqLen * TestConfig::kModelDim;
398
+ Layer<T, TestConfig> weights;
399
+ Layer<T, TestConfig> grad;
400
+ ForwardLayer<T, TestConfig> forward;
401
+ ForwardLayer<T, TestConfig> backward = {};
402
+ Layer<TC, TestConfig> c_weights;
403
+ ForwardLayer<TC, TestConfig> c_forward;
404
+ std::array<T, kOutputSize> y;
405
+ std::array<T, kOutputSize> dy;
406
+ std::array<TC, kOutputSize> c_y;
407
+ const size_t num_tokens = 3;
408
+
409
+ for (size_t iter = 0; iter < 10; ++iter) {
410
+ RandInit(weights, 1.0, gen);
411
+ RandInit(forward.input, 1.0, gen);
412
+ RandInit(dy, 1.0, gen);
413
+ Complexify(weights, c_weights);
414
+ Complexify(forward.input, c_forward.input);
415
+ auto func = [&]() {
416
+ ApplyLayer(c_weights, c_forward, num_tokens, c_y.data());
417
+ return DotT(dy.data(), c_y.data(), num_tokens * TestConfig::kModelDim);
418
+ };
419
+ memset(&grad, 0, sizeof(grad));
420
+ ApplyLayer(weights, forward, num_tokens, y.data());
421
+ LayerVJP(weights, forward, dy.data(), grad, backward, num_tokens);
422
+ TestGradient(backward.input, c_forward.input, func, 1e-11, 5e-11,
423
+ __LINE__);
424
+ TestGradient(grad, c_weights, func, 1e-11);
425
+ }
426
+ }
427
+
428
+ TEST(BackPropTest, EndToEnd) {
429
+ std::mt19937 gen(42);
430
+ using T = double;
431
+ using TC = std::complex<T>;
432
+ WeightsWrapper<T, TestConfig> weights;
433
+ WeightsWrapper<T, TestConfig> grad;
434
+ ForwardPass<T, TestConfig> forward;
435
+ ForwardPass<T, TestConfig> backward;
436
+ WeightsWrapper<TC, TestConfig> c_weights;
437
+ ForwardPass<TC, TestConfig> c_forward;
438
+
439
+ ReverseSequenceSampler training_task({0, 0, 1, 1});
440
+ std::vector<Prompt> batch = training_task.SampleBatch(3, gen);
441
+
442
+ for (const Prompt& prompt : batch) {
443
+ ReverseSequenceSampler::LogPrompt(prompt);
444
+ RandInit(weights.get(), 1.0, gen);
445
+ CrossEntropyLossForwardPass(prompt, weights.get(), forward);
446
+ grad.clear();
447
+ CrossEntropyLossBackwardPass(
448
+ prompt, weights.get(), forward, grad.get(), backward);
449
+
450
+ Complexify(weights.get(), c_weights.get());
451
+ auto func = [&]() {
452
+ return CrossEntropyLossForwardPass(prompt, c_weights.get(), c_forward);
453
+ };
454
+
455
+ TestGradient(grad.get(), c_weights.get(), func, 1e-11);
456
+ }
457
+ }
458
+
459
+ template<typename T, typename TConfig>
460
+ void MulByConstAndAddT(T c, const Layer<T, TConfig>& x,
461
+ Layer<T, TConfig>& out) {
462
+ MulByConstAndAddT(c, x.pre_attention_norm_scale,
463
+ out.pre_attention_norm_scale);
464
+ MulByConstAndAddT(c, x.attn_vec_einsum_w, out.attn_vec_einsum_w);
465
+ MulByConstAndAddT(c, x.qkv_einsum_w, out.qkv_einsum_w);
466
+ MulByConstAndAddT(c, x.pre_ffw_norm_scale, out.pre_ffw_norm_scale);
467
+ MulByConstAndAddT(c, x.gating_einsum_w, out.gating_einsum_w);
468
+ MulByConstAndAddT(c, x.linear_w, out.linear_w);
469
+ }
470
+
471
+ template<typename T, typename TConfig>
472
+ void MulByConstAndAddT(T c, const Weights<T, TConfig>& x,
473
+ Weights<T, TConfig>& out) {
474
+ static constexpr size_t kLayers = TConfig::kLayers;
475
+ MulByConstAndAddT(c, x.embedder_input_embedding,
476
+ out.embedder_input_embedding);
477
+ MulByConstAndAddT(c, x.final_norm_scale, out.final_norm_scale);
478
+ for (size_t i = 0; i < kLayers; ++i) {
479
+ MulByConstAndAddT(c, *x.GetLayer(i), *out.GetLayer(i));
480
+ }
481
+ }
482
+
483
+ // Evaluates forward pass on a batch.
484
+ template<typename T, typename TConfig>
485
+ T CrossEntropyLossForwardPass(const std::vector<Prompt>& batch,
486
+ const WeightsWrapper<T, TConfig>& weights,
487
+ ForwardPass<T, TConfig>& forward) {
488
+ T loss = 0.0;
489
+ for (const Prompt& prompt : batch) {
490
+ loss += CrossEntropyLossForwardPass(prompt, weights.get(), forward);
491
+ }
492
+ T scale = 1.0 / batch.size();
493
+ return loss * scale;
494
+ }
495
+
496
+ // Evaluates forward pass on a batch by applying gradient with the given
497
+ // learning rate. Does not update weights, but uses the given tmp weights
498
+ // instead.
499
+ template<typename T, typename TConfig>
500
+ T CrossEntropyLossForwardPass(T learning_rate,
501
+ const std::vector<Prompt>& batch,
502
+ const WeightsWrapper<T, TConfig>& weights,
503
+ const WeightsWrapper<T, TConfig>& grad,
504
+ WeightsWrapper<T, TConfig>& tmp,
505
+ ForwardPass<T, TConfig>& forward) {
506
+ tmp.copy(weights);
507
+ const T scale = -learning_rate / batch.size();
508
+ MulByConstAndAddT(scale, grad.get(), tmp.get());
509
+ return CrossEntropyLossForwardPass(batch, tmp, forward);
510
+ }
511
+
512
+ // Uses line search in the negative gradient direction to update weights. We do
513
+ // this so that we can test that each step during the gradient descent can
514
+ // decrease the objective function value.
515
+ template<typename T, typename TConfig>
516
+ T FindOptimalUpdate(const WeightsWrapper<T, TConfig>& grad,
517
+ WeightsWrapper<T, TConfig>& weights,
518
+ WeightsWrapper<T, TConfig>& tmp,
519
+ ForwardPass<T, TConfig>& forward,
520
+ const std::vector<Prompt>& batch,
521
+ T loss, T initial_learning_rate) {
522
+ T lr0 = initial_learning_rate;
523
+ T loss0 = CrossEntropyLossForwardPass(
524
+ lr0, batch, weights, grad, tmp, forward);
525
+ for (size_t iter = 0; iter < 30; ++iter) {
526
+ T lr1 = lr0 * 0.5;
527
+ T loss1 = CrossEntropyLossForwardPass(
528
+ lr1, batch, weights, grad, tmp, forward);
529
+ if (loss0 < loss && loss1 >= loss0) {
530
+ break;
531
+ }
532
+ loss0 = loss1;
533
+ lr0 = lr1;
534
+ }
535
+ for (size_t iter = 0; iter < 30; ++iter) {
536
+ T lr1 = lr0 * 2.0;
537
+ T loss1 = CrossEntropyLossForwardPass(
538
+ lr1, batch, weights, grad, tmp, forward);
539
+ if (loss1 >= loss0) {
540
+ break;
541
+ }
542
+ loss0 = loss1;
543
+ lr0 = lr1;
544
+ }
545
+ const T scale = -lr0 / batch.size();
546
+ MulByConstAndAddT(scale, grad.get(), weights.get());
547
+ return lr0;
548
+ }
549
+
550
+ TEST(BackProptest, Convergence) {
551
+ std::mt19937 gen(42);
552
+ using T = float;
553
+ using TC = std::complex<double>;
554
+ WeightsWrapper<T, TestConfig> weights;
555
+ WeightsWrapper<T, TestConfig> grad;
556
+ WeightsWrapper<T, TestConfig> tmp;
557
+ ForwardPass<T, TestConfig> forward;
558
+ ForwardPass<T, TestConfig> backward;
559
+ WeightsWrapper<TC, TestConfig> c_weights;
560
+ ForwardPass<TC, TestConfig> c_forward;
561
+ constexpr size_t kBatchSize = 5;
562
+ ReverseSequenceSampler training_task({0, 0, 0, 1, 1});
563
+ T learning_rate = 0.01;
564
+
565
+ RandInit(weights.get(), T(1.0), gen);
566
+
567
+ printf("Sample batch:\n");
568
+ for (size_t i = 0; i < 10; ++i) {
569
+ ReverseSequenceSampler::LogPrompt(training_task.Sample(gen));
570
+ }
571
+
572
+ T prev_loss = std::numeric_limits<T>::max();
573
+ bool stop = false;
574
+ size_t step = 0;
575
+ while (!stop) {
576
+ T loss = 0.0;
577
+ grad.clear();
578
+ std::mt19937 sgen(42);
579
+ std::vector<Prompt> batch = training_task.SampleBatch(kBatchSize, sgen);
580
+ for (const Prompt& prompt : batch) {
581
+ loss += CrossEntropyLossForwardPass(prompt, weights.get(), forward);
582
+ CrossEntropyLossBackwardPass(
583
+ prompt, weights.get(), forward, grad.get(), backward);
584
+ }
585
+
586
+ if (step % 250 == 0) {
587
+ printf("Checking gradient...\n");
588
+ Complexify(weights.get(), c_weights.get());
589
+ auto func = [&]() {
590
+ TC scale = batch.size();
591
+ return CrossEntropyLossForwardPass(batch, c_weights, c_forward) * scale;
592
+ };
593
+
594
+ TestGradient(grad.get(), c_weights.get(), func, 5e-3f);
595
+ }
596
+
597
+ loss /= batch.size();
598
+ EXPECT_LT(loss, prev_loss);
599
+ stop = step >= 10000 || loss < 1e-2;
600
+ if (step % 10 == 0 || stop) {
601
+ printf("step: %5zu loss: %.15f learning_rate: %.15f\n",
602
+ step, loss, learning_rate);
603
+ }
604
+ if (!stop) {
605
+ learning_rate = FindOptimalUpdate(
606
+ grad, weights, tmp, forward, batch, loss, learning_rate);
607
+ ++step;
608
+ }
609
+ prev_loss = loss;
610
+ }
611
+ EXPECT_LT(step, 1000);
612
+ }
613
+
614
+ } // namespace gcpp
gemma.cpp/backprop/backward_test.cc ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2023 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HWY_DISABLED_TARGETS
17
+ #define HWY_DISABLED_TARGETS HWY_SCALAR
18
+ #endif
19
+
20
+ #include <stddef.h>
21
+
22
+ #include <array>
23
+ #include <complex>
24
+ #include <random>
25
+ #include <vector>
26
+
27
+ #include "backprop/backward_scalar.h"
28
+ #include "backprop/forward_scalar.h"
29
+ #include "backprop/sampler.h"
30
+ #include "backprop/test_util.h"
31
+ #include "gemma/activations.h"
32
+ #include "gemma/weights_raw.h"
33
+ #include "hwy/base.h"
34
+ #include "hwy/contrib/thread_pool/thread_pool.h"
35
+
36
+ // clang-format off
37
+ #undef HWY_TARGET_INCLUDE
38
+ #define HWY_TARGET_INCLUDE "backprop/backward_test.cc" //NOLINT
39
+ // clang-format on
40
+ #include "hwy/foreach_target.h" // IWYU pragma: keep
41
+ #include "hwy/highway.h"
42
+ #include "hwy/tests/test_util-inl.h"
43
+ // After highway.h
44
+ #include "backprop/backward-inl.h"
45
+ #include "backprop/forward-inl.h"
46
+ #include "gemma/ops.h"
47
+
48
+ HWY_BEFORE_NAMESPACE();
49
+ namespace gcpp {
50
+ namespace HWY_NAMESPACE {
51
+
52
+ void TestMatMulVJP() {
53
+ static const size_t kRows = 8;
54
+ static const size_t kCols = 64;
55
+ static const size_t kTokens = 5;
56
+ hwy::ThreadPool pool(8);
57
+ std::mt19937 gen(42);
58
+ HWY_ALIGN std::array<float, kRows * kCols> weights;
59
+ HWY_ALIGN std::array<float, kTokens * kCols> x;
60
+ HWY_ALIGN std::array<float, kTokens * kRows> dy;
61
+ HWY_ALIGN std::array<float, kRows * kCols> grad;
62
+ HWY_ALIGN std::array<float, kTokens * kCols> dx;
63
+ HWY_ALIGN std::array<float, kRows * kCols> grad_scalar;
64
+ HWY_ALIGN std::array<float, kTokens * kCols> dx_scalar;
65
+ using TC = std::complex<double>;
66
+ std::array<TC, kRows * kCols> c_weights;
67
+ std::array<TC, kTokens * kCols> c_x;
68
+ std::array<TC, kTokens * kRows> c_y;
69
+
70
+ for (int iter = 0; iter < 10; ++iter) {
71
+ RandInit(weights, 1.0f * (1 << iter), gen);
72
+ RandInit(x, 1.0f * (1 << iter), gen);
73
+ RandInit(dy, 1.0f, gen);
74
+ Complexify(weights, c_weights);
75
+ Complexify(x, c_x);
76
+ auto func = [&]() {
77
+ MatMulT(c_weights.data(), c_x.data(), c_y.data(), kRows, kCols, kTokens);
78
+ return DotT(dy.data(), c_y.data(), kTokens * kRows);
79
+ };
80
+
81
+ hwy::ZeroBytes(&grad, sizeof(grad));
82
+ MatMulVJP<kCols, kRows>(weights.data(), x.data(), dy.data(), kTokens,
83
+ grad.data(), dx.data(), pool);
84
+ TestGradient(dx, c_x, func, 5e-5, 5e-5, __LINE__);
85
+ TestGradient(grad, c_weights, func, 5e-5, 5e-5, __LINE__);
86
+
87
+ hwy::ZeroBytes(&grad_scalar, sizeof(grad_scalar));
88
+ MatMulVJPT(weights.data(), x.data(), dy.data(), grad_scalar.data(),
89
+ dx_scalar.data(), kRows, kCols, kTokens);
90
+ TestNear(dx, dx_scalar, 5e-5, 1e-4, __LINE__);
91
+ TestNear(grad, grad_scalar, 5e-5, 5e-5, __LINE__);
92
+ }
93
+ }
94
+
95
+ void TestMultiHeadMatMulVJP() {
96
+ static const size_t kRows = 2;
97
+ static const size_t kCols = 16;
98
+ static const size_t kHeads = 4;
99
+ static const size_t kTokens = 3;
100
+ hwy::ThreadPool pool(8);
101
+ std::mt19937 gen(42);
102
+ HWY_ALIGN std::array<float, kRows * kCols * kHeads> weights;
103
+ HWY_ALIGN std::array<float, kTokens * kCols * kHeads> x;
104
+ HWY_ALIGN std::array<float, kRows * kCols * kHeads> grad;
105
+ HWY_ALIGN std::array<float, kTokens * kCols * kHeads> dx;
106
+ HWY_ALIGN std::array<float, kTokens * kRows> dy;
107
+ HWY_ALIGN std::array<float, kRows * kCols * kHeads> grad_scalar;
108
+ HWY_ALIGN std::array<float, kTokens * kCols * kHeads> dx_scalar;
109
+ using TC = std::complex<double>;
110
+ std::array<TC, kRows * kCols * kHeads> c_weights;
111
+ std::array<TC, kTokens * kCols * kHeads> c_x;
112
+ std::array<TC, kTokens * kRows> c_y;
113
+
114
+ for (int iter = 0; iter < 10; ++iter) {
115
+ RandInit(weights, 1.0f * (1 << iter), gen);
116
+ RandInit(x, 1.0f * (1 << iter), gen);
117
+ RandInit(dy, 1.0f, gen);
118
+ Complexify(weights, c_weights);
119
+ Complexify(x, c_x);
120
+ auto func = [&]() {
121
+ MultiHeadMatMul(c_weights.data(), c_x.data(), c_y.data(), kHeads, kRows,
122
+ kCols, kTokens);
123
+ return DotT(dy.data(), c_y.data(), kTokens * kRows);
124
+ };
125
+
126
+ hwy::ZeroBytes(&grad, sizeof(grad));
127
+ MultiHeadMatMulVJP<kHeads, kCols, kRows>(
128
+ weights.data(), x.data(), dy.data(), kTokens, grad.data(), dx.data(),
129
+ pool);
130
+ TestGradient(dx, c_x, func, 5e-5, 5e-5, __LINE__);
131
+ TestGradient(grad, c_weights, func, 5e-5, 5e-5, __LINE__);
132
+
133
+ hwy::ZeroBytes(&grad_scalar, sizeof(grad_scalar));
134
+ MultiHeadMatMulVJPT(weights.data(), x.data(), dy.data(), grad_scalar.data(),
135
+ dx_scalar.data(), kHeads, kRows, kCols, kTokens);
136
+ TestNear(dx, dx_scalar, 5e-5, 5e-5, __LINE__);
137
+ TestNear(grad, grad_scalar, 5e-5, 5e-5, __LINE__);
138
+ }
139
+ }
140
+
141
+ void TestRMSNormVJP() {
142
+ static const size_t K = 2;
143
+ static const size_t N = 64;
144
+ hwy::ThreadPool pool(8);
145
+ std::mt19937 gen(42);
146
+ HWY_ALIGN std::array<float, N> weights;
147
+ HWY_ALIGN std::array<float, K * N> x;
148
+ HWY_ALIGN std::array<float, N> grad;
149
+ HWY_ALIGN std::array<float, K * N> dx;
150
+ HWY_ALIGN std::array<float, K * N> dy;
151
+ HWY_ALIGN std::array<float, N> grad_scalar;
152
+ HWY_ALIGN std::array<float, K * N> dx_scalar;
153
+ using TC = std::complex<double>;
154
+ std::array<TC, N> c_weights;
155
+ std::array<TC, K * N> c_x;
156
+ std::array<TC, K * N> c_y;
157
+
158
+ for (int iter = 0; iter < 10; ++iter) {
159
+ RandInit(weights, 1.0f * (1 << iter), gen);
160
+ RandInit(x, 1.0f * (1 << iter), gen);
161
+ RandInit(dy, 1.0f, gen);
162
+ Complexify(weights, c_weights);
163
+ Complexify(x, c_x);
164
+ auto func = [&]() {
165
+ RMSNormT(c_weights.data(), c_x.data(), c_y.data(), N, K);
166
+ return DotT(dy.data(), c_y.data(), K * N);
167
+ };
168
+
169
+ hwy::ZeroBytes(&grad, sizeof(grad));
170
+ RMSNormVJP(weights.data(), x.data(), dy.data(), N, K, grad.data(),
171
+ dx.data(), pool);
172
+ TestGradient(dx, c_x, func, 5e-5, 5e-5, __LINE__);
173
+ TestGradient(grad, c_weights, func, 5e-5, 5e-5, __LINE__);
174
+
175
+ hwy::ZeroBytes(&grad_scalar, sizeof(grad_scalar));
176
+ RMSNormVJPT(weights.data(), x.data(), dy.data(), grad_scalar.data(),
177
+ dx_scalar.data(), N, K);
178
+ TestNear(dx, dx_scalar, 0, 2e-5, __LINE__);
179
+ TestNear(grad, grad_scalar, 0, 2e-5, __LINE__);
180
+ }
181
+ }
182
+
183
+ struct TestConfig {
184
+ static constexpr int kSeqLen = 24;
185
+ static constexpr int kVocabSize = 16;
186
+ static constexpr int kModelDim = 32;
187
+ static constexpr int kHeads = 3;
188
+ static constexpr int kQKVDim = 16;
189
+ static constexpr int kFFHiddenDim = 64;
190
+ static constexpr std::array<LayerAttentionType, 2> kLayerConfig =
191
+ FixedLayerConfig<2>(LayerAttentionType::kGemma);
192
+ static constexpr int kLayers = kLayerConfig.size();
193
+ static constexpr bool kAbsolutePE = false;
194
+ static constexpr bool kPostNormScale = false;
195
+
196
+ static constexpr int kKVHeads = 1;
197
+ static constexpr int kConv1dWidth = 0;
198
+ static constexpr bool kFFBiases = false;
199
+ static constexpr bool kSoftmaxAttnOutputBiases = false;
200
+ static constexpr int kGemmaLayers = kLayers;
201
+ static constexpr int kGriffinLayers = 0;
202
+ static constexpr int kNumTensorScales = 0;
203
+ };
204
+
205
+ void TestEndToEnd() {
206
+ std::mt19937 gen(42);
207
+ hwy::ThreadPool pool(0);
208
+ WeightsWrapper<float, TestConfig> weights;
209
+ WeightsWrapper<float, TestConfig> grad;
210
+ ActivationsWrapper<float, TestConfig> forward0;
211
+ ActivationsWrapper<float, TestConfig> forward1;
212
+ ActivationsWrapper<float, TestConfig> backward;
213
+ using TC = std::complex<double>;
214
+ WeightsWrapper<TC, TestConfig> c_weights;
215
+ ForwardPass<TC, TestConfig> c_forward;
216
+
217
+ ReverseSequenceSampler training_task({0, 0, 1, 1});
218
+ std::vector<Prompt> batch = training_task.SampleBatch(3, gen);
219
+
220
+ for (const Prompt& prompt : batch) {
221
+ ReverseSequenceSampler::LogPrompt(prompt);
222
+ RandInit(weights.get(), 1.0f, gen);
223
+
224
+ float loss0 = CrossEntropyLossForwardPass(
225
+ prompt, weights.get(), forward0.get());
226
+
227
+ float loss1 = CrossEntropyLossForwardPass<TestConfig, WeightsF, LayerF>(
228
+ prompt.tokens, prompt.context_size, weights.get(), forward1.get(),
229
+ pool);
230
+
231
+ EXPECT_NEAR(loss1, loss0, std::abs(loss0) * 2e-5);
232
+
233
+ grad.clear();
234
+ CrossEntropyLossBackwardPass<TestConfig, WeightsF, LayerF>(
235
+ prompt, weights.get(), forward1.get(), grad.get(), backward.get(),
236
+ pool);
237
+
238
+ Complexify(weights.get(), c_weights.get());
239
+ auto func = [&]() {
240
+ return CrossEntropyLossForwardPass(prompt, c_weights.get(), c_forward);
241
+ };
242
+
243
+ TestGradient(grad.get(), c_weights.get(), func, 2e-3f);
244
+ }
245
+ }
246
+
247
+ // NOLINTNEXTLINE(google-readability-namespace-comments)
248
+ } // namespace HWY_NAMESPACE
249
+ } // namespace gcpp
250
+ HWY_AFTER_NAMESPACE();
251
+
252
#if HWY_ONCE

// Test registration for the four test functions defined above, guarded by
// HWY_ONCE.
namespace gcpp {

HWY_BEFORE_TEST(BackwardTest);
HWY_EXPORT_AND_TEST_P(BackwardTest, TestMatMulVJP);
HWY_EXPORT_AND_TEST_P(BackwardTest, TestMultiHeadMatMulVJP);
HWY_EXPORT_AND_TEST_P(BackwardTest, TestRMSNormVJP);
HWY_EXPORT_AND_TEST_P(BackwardTest, TestEndToEnd);
HWY_AFTER_TEST();

}  // namespace gcpp

#endif  // HWY_ONCE
gemma.cpp/backprop/common_scalar.h ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_COMMON_SCALAR_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_COMMON_SCALAR_H_
18
+
19
+ #include <stddef.h>
20
+
21
+ #include <array>
22
+ #include <complex>
23
+
24
+ namespace gcpp {
25
+
26
// Returns the inner product of the first N elements of `a` and `b`.
// The running sum (and the result) uses U, the element type of `b`.
template <typename T, typename U>
U DotT(const T* a, const U* b, size_t N) {
  U acc = {};
  const T* const a_end = a + N;
  for (; a != a_end; ++a, ++b) {
    acc += (*a) * (*b);
  }
  return acc;
}
34
+
35
+ template<>
36
+ inline std::complex<double> DotT(const float* a, const std::complex<double>* b,
37
+ size_t N) {
38
+ std::complex<double> sum = {};
39
+ for (size_t i = 0; i < N; ++i) {
40
+ sum += static_cast<double>(a[i]) * b[i];
41
+ }
42
+ return sum;
43
+ }
44
+
45
// Scales the first N elements of `x` in place: x[i] *= c.
template <typename T>
void MulByConstT(T c, T* x, size_t N) {
  T* const x_end = x + N;
  for (T* p = x; p != x_end; ++p) {
    *p *= c;
  }
}
51
+
52
// Accumulates a scaled copy of `x` into `out`:
//   out[i] += c * x[i]  for i in [0, N).
template <typename T>
void MulByConstAndAddT(T c, const T* x, T* out, size_t N) {
  size_t k = 0;
  while (k < N) {
    out[k] += c * x[k];
    ++k;
  }
}
59
+
60
// std::array convenience overload: out += c * x over all N elements, by
// forwarding to the pointer-based overload.
template <typename T, size_t N>
void MulByConstAndAddT(T c, const std::array<T, N>& x, std::array<T, N>& out) {
  const T* src = x.data();
  T* dst = out.data();
  MulByConstAndAddT(c, src, dst, N);
}
64
+
65
// Element-wise accumulation: out[i] += a[i] for i in [0, N).
template <typename T>
void AddFromT(const T* a, T* out, size_t N) {
  const T* const a_end = a + N;
  while (a != a_end) {
    *out += *a;
    ++out;
    ++a;
  }
}
71
+
72
// Returns the sum of x[i] * x[i] over the first N elements. Note that for
// complex T this is the plain square, not the squared magnitude.
template <typename T>
T SquaredL2(const T* x, size_t N) {
  T total = {};
  const T* const x_end = x + N;
  for (const T* p = x; p != x_end; ++p) {
    total += (*p) * (*p);
  }
  return total;
}
80
+
81
// Tanh-based GELU approximation:
//   x * 0.5 * (1 + tanh(kSqrt2OverPi * (x + kMul * x^3))).
template <typename T>
T Gelu(T x) {
  static const T kMul = 0.044715;
  static const T kSqrt2OverPi = 0.797884560804236;

  const T cubed = x * x * x;
  const T tanh_arg = kSqrt2OverPi * (kMul * cubed + x);
  const T half_one_plus_tanh = T(0.5) * (T(1.0) + std::tanh(tanh_arg));
  return x * half_one_plus_tanh;
}
91
+
92
// Applies a rotary position embedding in place to the N-element vector `x`
// for position `i`. Element `d` in the first half is paired with element
// `d + N/2`; each pair is rotated by the angle i / base^(2d / N).
template <typename T, typename U>
void Rope(T* x, U base, size_t N, int i) {
  const size_t half = N / 2;
  T* const lo = x;
  T* const hi = x + half;
  for (size_t d = 0; d < half; ++d) {
    const T freq_exponents = T(2 * d) / T(N);
    const T timescale = std::pow(base, freq_exponents);
    const T angle = T(i) / timescale;
    const T c = std::cos(angle);
    const T s = std::sin(angle);
    // Read both inputs before writing either output.
    const T a = lo[d];
    const T b = hi[d];
    lo[d] = a * c - b * s;
    hi[d] = a * s + b * c;
  }
}
107
+
108
// Rotary position embedding with the default base of 10000.
template <typename T>
void Rope(T* x, size_t N, int i) {
  const T default_base = T(10000.0);
  Rope(x, default_base, N, i);
}
112
+
113
// Rotary position embedding for complex-valued vectors, using the default
// base of 10000 expressed in the underlying real type T.
template <typename T>
void Rope(std::complex<T>* x, size_t N, int i) {
  const T default_base = T(10000.0);
  Rope(x, default_base, N, i);
}
117
+
118
+ } // namespace gcpp
119
+
120
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_COMMON_SCALAR_H_
gemma.cpp/backprop/forward-inl.h ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ // Include guard for non-SIMD code.
17
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_INL_H_
18
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_INL_H_
19
+
20
+ #include <stddef.h>
21
+ #include <stdint.h>
22
+
23
+ #include <cmath>
24
+ #include <vector>
25
+
26
+ #include "gemma/activations.h"
27
+ #include "gemma/common.h"
28
+ #include "gemma/configs.h"
29
+ #include "hwy/base.h"
30
+ #include "hwy/contrib/thread_pool/thread_pool.h"
31
+
32
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_INL_H_
33
+
34
+ // Include guard for (potentially) SIMD code.
35
+ #if defined(THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE) == defined(HWY_TARGET_TOGGLE)
36
+ #ifdef THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE
37
+ #undef THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE
38
+ #else
39
+ #define THIRD_PARTY_GEMMA_CPP_FORWARD_TOGGLE
40
+ #endif
41
+
42
+ #include "gemma/ops.h"
43
+ #include "hwy/highway.h"
44
+
45
+ HWY_BEFORE_NAMESPACE();
46
+ namespace gcpp {
47
+ namespace HWY_NAMESPACE {
48
+
49
+ template <typename ArrayT>
50
+ void InputEmbedding(const ArrayT& weights, const std::vector<int>& prompt,
51
+ const float scaling, float* HWY_RESTRICT output,
52
+ size_t model_dim) {
53
+ HWY_ASSERT(!prompt.empty());
54
+ for (size_t pos = 0; pos < prompt.size() - 1; ++pos) {
55
+ int token = prompt[pos];
56
+ Decompress(weights, token * model_dim, output + pos * model_dim, model_dim);
57
+ MulByConst(scaling, output + pos * model_dim, model_dim);
58
+ }
59
+ }
60
+
61
+ template<typename WT, typename XT, typename OutT>
62
+ void ApplyRMSNorm(const WT* HWY_RESTRICT weights, const XT* HWY_RESTRICT x,
63
+ size_t model_dim, size_t num_tokens,
64
+ OutT* HWY_RESTRICT output,
65
+ hwy::ThreadPool& pool) {
66
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
67
+ const size_t offset = pos * model_dim;
68
+ RMSNorm(x + offset, weights, output + offset, model_dim);
69
+ }
70
+ }
71
+
72
+ static HWY_NOINLINE float CrossEntropyLoss(const float* HWY_RESTRICT probs,
73
+ const std::vector<int>& prompt,
74
+ size_t context_size,
75
+ size_t vocab_size,
76
+ hwy::ThreadPool& pool) {
77
+ HWY_ASSERT(!prompt.empty());
78
+ float loss = 0.0f;
79
+ for (size_t pos = 0; pos < prompt.size() - 1; ++pos) {
80
+ if (pos + 1 < context_size) {
81
+ continue; // next token is part of context, don't try to predict it
82
+ }
83
+ const int next_token = prompt[pos + 1];
84
+ loss += std::log(probs[pos * vocab_size + next_token]);
85
+ }
86
+ float scaling = -1.0 / std::log(2.0);
87
+ return loss * scaling;
88
+ }
89
+
90
+ template <typename TConfig, template<typename> typename LayerT>
91
+ void ApplyForwardLayer(const LayerT<TConfig>& weights,
92
+ ForwardLayer<float, TConfig>& activations,
93
+ size_t num_tokens,
94
+ float* HWY_RESTRICT output,
95
+ hwy::ThreadPool& pool) {
96
+ static constexpr size_t kModelDim = TConfig::kModelDim;
97
+ static constexpr size_t kSeqLen = TConfig::kSeqLen;
98
+ static constexpr size_t kQKVDim = TConfig::kQKVDim;
99
+ static constexpr size_t kHeads = TConfig::kHeads;
100
+ static const float kQueryScale =
101
+ static_cast<float>(1.0 / sqrt(static_cast<double>(kQKVDim)));
102
+ HWY_ASSERT(num_tokens <= kSeqLen);
103
+
104
+ ApplyRMSNorm(weights.pre_attention_norm_scale.data(),
105
+ activations.input.data(), kModelDim, num_tokens,
106
+ activations.pre_att_rms_out.data(), pool);
107
+
108
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
109
+ MatVec<(kHeads + 2) * kQKVDim, kModelDim>(
110
+ weights.qkv_einsum_w, 0,
111
+ activations.pre_att_rms_out.data() + pos * kModelDim, nullptr,
112
+ activations.qkv.data() + pos * (kHeads + 2) * kQKVDim, pool);
113
+ }
114
+ const size_t num_tasks = kHeads * num_tokens;
115
+
116
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
117
+ float* HWY_RESTRICT k =
118
+ activations.qkv.data() + (pos * (kHeads + 2) + kHeads) * kQKVDim;
119
+ Rope(k, kQKVDim, pos);
120
+ }
121
+ pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
122
+ const size_t head = task % kHeads;
123
+ const size_t pos = task / kHeads;
124
+ float* HWY_RESTRICT q =
125
+ activations.qkv.data() + (pos * (kHeads + 2) + head) * kQKVDim;
126
+ Rope(q, kQKVDim, pos);
127
+ MulByConst(kQueryScale, q, kQKVDim);
128
+ });
129
+
130
+ pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
131
+ const size_t head = task % kHeads;
132
+ const size_t pos = task / kHeads;
133
+ const float* HWY_RESTRICT q =
134
+ activations.qkv.data() + (pos * (kHeads + 2) + head) * kQKVDim;
135
+ float* HWY_RESTRICT head_att =
136
+ activations.att.data() + (pos * kHeads + head) * kSeqLen;
137
+ for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
138
+ const float* HWY_RESTRICT k2 =
139
+ activations.qkv.data() + (pos2 * (kHeads + 2) + kHeads) * kQKVDim;
140
+ const float score = Dot(q, k2, kQKVDim);
141
+ head_att[pos2] = score;
142
+ }
143
+ });
144
+
145
+ pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
146
+ const size_t head = task % kHeads;
147
+ const size_t pos = task / kHeads;
148
+ float* HWY_RESTRICT head_att =
149
+ activations.att.data() + (pos * kHeads + head) * kSeqLen;
150
+ Softmax(head_att, pos + 1);
151
+ });
152
+
153
+ pool.Run(0, num_tasks, [&](const uint64_t task, size_t thread) HWY_ATTR {
154
+ const size_t head = task % kHeads;
155
+ const size_t pos = task / kHeads;
156
+ const float* HWY_RESTRICT head_att =
157
+ activations.att.data() + (pos * kHeads + head) * kSeqLen;
158
+ float* HWY_RESTRICT att_out =
159
+ activations.att_out.data() + (pos * kHeads + head) * kQKVDim;
160
+ hwy::ZeroBytes(att_out, kQKVDim * sizeof(*att_out));
161
+ for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
162
+ float* HWY_RESTRICT v2 =
163
+ activations.qkv.data() + (pos2 * (kHeads + 2) + kHeads + 1) * kQKVDim;
164
+ MulByConstAndAdd(head_att[pos2], v2, att_out, kQKVDim);
165
+ }
166
+ });
167
+
168
+ hwy::ZeroBytes(activations.attention_out.data(),
169
+ num_tokens * kModelDim * sizeof(activations.attention_out[0]));
170
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
171
+ for (size_t head = 0; head < kHeads; ++head) {
172
+ MatVec<kModelDim, kQKVDim>(
173
+ weights.attn_vec_einsum_w, head * kModelDim * kQKVDim,
174
+ activations.att_out.data() + pos * kHeads * kQKVDim + head * kQKVDim,
175
+ nullptr, activations.att_post1.data() + pos * kModelDim, pool);
176
+ AddFrom(activations.att_post1.data() + pos * kModelDim,
177
+ activations.attention_out.data() + pos * kModelDim, kModelDim);
178
+ }
179
+ }
180
+
181
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
182
+ AddFrom(activations.input.data() + pos * kModelDim,
183
+ activations.attention_out.data() + pos * kModelDim, kModelDim);
184
+ }
185
+
186
+ ApplyRMSNorm(weights.pre_ffw_norm_scale.data(),
187
+ activations.attention_out.data(), kModelDim, num_tokens,
188
+ activations.bf_pre_ffw_rms_out.data(), pool);
189
+ static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
190
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
191
+ MatVec<kFFHiddenDim * 2, kModelDim>(
192
+ weights.gating_einsum_w, 0,
193
+ activations.bf_pre_ffw_rms_out.data() + pos * kModelDim, nullptr,
194
+ activations.ffw_hidden.data() + pos * kFFHiddenDim * 2, pool);
195
+ }
196
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
197
+ const size_t hidden_offset = pos * kFFHiddenDim * 2;
198
+ const float* HWY_RESTRICT out =
199
+ activations.ffw_hidden.data() + hidden_offset;
200
+ const float* HWY_RESTRICT out_mul = out + kFFHiddenDim;
201
+ float* HWY_RESTRICT out_gated =
202
+ activations.ffw_hidden_gated.data() + pos * kFFHiddenDim;
203
+ namespace hn = hwy::HWY_NAMESPACE;
204
+ using DF = hn::ScalableTag<float>;
205
+ DF df;
206
+ for (size_t i = 0; i < kFFHiddenDim; i += Lanes(df)) {
207
+ const auto y = hn::Load(df, out + i);
208
+ const auto x = hn::Load(df, out_mul + i);
209
+ hn::Store(hn::Mul(x, Gelu(df, y)), df, out_gated + i);
210
+ }
211
+ }
212
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
213
+ MatVec<kModelDim, kFFHiddenDim>(
214
+ weights.linear_w, 0,
215
+ activations.ffw_hidden_gated.data() + pos * kFFHiddenDim,
216
+ nullptr, output + pos * kModelDim, pool);
217
+ }
218
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
219
+ AddFrom(activations.attention_out.data() + pos * kModelDim,
220
+ output + pos * kModelDim, kModelDim);
221
+ }
222
+ }
223
+
224
+ template <typename TConfig, template<typename...> typename WeightsT,
225
+ template<typename> typename LayerT>
226
+ float CrossEntropyLossForwardPass(const std::vector<int>& prompt,
227
+ size_t context_size,
228
+ const WeightsT<TConfig>& weights,
229
+ ForwardPass<float, TConfig>& forward,
230
+ hwy::ThreadPool& pool) {
231
+ static constexpr size_t kVocabSize = TConfig::kVocabSize;
232
+ static constexpr size_t kModelDim = TConfig::kModelDim;
233
+ static constexpr size_t kLayers = TConfig::kLayers;
234
+ const float kEmbScaling = EmbeddingScaling<TConfig>();
235
+ static_assert(!TConfig::kAbsolutePE);
236
+ static_assert(!TConfig::kPostNormScale);
237
+ static_assert(TConfig::kKVHeads == 1);
238
+
239
+ HWY_DASSERT(context_size > 0);
240
+ HWY_DASSERT(context_size < prompt.size());
241
+ const size_t num_tokens = prompt.size() - 1;
242
+
243
+ InputEmbedding(weights.embedder_input_embedding, prompt, kEmbScaling,
244
+ forward.layers[0].input.data(), kModelDim);
245
+
246
+ for (size_t layer = 0; layer < kLayers; ++layer) {
247
+ auto type = TConfig::kLayerConfig[layer];
248
+ // TODO(szabadka) Implement Griffin layer.
249
+ HWY_ASSERT(type == LayerAttentionType::kGemma);
250
+ float* HWY_RESTRICT output = layer + 1 < kLayers ?
251
+ forward.layers[layer + 1].input.data() :
252
+ forward.final_layer_output.data();
253
+ ApplyForwardLayer<TConfig, LayerT>(
254
+ *weights.GetLayer(layer), forward.layers[layer],
255
+ num_tokens, output, pool);
256
+ }
257
+
258
+ ApplyRMSNorm(weights.final_norm_scale.data(),
259
+ forward.final_layer_output.data(),
260
+ kModelDim, num_tokens, forward.final_norm_output.data(), pool);
261
+
262
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
263
+ MatVec<kVocabSize, kModelDim>(
264
+ weights.embedder_input_embedding, 0,
265
+ forward.final_norm_output.data() + pos * kModelDim, nullptr,
266
+ forward.logits.data() + pos * kVocabSize, pool);
267
+ }
268
+
269
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
270
+ LogitsSoftCap(30.0f, forward.logits.data() + pos * kVocabSize, kVocabSize);
271
+ }
272
+
273
+ hwy::CopyBytes(forward.logits.data(), forward.probs.data(),
274
+ num_tokens * kVocabSize * sizeof(forward.logits[0]));
275
+
276
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
277
+ Softmax(forward.probs.data() + pos * kVocabSize, kVocabSize);
278
+ }
279
+
280
+ return CrossEntropyLoss(forward.probs.data(), prompt, context_size,
281
+ kVocabSize, pool);
282
+ }
283
+
284
+ // NOLINTNEXTLINE(google-readability-namespace-comments)
285
+ } // namespace HWY_NAMESPACE
286
+ } // namespace gcpp
287
+ HWY_AFTER_NAMESPACE();
288
+
289
+ #endif // NOLINT
gemma.cpp/backprop/forward.cc ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #include "backprop/forward.h"
17
+
18
+ #include "backprop/prompt.h"
19
+ #include "gemma/activations.h"
20
+ #include "gemma/common.h"
21
+ #include "hwy/contrib/thread_pool/thread_pool.h"
22
+
23
+ // Compiles this file for multiple architectures via "foreach_target.h", to
24
+ // which we pass the filename via macro 'argument'.
25
+ #undef HWY_TARGET_INCLUDE
26
+ #define HWY_TARGET_INCLUDE "backprop/forward.cc" // NOLINT
27
+ #include "hwy/foreach_target.h" // IWYU pragma: keep
28
+
29
+ #include "hwy/highway.h"
30
+ // After highway.h
31
+ #include "backprop/forward-inl.h"
32
+ #include "gemma/weights.h"
33
+
34
+ HWY_BEFORE_NAMESPACE();
35
+ namespace gcpp {
36
+ namespace HWY_NAMESPACE {
37
+
38
+ template <typename TConfig>
39
+ float CrossEntropyLossForwardPass(const Prompt& prompt,
40
+ const ByteStorageT& weights_u8,
41
+ ByteStorageT& forward_u8,
42
+ hwy::ThreadPool& pool) {
43
+ const auto& weights =
44
+ *reinterpret_cast<CompressedWeights<TConfig>*>(weights_u8.get());
45
+ auto& forward =
46
+ *reinterpret_cast<ForwardPass<float, TConfig>*>(forward_u8.get());
47
+ return
48
+ CrossEntropyLossForwardPass<TConfig, CompressedWeights, CompressedLayer>(
49
+ prompt.tokens, prompt.context_size, weights, forward, pool);
50
+ }
51
+
52
+ float CrossEntropyLossForwardPassT(Model model, const Prompt& prompt,
53
+ const ByteStorageT& weights,
54
+ ByteStorageT& forward,
55
+ hwy::ThreadPool& pool) {
56
+ // TODO(janwas): use CallFunctorForModel
57
+ switch (model) {
58
+ case Model::GEMMA_2B:
59
+ return CrossEntropyLossForwardPass<ConfigGemma2B<float>>(prompt, weights,
60
+ forward, pool);
61
+ case Model::GEMMA_TINY:
62
+ return CrossEntropyLossForwardPass<ConfigGemmaTiny<float>>(
63
+ prompt, weights, forward, pool);
64
+ default:
65
+ HWY_ABORT("Model type %d unknown.", static_cast<int>(model));
66
+ }
67
+ }
68
+
69
+ } // namespace HWY_NAMESPACE
70
+ } // namespace gcpp
71
+ HWY_AFTER_NAMESPACE();
72
+
73
+ #if HWY_ONCE
74
+ namespace gcpp {
75
+
76
+ HWY_EXPORT(CrossEntropyLossForwardPassT);
77
+
78
+ float CrossEntropyLossForwardPass(
79
+ const Model& model, const Prompt& prompt, const ByteStorageT& weights,
80
+ ByteStorageT& forward, hwy::ThreadPool& pool) {
81
+ return HWY_DYNAMIC_DISPATCH(CrossEntropyLossForwardPassT)(
82
+ model, prompt, weights, forward, pool);
83
+ }
84
+
85
+ } // namespace gcpp
86
+ #endif // HWY_ONCE
gemma.cpp/backprop/forward.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_H_
18
+
19
+ #include <vector>
20
+
21
+ #include "backprop/prompt.h"
22
+ #include "gemma/common.h"
23
+ #include "hwy/contrib/thread_pool/thread_pool.h"
24
+
25
+ namespace gcpp {
26
+
27
// Runs the forward pass of `model` on `prompt` and returns the resulting
// cross-entropy loss.
//
// `weights` and `forward` are type-erased byte buffers. The implementation in
// forward.cc reinterprets them as the CompressedWeights and
// ForwardPass<float, ...> of the configuration selected by `model`, and
// writes intermediate activations into `forward`. Only Model::GEMMA_2B and
// Model::GEMMA_TINY are handled there; other values abort.
float CrossEntropyLossForwardPass(
    const Model& model, const Prompt& prompt, const ByteStorageT& weights,
    ByteStorageT& forward, hwy::ThreadPool& pool);
30
+
31
+ } // namespace gcpp
32
+
33
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_H_
gemma.cpp/backprop/forward_scalar.h ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_SCALAR_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_SCALAR_H_
18
+
19
+ #include <stddef.h>
20
+ #include <string.h>
21
+
22
+ #include <cmath>
23
+ #include <complex>
24
+ #include <vector>
25
+
26
+ #include "backprop/common_scalar.h"
27
+ #include "backprop/prompt.h"
28
+ #include "gemma/activations.h"
29
+ #include "gemma/common.h" // EmbeddingScaling
30
+ #include "gemma/weights_raw.h"
31
+
32
+ namespace gcpp {
33
+
34
// Computes y = w * x, where
//   w is an N x M matrix stored row-major,
//   x is an M x K matrix stored column-major,
//   y is an N x K matrix stored column-major.
// Each output element is the dot product of one row of w with one column of x.
template <typename T>
void MatMulT(const T* w, const T* x, T* y, size_t N, size_t M, size_t K) {
  for (size_t col = 0; col < K; ++col) {
    const T* x_col = x + col * M;
    T* y_col = y + col * N;
    for (size_t row = 0; row < N; ++row) {
      y_col[row] = DotT(w + row * M, x_col, M);
    }
  }
}
44
+
45
// Computes y = w' * x, where
//   w  is H concatenated N x M matrices, each stored row-major,
//   w' is w rearranged into a single N x (H*M) matrix,
//   x  is an (H*M) x K matrix stored column-major,
//   y  is an N x K matrix stored column-major.
// y is zeroed first; the contribution of each head is then accumulated.
template <typename T>
void MultiHeadMatMul(const T* w, const T* x, T* y, size_t H, size_t N,
                     size_t M, size_t K) {
  memset(y, 0, N * K * sizeof(y[0]));
  for (size_t col = 0; col < K; ++col) {
    T* y_col = y + col * N;
    for (size_t head = 0; head < H; ++head) {
      const T* w_head = w + head * N * M;
      const T* x_slice = x + col * H * M + head * M;
      for (size_t row = 0; row < N; ++row) {
        y_col[row] += DotT(w_head + row * M, x_slice, M);
      }
    }
  }
}
60
+
61
// RMS-normalizes each of the K rows of `x` (N elements per row) and scales
// the result by (1 + w[j]):
//   out[i][j] = (1 + w[j]) * x[i][j] / sqrt(mean_j(x[i][j]^2) + 1e-6).
template <typename T>
void RMSNormT(const T* w, const T* x, T* out, size_t N, size_t K) {
  constexpr T eps(1e-6);
  for (size_t row = 0; row < K; ++row) {
    const T* in_row = x + row * N;
    T* out_row = out + row * N;
    T ss = SquaredL2(in_row, N);
    // After this line ss holds the reciprocal RMS of the row.
    ss = T(1.0) / std::sqrt(ss / T(N) + eps);
    for (size_t j = 0; j < N; ++j) {
      out_row[j] = (T(1.0) + w[j]) * (ss * in_row[j]);
    }
  }
}
72
// In-place softmax over the first N elements of `x`.
//
// The largest real part is subtracted before exponentiation so that the
// exponentials cannot overflow; std::real lets the same code serve both real
// and complex T. An empty range (N == 0) is a no-op, which avoids reading
// x[0] and dividing by a zero sum.
template <typename T>
void Softmax(T* x, size_t N) {
  if (N == 0) {
    return;
  }
  auto maxreal = std::real(x[0]);
  for (size_t i = 1; i < N; ++i) {
    if (std::real(x[i]) > maxreal) {
      maxreal = std::real(x[i]);
    }
  }
  T sum = {};
  for (size_t i = 0; i < N; ++i) {
    x[i] = std::exp(x[i] - maxreal);
    sum += x[i];
  }
  // Normalize so that the elements sum to one.
  const T scale = T(1.0) / sum;
  for (size_t i = 0; i < N; ++i) {
    x[i] *= scale;
  }
}
90
// Applies the single-row softmax independently to each of the K consecutive
// rows of `x`, each N elements long.
template <typename T>
void Softmax(T* x, size_t N, size_t K) {
  T* row = x;
  for (size_t k = 0; k < K; ++k, row += N) {
    Softmax(row, N);
  }
}
96
// In-place soft cap of the first N elements of `x`:
//   x[i] = 30 * tanh((x[i] - xmax) / 30),
// where xmax is the element with the largest real part (the first such
// element if several tie). std::real lets the same code serve real and
// complex T. An empty range (N == 0) is a no-op, which avoids reading x[0].
template <typename T>
void Softcap(T* x, size_t N) {
  if (N == 0) {
    return;
  }
  auto maxreal = std::real(x[0]);
  size_t imax = 0;
  for (size_t i = 1; i < N; ++i) {
    if (std::real(x[i]) > maxreal) {
      maxreal = std::real(x[i]);
      imax = i;
    }
  }
  const T cap = 30.0;
  const T inv_cap = T(1.0) / cap;
  // Copy the maximum before the loop overwrites x[imax].
  const T xmax = x[imax];
  for (size_t i = 0; i < N; ++i) {
    x[i] = cap * std::tanh((x[i] - xmax) * inv_cap);
  }
}
113
+
114
// Gated GELU over K rows. Each input row holds 2 * N values: the first N are
// passed through Gelu and the second N multiply the result element-wise,
// giving one N-element output row: out[j] = in[N + j] * Gelu(in[j]).
template <typename T>
void GatedGelu(const T* in, T* out, size_t N, size_t K) {
  for (size_t row = 0; row < K; ++row) {
    const T* gate = in + row * 2 * N;
    const T* mul = gate + N;
    T* dst = out + row * N;
    for (size_t j = 0; j < N; ++j) {
      dst[j] = mul[j] * Gelu(gate[j]);
    }
  }
}
125
+
126
// Copies the embedding row (N values from `w`) of every token except the last
// one into consecutive rows of `y`, then multiplies each row by `scaling`.
// Nothing is written for an empty token list.
template <typename T>
void InputEmbedding(const T* w, const std::vector<int>& tokens, T scaling,
                    T* y, size_t N) {
  if (tokens.empty()) {
    return;
  }
  const size_t num_rows = tokens.size() - 1;
  for (size_t pos = 0; pos < num_rows; ++pos) {
    const int token = tokens[pos];
    T* row = y + pos * N;
    memcpy(row, w + token * N, N * sizeof(y[0]));
    MulByConstT(scaling, row, N);
  }
}
136
+
137
// Causal attention scores. For each position `pos` and each head, writes the
// dot product of that head's query with the key of every position
// pos2 <= pos into output[(pos * kHeads + head) * kSeqLen + pos2].
// In `qkv`, each position holds kHeads query vectors followed by one key and
// one value vector, each kQKVDim long. Entries with pos2 > pos are not
// written.
template <typename T>
void MaskedAttention(const T* qkv, T* output, size_t num_tokens,
                     size_t kHeads, size_t kQKVDim, size_t kSeqLen) {
  const size_t token_stride = (kHeads + 2) * kQKVDim;
  const size_t key_offset = kHeads * kQKVDim;
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    const T* queries = qkv + pos * token_stride;
    for (size_t head = 0; head < kHeads; ++head) {
      const T* q = queries + head * kQKVDim;
      T* scores = output + pos * kHeads * kSeqLen + head * kSeqLen;
      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
        const T* k = qkv + pos2 * token_stride + key_offset;
        scores[pos2] = DotT(q, k, kQKVDim);
      }
    }
  }
}
152
// Causal softmax over attention scores. For each (pos, head) row of length
// kSeqLen, the first pos + 1 entries are normalized with Softmax and the
// remaining entries are set to zero bytes.
template <typename T>
void MaskedSoftmax(T* x, size_t num_tokens, size_t kHeads, size_t kSeqLen) {
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    const size_t visible = pos + 1;
    for (size_t head = 0; head < kHeads; ++head) {
      T* row = x + pos * kHeads * kSeqLen + head * kSeqLen;
      Softmax(row, visible);
      memset(row + visible, 0, (kSeqLen - visible) * sizeof(T));
    }
  }
}
162
// Mixes value vectors by attention weight. For each position `pos` and head,
// the kQKVDim-long output row is zeroed and then accumulates
// attention[pos, head, pos2] * value(pos2) for every pos2 <= pos. The value
// vector of a position is the last of its kHeads + 2 vectors in `qkv`.
template <typename T>
void MixByAttention(const T* qkv, const T* attention, T* output,
                    size_t num_tokens, size_t kHeads, size_t kQKVDim,
                    size_t kSeqLen) {
  const size_t token_stride = (kHeads + 2) * kQKVDim;
  const size_t value_offset = (kHeads + 1) * kQKVDim;
  for (size_t pos = 0; pos < num_tokens; ++pos) {
    for (size_t head = 0; head < kHeads; ++head) {
      const T* weights_row = attention + pos * kHeads * kSeqLen + head * kSeqLen;
      T* mixed = output + head * kQKVDim + pos * kHeads * kQKVDim;
      memset(mixed, 0, kQKVDim * sizeof(mixed[0]));
      for (size_t pos2 = 0; pos2 <= pos; ++pos2) {
        const T* value = qkv + pos2 * token_stride + value_offset;
        MulByConstAndAddT(weights_row[pos2], value, mixed, kQKVDim);
      }
    }
  }
}
179
+ template<typename T, typename TConfig>
180
+ void ApplyLayer(const Layer<T, TConfig>& weights,
181
+ ForwardLayer<T, TConfig>& activations,
182
+ size_t num_tokens, T* output) {
183
+ static constexpr size_t kModelDim = TConfig::kModelDim;
184
+ static constexpr size_t kSeqLen = TConfig::kSeqLen;
185
+ static constexpr size_t kQKVDim = TConfig::kQKVDim;
186
+ static constexpr size_t kHeads = TConfig::kHeads;
187
+ static constexpr size_t kFFHiddenDim = TConfig::kFFHiddenDim;
188
+ static const T kQueryScale = T(1.0) / std::sqrt(T(kQKVDim));
189
+
190
+ RMSNormT(weights.pre_attention_norm_scale.data(), activations.input.data(),
191
+ activations.pre_att_rms_out.data(), kModelDim, num_tokens);
192
+
193
+ MatMulT(weights.qkv_einsum_w.data(), activations.pre_att_rms_out.data(),
194
+ activations.qkv.data(), (kHeads + 2) * kQKVDim, kModelDim,
195
+ num_tokens);
196
+
197
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
198
+ T* qkv = activations.qkv.data() + pos * (kHeads + 2) * kQKVDim;
199
+ for (size_t h = 0; h <= kHeads; ++h) {
200
+ Rope(qkv + h * kQKVDim, kQKVDim, pos);
201
+ }
202
+ }
203
+
204
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
205
+ T* qkv = activations.qkv.data() + pos * (kHeads + 2) * kQKVDim;
206
+ MulByConstT(kQueryScale, qkv, kHeads * kQKVDim);
207
+ }
208
+
209
+ MaskedAttention(activations.qkv.data(), activations.att.data(),
210
+ num_tokens, kHeads, kQKVDim, kSeqLen);
211
+
212
+ MaskedSoftmax(activations.att.data(), num_tokens, kHeads, kSeqLen);
213
+
214
+ MixByAttention(activations.qkv.data(), activations.att.data(),
215
+ activations.att_out.data(), num_tokens, kHeads, kQKVDim,
216
+ kSeqLen);
217
+
218
+ MultiHeadMatMul(weights.attn_vec_einsum_w.data(), activations.att_out.data(),
219
+ activations.attention_out.data(), kHeads, kModelDim, kQKVDim,
220
+ num_tokens);
221
+
222
+ AddFromT(activations.input.data(), activations.attention_out.data(),
223
+ num_tokens * kModelDim);
224
+
225
+ RMSNormT(weights.pre_ffw_norm_scale.data(), activations.attention_out.data(),
226
+ activations.bf_pre_ffw_rms_out.data(), kModelDim, num_tokens);
227
+
228
+ MatMulT(weights.gating_einsum_w.data(), activations.bf_pre_ffw_rms_out.data(),
229
+ activations.ffw_hidden.data(), kFFHiddenDim * 2, kModelDim,
230
+ num_tokens);
231
+
232
+ GatedGelu(activations.ffw_hidden.data(), activations.ffw_hidden_gated.data(),
233
+ kFFHiddenDim, num_tokens);
234
+
235
+ MatMulT(weights.linear_w.data(), activations.ffw_hidden_gated.data(),
236
+ output, kModelDim, kFFHiddenDim, num_tokens);
237
+
238
+ AddFromT(activations.attention_out.data(), output, num_tokens * kModelDim);
239
+ }
240
+
241
+ template<typename T>
242
+ T CrossEntropyLoss(const T* x, const Prompt& prompt, size_t V) {
243
+ T loss = {};
244
+ const std::vector<int> tokens = prompt.tokens;
245
+ const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
246
+ for (size_t i = 0; i < num_tokens; ++i) {
247
+ if (i + 1 < prompt.context_size) {
248
+ continue; // next token is part of context, don't try to predict it
249
+ }
250
+ const int next_token = tokens[i + 1];
251
+ loss += std::log(x[i * V + next_token]);
252
+ }
253
+ T scaling = -1.0 / std::log(2.0);
254
+ return loss * scaling;
255
+ }
256
+
257
+ template<typename T, typename TConfig>
258
+ T CrossEntropyLossForwardPass(const Prompt& prompt,
259
+ const Weights<T, TConfig>& weights,
260
+ ForwardPass<T, TConfig>& forward) {
261
+ static constexpr size_t kModelDim = TConfig::kModelDim;
262
+ static constexpr size_t kVocabSize = TConfig::kVocabSize;
263
+ static constexpr size_t kLayers = TConfig::kLayers;
264
+ const std::vector<int> tokens = prompt.tokens;
265
+ const size_t num_tokens = tokens.empty() ? 0 : tokens.size() - 1;
266
+
267
+ const T kEmbScaling = EmbeddingScaling(kModelDim);
268
+ InputEmbedding(weights.embedder_input_embedding.data(), tokens,
269
+ kEmbScaling, forward.layers[0].input.data(), kModelDim);
270
+
271
+ for (size_t layer = 0; layer < kLayers; ++layer) {
272
+ T* output = layer + 1 < kLayers ?
273
+ forward.layers[layer + 1].input.data() :
274
+ forward.final_layer_output.data();
275
+ ApplyLayer(*weights.GetLayer(layer), forward.layers[layer], num_tokens,
276
+ output);
277
+ }
278
+
279
+ RMSNormT(weights.final_norm_scale.data(),
280
+ forward.final_layer_output.data(),
281
+ forward.final_norm_output.data(), kModelDim, num_tokens);
282
+
283
+ MatMulT(weights.embedder_input_embedding.data(),
284
+ forward.final_norm_output.data(),
285
+ forward.logits.data(), kVocabSize, kModelDim, num_tokens);
286
+
287
+ for (size_t pos = 0; pos < num_tokens; ++pos) {
288
+ Softcap(forward.logits.data() + pos * kVocabSize, kVocabSize);
289
+ }
290
+
291
+ memcpy(forward.probs.data(), forward.logits.data(),
292
+ num_tokens * kVocabSize * sizeof(forward.logits[0]));
293
+ Softmax(forward.probs.data(), kVocabSize, num_tokens);
294
+
295
+ return CrossEntropyLoss(forward.probs.data(), prompt, kVocabSize);
296
+ }
297
+
298
+ } // namespace gcpp
299
+
300
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_FORWARD_SCALAR_H_
gemma.cpp/backprop/optimize_test.cc ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #include <stddef.h>
17
+
18
+ #include <limits>
19
+ #include <random>
20
+ #include <vector>
21
+
22
+ #include "gtest/gtest.h"
23
+ #include "backprop/backward.h"
24
+ #include "backprop/forward.h"
25
+ #include "backprop/optimizer.h"
26
+ #include "backprop/prompt.h"
27
+ #include "backprop/sampler.h"
28
+ #include "gemma/activations.h"
29
+ #include "gemma/common.h"
30
+ #include "gemma/gemma.h"
31
+ #include "gemma/weights.h"
32
+ #include "hwy/contrib/thread_pool/thread_pool.h"
33
+
34
+ namespace gcpp {
35
+
36
+ TEST(OptimizeTest, GradientDescent) {
37
+ hwy::ThreadPool pool(0);
38
+ std::mt19937 gen(42);
39
+
40
+ Model model_type = Model::GEMMA_TINY;
41
+ Type weight_type = Type::kF32;
42
+ ByteStorageT grad = CallForModelAndWeight<AllocateCompressedWeights>(
43
+ model_type, weight_type, pool);
44
+ ByteStorageT grad_m = CallForModelAndWeight<AllocateCompressedWeights>(
45
+ model_type, weight_type, pool);
46
+ ByteStorageT grad_v = CallForModelAndWeight<AllocateCompressedWeights>(
47
+ model_type, weight_type, pool);
48
+ ByteStorageT forward =
49
+ CallForModelAndWeight<AllocateForwardPass>(model_type, weight_type);
50
+ ByteStorageT backward =
51
+ CallForModelAndWeight<AllocateForwardPass>(model_type, weight_type);
52
+ KVCache kv_cache = KVCache::Create(model_type);
53
+
54
+ Gemma gemma(GemmaTokenizer(), model_type, weight_type, pool);
55
+
56
+ const auto generate = [&](const std::vector<int>& prompt) {
57
+ std::vector<int> reply;
58
+ auto stream_token = [&reply](int token, float) {
59
+ reply.push_back(token);
60
+ return token != ReverseSequenceSampler::kEndToken;
61
+ };
62
+ RuntimeConfig runtime = {
63
+ .max_tokens = 32,
64
+ .max_generated_tokens = 16,
65
+ .temperature = 1.0f,
66
+ .verbosity = 0,
67
+ .gen = &gen,
68
+ .stream_token = stream_token,
69
+ .eos_id = ReverseSequenceSampler::kEndToken,
70
+ };
71
+ TimingInfo timing_info;
72
+ gemma.Generate(runtime, prompt, 0, kv_cache, timing_info);
73
+ return reply;
74
+ };
75
+
76
+ auto verify = [&](const Prompt& prompt) {
77
+ auto context = prompt.context();
78
+ std::vector<int> reply = generate(context);
79
+ bool ok = true;
80
+ for (size_t i = 0; ok && i < prompt.tokens.size(); ++i) {
81
+ if (i >= reply.size() || reply[i] != prompt.tokens[i]) {
82
+ ok = false;
83
+ }
84
+ }
85
+ return ok;
86
+ };
87
+
88
+ RandInitWeights(model_type, weight_type, gemma.Weights(), pool, gen);
89
+ CallForModelAndWeight<ZeroInitCompressedWeights>(
90
+ model_type, weight_type, grad_m, pool);
91
+ CallForModelAndWeight<ZeroInitCompressedWeights>(
92
+ model_type, weight_type, grad_v, pool);
93
+
94
+ printf("Initial weights:\n");
95
+ LogWeightStats(model_type, weight_type, gemma.Weights());
96
+
97
+ constexpr size_t kBatchSize = 8;
98
+ const float alpha = 0.001f;
99
+ const float beta1 = 0.9f;
100
+ const float beta2 = 0.999f;
101
+ const float epsilon = 1e-8f;
102
+
103
+ ReverseSequenceSampler training_task({
104
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1});
105
+ size_t steps = 0;
106
+ float prev_loss = std::numeric_limits<float>::max();
107
+ size_t num_ok;
108
+ for (; steps < 1000000; ++steps) {
109
+ std::mt19937 sgen(42);
110
+ CallForModelAndWeight<ZeroInitCompressedWeights>(
111
+ model_type, weight_type, grad, pool);
112
+ float total_loss = 0.0f;
113
+ num_ok = 0;
114
+ for (size_t i = 0; i < kBatchSize; ++i) {
115
+ Prompt prompt = training_task.Sample(sgen);
116
+ total_loss += CrossEntropyLossForwardPass(model_type, prompt,
117
+ gemma.Weights(), forward, pool);
118
+ CrossEntropyLossBackwardPass(model_type, prompt, gemma.Weights(), forward,
119
+ grad, backward, pool);
120
+ num_ok += verify(prompt) ? 1 : 0;
121
+ }
122
+ total_loss /= kBatchSize;
123
+
124
+ AdamUpdate(model_type, weight_type, grad, alpha, beta1, beta2, epsilon,
125
+ steps + 1, gemma.Weights(), grad_m, grad_v, pool);
126
+ printf("step: %zu total_loss: %.15f num_ok: %zu/%zu\n",
127
+ steps, total_loss, num_ok, kBatchSize);
128
+ if (steps % 100 == 0) {
129
+ printf("Batch gradient:\n");
130
+ LogWeightStats(model_type, weight_type, grad);
131
+ }
132
+ if (total_loss < 0.5f) {
133
+ break;
134
+ }
135
+ prev_loss = total_loss;
136
+ }
137
+ printf("Num steps: %zu\n", steps);
138
+ printf("Final weights:\n");
139
+ LogWeightStats(model_type, weight_type, gemma.Weights());
140
+ EXPECT_LT(steps, 300);
141
+ EXPECT_EQ(num_ok, kBatchSize);
142
+ }
143
+
144
+ } // namespace gcpp
gemma.cpp/backprop/optimizer.cc ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #include "backprop/optimizer.h"
17
+
18
+ #include <cmath>
19
+ #include <random>
20
+
21
+ #include "compression/compress.h"
22
+ #include "gemma/common.h"
23
+ #include "gemma/weights.h"
24
+ #include "hwy/base.h"
25
+ #include "hwy/contrib/thread_pool/thread_pool.h"
26
+
27
+ namespace gcpp {
28
+
29
+ namespace {
30
+
31
+ class WeightInitializer {
32
+ public:
33
+ WeightInitializer(std::mt19937& gen) : dist_(0.0f, 1.0f), gen_(gen) {}
34
+
35
+ template <size_t N>
36
+ void operator()(const char* name, CompressedArray<float, N>& tensor) {
37
+ float* data = tensor.data();
38
+ for (size_t i = 0; i < N; ++i) {
39
+ data[i] = dist_(gen_);
40
+ }
41
+ tensor.set_scale(1.0f);
42
+ }
43
+ private:
44
+ std::normal_distribution<float> dist_;
45
+ std::mt19937& gen_;
46
+ };
47
+
48
+ template <typename TConfig>
49
+ struct RandInitWeightsT {
50
+ void operator()(const ByteStorageT& weights_u8, hwy::ThreadPool& pool,
51
+ std::mt19937& gen) const {
52
+ auto& weights =
53
+ *reinterpret_cast<CompressedWeights<TConfig>*>(weights_u8.get());
54
+ // TODO(szabadka) Use the same weight initialization method as in the python
55
+ // version.
56
+ WeightInitializer init(gen);
57
+ ForEachTensor1<TConfig>(init, weights);
58
+ }
59
+ };
60
+
61
+ class AdamUpdater {
62
+ public:
63
+ explicit AdamUpdater(float alpha, float beta1, float beta2, float epsilon,
64
+ size_t t)
65
+ : alpha_(alpha), beta1_(beta1), beta2_(beta2), cbeta1_(1.0f - beta1),
66
+ cbeta2_(1.0f - beta2), norm1_(1.0 / (1.0 - std::pow(beta1, t))),
67
+ norm2_(1.0 / (1.0 - std::pow(beta2, t))), epsilon_(epsilon) {}
68
+
69
+ template <size_t kCapacity>
70
+ void operator()(const char* name,
71
+ const CompressedArray<float, kCapacity>& grad,
72
+ CompressedArray<float, kCapacity>& weights,
73
+ CompressedArray<float, kCapacity>& grad_m,
74
+ CompressedArray<float, kCapacity>& grad_v) {
75
+ const float* HWY_RESTRICT g = grad.data();
76
+ float* HWY_RESTRICT w = weights.data();
77
+ float* HWY_RESTRICT m = grad_m.data();
78
+ float* HWY_RESTRICT v = grad_v.data();
79
+ for (size_t i = 0; i < kCapacity; ++i) {
80
+ m[i] *= beta1_;
81
+ m[i] += cbeta1_ * g[i];
82
+ v[i] *= beta2_;
83
+ v[i] += cbeta2_ * g[i] * g[i];
84
+ const float mhat = m[i] * norm1_;
85
+ const float vhat = v[i] * norm2_;
86
+ w[i] -= alpha_ * mhat / (std::sqrt(vhat) + epsilon_);
87
+ }
88
+ }
89
+
90
+ private:
91
+ float alpha_;
92
+ float beta1_;
93
+ float beta2_;
94
+ float cbeta1_;
95
+ float cbeta2_;
96
+ float norm1_;
97
+ float norm2_;
98
+ float epsilon_;
99
+ };
100
+
101
+ template <typename TConfig>
102
+ struct AdamUpdateT {
103
+ void operator()(const ByteStorageT& grad_u8, float alpha, float beta1,
104
+ float beta2, float epsilon, size_t t,
105
+ const ByteStorageT& weights_u8, const ByteStorageT& grad_m_u8,
106
+ const ByteStorageT& grad_v_u8, hwy::ThreadPool& pool) const {
107
+ using TWeights = CompressedWeights<TConfig>;
108
+ const auto& grad = *reinterpret_cast<const TWeights*>(grad_u8.get());
109
+ auto& weights = *reinterpret_cast<TWeights*>(weights_u8.get());
110
+ auto& grad_m = *reinterpret_cast<TWeights*>(grad_m_u8.get());
111
+ auto& grad_v = *reinterpret_cast<TWeights*>(grad_v_u8.get());
112
+ AdamUpdater updater(alpha, beta1, beta2, epsilon, t);
113
+ ForEachTensor4<TConfig>(updater, grad, weights, grad_m, grad_v);
114
+ }
115
+ };
116
+
117
+ } // namespace
118
+
119
// Fills every weight tensor of the model with random samples drawn from `gen`.
// Only f32 weights are supported: HWY_ASSERTs that weight_type is Type::kF32,
// then dispatches RandInitWeightsT for `model_type`.
+ void RandInitWeights(Model model_type, Type weight_type,
120
+ const ByteStorageT& weights, hwy::ThreadPool& pool,
121
+ std::mt19937& gen) {
122
+ HWY_ASSERT(weight_type == Type::kF32);
123
+ CallForModel<float, RandInitWeightsT>(model_type, weights, pool, gen);
124
+ }
125
+
126
// Applies one Adam optimizer step to `weights` using the gradient in `grad`;
// `grad_m` and `grad_v` hold the moment estimates and are updated in place.
// `t` is the step counter used for bias correction.
// Only f32 weights are supported: HWY_ASSERTs that weight_type is Type::kF32,
// then dispatches AdamUpdateT for `model_type`.
+ void AdamUpdate(Model model_type, Type weight_type, const ByteStorageT& grad,
127
+ float alpha, float beta1, float beta2, float epsilon, size_t t,
128
+ const ByteStorageT& weights, const ByteStorageT& grad_m,
129
+ const ByteStorageT& grad_v, hwy::ThreadPool& pool) {
130
+ HWY_ASSERT(weight_type == Type::kF32);
131
+ CallForModel<float, AdamUpdateT>(model_type, grad, alpha, beta1, beta2,
132
+ epsilon, t, weights, grad_m, grad_v, pool);
133
+ }
134
+
135
+ } // namespace gcpp
gemma.cpp/backprop/optimizer.h ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_OPTIMIZER_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_OPTIMIZER_H_
18
+
19
+ #include <random>
20
+
21
+ #include "gemma/common.h"
22
+ #include "hwy/contrib/thread_pool/thread_pool.h"
23
+
24
+ namespace gcpp {
25
+
26
// Overwrites every weight tensor in `weights` with normally distributed
// random values drawn from `gen`. `weight_type` must be Type::kF32.
+ void RandInitWeights(Model model_type, Type weight_type,
27
+ const ByteStorageT& weights, hwy::ThreadPool& pool,
28
+ std::mt19937& gen);
29
+
30
// Performs one Adam step on `weights` from the gradient `grad`.
// alpha is the learning rate, beta1/beta2 the moment decay rates, epsilon the
// denominator offset and `t` the step counter used for bias correction.
// `grad_m` / `grad_v` carry the moment estimates between calls and are
// updated in place. `weight_type` must be Type::kF32.
+ void AdamUpdate(Model model_type, Type weight_type, const ByteStorageT& grad,
31
+ float alpha, float beta1, float beta2, float epsilon, size_t t,
32
+ const ByteStorageT& weights, const ByteStorageT& grad_m,
33
+ const ByteStorageT& grad_v, hwy::ThreadPool& pool);
34
+
35
+ } // namespace gcpp
36
+
37
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_OPTIMIZER_H_
gemma.cpp/backprop/prompt.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_PROMPT_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_PROMPT_H_
18
+
19
+ #include <stddef.h>
20
+ #include <vector>
21
+
22
+ namespace gcpp {
23
+
24
// A tokenized example. The first `context_size` entries of `tokens` form the
// context; context() returns a copy of that prefix.
struct Prompt {
  std::vector<int> tokens;
  // Zero-initialized so a default-constructed Prompt has a well-defined
  // (empty) context instead of an indeterminate size.
  size_t context_size = 0;

  // Returns the first `context_size` tokens. If `context_size` exceeds the
  // number of tokens, the whole token list is returned rather than reading
  // past the end of the vector.
  std::vector<int> context() const {
    const size_t prefix_len =
        context_size < tokens.size() ? context_size : tokens.size();
    return std::vector<int>(tokens.begin(), tokens.begin() + prefix_len);
  }
};
31
+
32
+ } // namespace gcpp
33
+
34
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_PROMPT_H_
gemma.cpp/backprop/sampler.h ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_SAMPLER_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_SAMPLER_H_
18
+
19
+ #include <random>
20
+ #include <vector>
21
+
22
+ #include "backprop/prompt.h"
23
+
24
+ namespace gcpp {
25
+
26
+ class PromptSampler {
27
+ public:
28
+ virtual Prompt Sample(std::mt19937& gen) = 0;
29
+ virtual ~PromptSampler() = default;
30
+
31
+ std::vector<Prompt> SampleBatch(size_t batch_size, std::mt19937& gen) {
32
+ std::vector<Prompt> batch;
33
+ batch.reserve(batch_size);
34
+ for (size_t i = 0; i < batch_size; ++i) {
35
+ batch.emplace_back(Sample(gen));
36
+ }
37
+ return batch;
38
+ }
39
+ };
40
+
41
+ class ReverseSequenceSampler : public PromptSampler {
42
+ public:
43
+ explicit ReverseSequenceSampler(const std::vector<int>& length_histo)
44
+ : token_dist_(0, 9) {
45
+ for (int i = 0; i < length_histo.size(); ++i) {
46
+ const int count = length_histo[i];
47
+ for (int j = 0; j < count; ++j) {
48
+ length_lut_.push_back(i + 1);
49
+ }
50
+ }
51
+ length_dist_ = std::uniform_int_distribution<>(0, length_lut_.size() - 1);
52
+ }
53
+ virtual ~ReverseSequenceSampler() = default;
54
+
55
+ static constexpr int kReverseToken = 10;
56
+ static constexpr int kEndToken = 11;
57
+
58
+ Prompt Sample(std::mt19937& gen) override {
59
+ Prompt prompt;
60
+ int len = length_lut_[length_dist_(gen)];
61
+ prompt.tokens.resize(2 * len + 2);
62
+ prompt.tokens[len] = kReverseToken;
63
+ prompt.tokens[2 * len + 1] = kEndToken;
64
+ for (size_t i = 0; i < len; ++i) {
65
+ prompt.tokens[i] = prompt.tokens[2 * len - i] = token_dist_(gen);
66
+ }
67
+ prompt.context_size = len + 1;
68
+ return prompt;
69
+ }
70
+
71
+ static void LogPrompt(const Prompt& prompt) {
72
+ static const char* kVocab[] = {
73
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "-->", "|",
74
+ };
75
+ for (int token : prompt.tokens) printf("%s", kVocab[token]);
76
+ printf(" [context_size: %zu]\n", prompt.context_size);
77
+ }
78
+
79
+ private:
80
+ std::uniform_int_distribution<> token_dist_;
81
+ std::uniform_int_distribution<> length_dist_;
82
+ std::vector<int> length_lut_;
83
+ };
84
+
85
+ } // namespace gcpp
86
+
87
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_SAMPLER_H_
gemma.cpp/backprop/test_util.h ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // https://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_
17
+ #define THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_
18
+
19
+ #include <stddef.h>
20
+
21
+ #include <array>
22
+ #include <complex>
23
+
24
+ #include "gtest/gtest.h"
25
+ #include "gemma/weights_raw.h"
26
+
27
+ namespace gcpp {
28
+
29
// Copies a real-valued array into a complex array of the same length; every
// output element gets the input value as real part and 0 as imaginary part.
template<typename T, typename U, size_t kLen>
void Complexify(const std::array<T, kLen>& x,
                std::array<std::complex<U>, kLen>& c_x) {
  auto dst = c_x.begin();
  for (const T& real_part : x) {
    *dst = std::complex<U>(real_part, 0.0);
    ++dst;
  }
}
36
+
37
+
38
+ template<typename T, typename U, typename TConfig>
39
+ void Complexify(const Layer<T, TConfig>& w,
40
+ Layer<std::complex<U>, TConfig>& c_w) {
41
+ Complexify(w.pre_attention_norm_scale, c_w.pre_attention_norm_scale);
42
+ Complexify(w.attn_vec_einsum_w, c_w.attn_vec_einsum_w);
43
+ Complexify(w.qkv_einsum_w, c_w.qkv_einsum_w);
44
+ Complexify(w.pre_ffw_norm_scale, c_w.pre_ffw_norm_scale);
45
+ Complexify(w.gating_einsum_w, c_w.gating_einsum_w);
46
+ Complexify(w.linear_w, c_w.linear_w);
47
+ }
48
+
49
+ template<typename T, typename U, typename TConfig>
50
+ void Complexify(const Weights<T, TConfig>& w,
51
+ Weights<std::complex<U>, TConfig>& c_w) {
52
+ static constexpr size_t kLayers = TConfig::kLayers;
53
+ Complexify(w.embedder_input_embedding, c_w.embedder_input_embedding);
54
+ Complexify(w.final_norm_scale, c_w.final_norm_scale);
55
+ for (size_t i = 0; i < kLayers; ++i) {
56
+ Complexify(*w.GetLayer(i), *c_w.GetLayer(i));
57
+ }
58
+ }
59
+
60
// Asserts that `actual` matches `expected` both element-wise and in direction.
//
// Element i must lie within max(max_abs_err, |expected[i]| * max_rel_err) of
// expected[i]. In addition, unless the squared norm of `actual` is at most
// 1e-40, the normalized dot product of the two arrays must be within 1e-7 of
// 1.0. `line` only appears in failure messages so callers can identify the
// call site. The ASSERT_* macros are fatal gtest assertions, so the function
// returns at the first failure.
+ template<typename T, typename U, size_t N>
61
+ void TestNear(const std::array<T, N>& actual, const std::array<U, N>& expected,
62
+ double max_abs_err, double max_rel_err, int line) {
// Squared norm of actual, squared norm of expected, and their dot product.
63
+ double sum0 = 0;
64
+ double sum1 = 0;
65
+ double sum01 = 0;
66
+ for (size_t i = 0; i < N; ++i) {
67
+ sum0 += actual[i] * actual[i];
68
+ sum1 += expected[i] * expected[i];
69
+ sum01 += actual[i] * expected[i];
70
+ ASSERT_NEAR(actual[i], expected[i],
71
+ std::max(max_abs_err, std::abs(expected[i]) * max_rel_err))
72
+ << "line: " << line << " dim=" << N << " i=" << i;
73
+ }
// Skip the direction check when `actual` is (numerically) the zero vector.
// NOTE(review): sum1 is not guarded; a zero `expected` with a non-zero
// `actual` would divide by zero here - confirm this is intended.
74
+ if (sum0 > 1e-40) {
75
+ double norm_dot = sum01 / std::sqrt(sum0) / std::sqrt(sum1);
76
+ ASSERT_NEAR(norm_dot, 1.0, 1e-7)
77
+ << "line: " << line << " sum0: " << sum0 << " sum1: " << sum1
78
+ << " sum01: " << sum01;
79
+ }
80
+ }
81
+
82
// Checks an analytic gradient against a complex-step estimate.
//
// If f : R->R is the function under test and F : C->C is its extension to
// the complex plane, complex differentiable at x, then
//
//   F(x + ih) = F(x) + ih F'(x) + O(h^2) F''(x)
//
// and therefore
//
//   F'(x) ~= Imag(F(x + ih)) / h
//
// Unlike a real-valued finite difference, this never subtracts two nearly
// equal floating point numbers, which makes it numerically more stable.
//
// For each element of `x` in turn, the imaginary part is set to `step`,
// `func` is evaluated, and the element is reset to its real part. The
// resulting estimates are compared with `grad` via TestNear().
template<typename T, typename U, size_t N, typename FUNC>
void TestGradient(const std::array<T, N>& grad,
                  std::array<std::complex<U>, N>& x, FUNC func,
                  U step, T max_abs_err, T max_rel_err, int line) {
  const U reciprocal_step = 1.0 / step;
  std::array<T, N> numeric_grad;
  for (size_t k = 0; k < N; ++k) {
    const U real_part = std::real(x[k]);
    // Perturb only coordinate k along the imaginary axis.
    x[k] = std::complex<U>(real_part, step);
    const std::complex<U> perturbed = func();
    numeric_grad[k] = std::imag(perturbed) * reciprocal_step;
    // Undo the perturbation before moving on.
    x[k] = real_part;
  }
  TestNear(grad, numeric_grad, max_abs_err, max_rel_err, line);
}
111
+
112
// Convenience overloads of TestGradient that pick the complex-step size from
// the precision of `x`: 1e-30f for complex<float>, 1e-50 for complex<double>.
// Everything else is forwarded unchanged.

// float gradient, complex<float> inputs.
+ template<size_t N, typename FUNC>
113
+ void TestGradient(const std::array<float, N>& grad,
114
+ std::array<std::complex<float>, N>& x, FUNC func,
115
+ float max_abs_err, float max_rel_error, int line) {
116
+ TestGradient(grad, x, func, 1e-30f, max_abs_err, max_rel_error, line);
117
+ }
118
+
// float gradient, complex<double> inputs.
119
+ template<size_t N, typename FUNC>
120
+ void TestGradient(const std::array<float, N>& grad,
121
+ std::array<std::complex<double>, N>& x, FUNC func,
122
+ float max_abs_err, float max_rel_error, int line) {
123
+ TestGradient(grad, x, func, 1e-50, max_abs_err, max_rel_error, line);
124
+ }
125
+
// double gradient, complex<double> inputs.
126
+ template<size_t N, typename FUNC>
127
+ void TestGradient(const std::array<double, N>& grad,
128
+ std::array<std::complex<double>, N>& x, FUNC func,
129
+ double max_abs_err, double max_rel_error, int line) {
130
+ TestGradient(grad, x, func, 1e-50, max_abs_err, max_rel_error, line);
131
+ }
132
+
133
// Checks the analytic gradient of every tensor of one layer against the
// complex-step estimate obtained by perturbing the matching tensor in
// `c_weights` and evaluating `func`. `max_err` is used as both the absolute
// and the relative tolerance. __LINE__ is passed through so a failure message
// identifies which tensor's check failed.
+ template<typename T, typename U, typename TConfig, typename FUNC>
134
+ void TestGradient(const Layer<T, TConfig>& grad,
135
+ Layer<std::complex<U>, TConfig>& c_weights,
136
+ FUNC func, T max_err) {
137
+ TestGradient(grad.pre_attention_norm_scale,
138
+ c_weights.pre_attention_norm_scale,
139
+ func, max_err, max_err, __LINE__);
140
+ TestGradient(grad.attn_vec_einsum_w, c_weights.attn_vec_einsum_w,
141
+ func, max_err, max_err, __LINE__);
142
+ TestGradient(grad.qkv_einsum_w, c_weights.qkv_einsum_w,
143
+ func, max_err, max_err, __LINE__);
144
+ TestGradient(grad.pre_ffw_norm_scale, c_weights.pre_ffw_norm_scale,
145
+ func, max_err, max_err, __LINE__);
146
+ TestGradient(grad.gating_einsum_w, c_weights.gating_einsum_w,
147
+ func, max_err, max_err, __LINE__);
148
+ TestGradient(grad.linear_w, c_weights.linear_w,
149
+ func, max_err, max_err, __LINE__);
150
+ }
151
+
152
+ template<typename T, typename U, typename TConfig, typename FUNC>
153
+ void TestGradient(const Weights<T, TConfig>& grad,
154
+ Weights<std::complex<U>, TConfig>& c_weights,
155
+ FUNC func, T max_err) {
156
+ TestGradient(grad.embedder_input_embedding,
157
+ c_weights.embedder_input_embedding,
158
+ func, 2 * max_err, max_err, __LINE__);
159
+ TestGradient(grad.final_norm_scale, c_weights.final_norm_scale,
160
+ func, max_err, max_err, __LINE__);
161
+ for (int i = 0; i < TConfig::kLayers; ++i) {
162
+ TestGradient(*grad.GetLayer(i), *c_weights.GetLayer(i), func, max_err);
163
+ }
164
+ }
165
+
166
+ } // namespace gcpp
167
+
168
+ #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_
gemma.cpp/bazel/BUILD ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Required for referencing bazel:sentencepiece.patch
2
+ package(
3
+ default_applicable_licenses = ["//:license"],
4
+ default_visibility = ["//:__subpackages__"],
5
+ )
gemma.cpp/bazel/sentencepiece.bazel ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package(
2
+ default_visibility = ["//visibility:public"],
3
+ features = [
4
+ "layering_check",
5
+ "parse_headers",
6
+ ],
7
+ )
8
+
9
+ licenses(["notice"]) # Apache 2, BSD, MIT
10
+
11
+ proto_library(
12
+ name = "sentencepiece_proto",
13
+ srcs = ["src/sentencepiece.proto"],
14
+ )
15
+
16
+ cc_proto_library(
17
+ name = "sentencepiece_cc_proto",
18
+ deps = [":sentencepiece_proto"],
19
+ )
20
+
21
+ proto_library(
22
+ name = "sentencepiece_model_proto",
23
+ srcs = ["src/sentencepiece_model.proto"],
24
+ )
25
+
26
+ cc_proto_library(
27
+ name = "sentencepiece_model_cc_proto",
28
+ deps = [":sentencepiece_model_proto"],
29
+ )
30
+
31
+ genrule(
32
+ name = "config_h",
33
+ srcs = ["config.h.in"],
34
+ outs = ["config.h"],
35
+ cmd = "cp $< $@",
36
+ )
37
+
38
+ cc_library(
39
+ name = "common",
40
+ hdrs = [
41
+ "config.h",
42
+ "src/common.h",
43
+ ],
44
+ deps = [
45
+ "@com_google_absl//absl/base",
46
+ ],
47
+ )
48
+
49
+ cc_library(
50
+ name = "sentencepiece_processor",
51
+ srcs = [
52
+ "src/bpe_model.cc",
53
+ "src/char_model.cc",
54
+ "src/error.cc",
55
+ "src/filesystem.cc",
56
+ "src/model_factory.cc",
57
+ "src/model_interface.cc",
58
+ "src/normalizer.cc",
59
+ "src/sentencepiece_processor.cc",
60
+ "src/unigram_model.cc",
61
+ "src/util.cc",
62
+ "src/word_model.cc",
63
+ ],
64
+ hdrs = [
65
+ "src/bpe_model.h",
66
+ "src/char_model.h",
67
+ "src/filesystem.h",
68
+ "src/freelist.h",
69
+ "src/model_factory.h",
70
+ "src/model_interface.h",
71
+ "src/normalizer.h",
72
+ "src/sentencepiece_processor.h",
73
+ "src/trainer_interface.h",
74
+ "src/unigram_model.h",
75
+ "src/util.h",
76
+ "src/word_model.h",
77
+ ],
78
+ defines = ["_USE_TF_STRING_VIEW"],
79
+ includes = [
80
+ ".",
81
+ "src",
82
+ ],
83
+ linkstatic = 1,
84
+ deps =
85
+ [
86
+ ":common",
87
+ ":sentencepiece_cc_proto",
88
+ ":sentencepiece_model_cc_proto",
89
+ "@com_google_absl//absl/container:flat_hash_map",
90
+ "@com_google_absl//absl/container:flat_hash_set",
91
+ "@com_google_absl//absl/memory",
92
+ "@com_google_absl//absl/status",
93
+ "@com_google_absl//absl/strings",
94
+ "@com_google_absl//absl/strings:str_format",
95
+ "@darts_clone",
96
+ ],
97
+ )
gemma.cpp/bazel/sentencepiece.patch ADDED
@@ -0,0 +1,2339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/src/bpe_model.cc b/src/bpe_model.cc
2
+ index 22cd115..97e0bda 100644
3
+ --- a/src/bpe_model.cc
4
+ +++ b/src/bpe_model.cc
5
+ @@ -21,7 +21,7 @@
6
+
7
+ #include "bpe_model.h"
8
+ #include "freelist.h"
9
+ -#include "third_party/absl/container/flat_hash_map.h"
10
+ +#include "absl/container/flat_hash_map.h"
11
+ #include "util.h"
12
+
13
+ namespace sentencepiece {
14
+ diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc
15
+ index 964d44e..64878cd 100644
16
+ --- a/src/bpe_model_trainer.cc
17
+ +++ b/src/bpe_model_trainer.cc
18
+ @@ -18,7 +18,8 @@
19
+ #include <vector>
20
+
21
+ #include "bpe_model_trainer.h"
22
+ -#include "third_party/absl/container/flat_hash_set.h"
23
+ +#include "absl/container/flat_hash_set.h"
24
+ +#include "absl/status/status.h"
25
+ #include "util.h"
26
+
27
+ namespace sentencepiece {
28
+ @@ -171,7 +172,7 @@ void Trainer::UpdateActiveSymbols() {
29
+ active_symbols_.insert(symbols.begin(), symbols.begin() + size);
30
+ }
31
+
32
+ -util::Status Trainer::Train() {
33
+ +absl::Status Trainer::Train() {
34
+ RETURN_IF_ERROR(status());
35
+
36
+ CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
37
+ diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h
38
+ index e011a37..a17e580 100644
39
+ --- a/src/bpe_model_trainer.h
40
+ +++ b/src/bpe_model_trainer.h
41
+ @@ -20,7 +20,8 @@
42
+ #include <vector>
43
+
44
+ #include "sentencepiece_model.pb.h"
45
+ -#include "third_party/absl/container/flat_hash_map.h"
46
+ +#include "absl/container/flat_hash_map.h"
47
+ +#include "absl/status/status.h"
48
+ #include "trainer_interface.h"
49
+
50
+ namespace sentencepiece {
51
+ @@ -35,7 +36,7 @@ class Trainer : public TrainerInterface {
52
+ : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
53
+ denormalizer_spec) {}
54
+
55
+ - util::Status Train() override;
56
+ + absl::Status Train() override;
57
+
58
+ private:
59
+ // Symbol represents a character or symbol bigram.
60
+ diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc
61
+ index 173eb9c..2a43c3a 100644
62
+ --- a/src/bpe_model_trainer_test.cc
63
+ +++ b/src/bpe_model_trainer_test.cc
64
+ @@ -20,8 +20,8 @@
65
+ #include "sentencepiece_processor.h"
66
+ #include "sentencepiece_trainer.h"
67
+ #include "testharness.h"
68
+ -#include "third_party/absl/strings/str_cat.h"
69
+ -#include "third_party/absl/strings/str_join.h"
70
+ +#include "absl/strings/str_cat.h"
71
+ +#include "absl/strings/str_join.h"
72
+ #include "util.h"
73
+
74
+ namespace sentencepiece {
75
+ diff --git a/src/builder.cc b/src/builder.cc
76
+ index 378aaa0..fd8edf8 100644
77
+ --- a/src/builder.cc
78
+ +++ b/src/builder.cc
79
+ @@ -18,10 +18,11 @@
80
+
81
+ #include "builder.h"
82
+ #include "filesystem.h"
83
+ -#include "third_party/absl/strings/str_join.h"
84
+ -#include "third_party/absl/strings/str_replace.h"
85
+ -#include "third_party/absl/strings/str_split.h"
86
+ -#include "third_party/absl/strings/strip.h"
87
+ +#include "absl/strings/str_join.h"
88
+ +#include "absl/strings/str_replace.h"
89
+ +#include "absl/strings/str_split.h"
90
+ +#include "absl/strings/strip.h"
91
+ +#include "absl/status/status.h"
92
+
93
+ #ifdef ENABLE_NFKC_COMPILE
94
+ #include <unicode/errorcode.h>
95
+ @@ -36,7 +37,7 @@
96
+
97
+ #include "normalization_rule.h"
98
+ #include "normalizer.h"
99
+ -#include "third_party/darts_clone/darts.h"
100
+ +#include "include/darts.h"
101
+ #include "util.h"
102
+
103
+ namespace sentencepiece {
104
+ @@ -145,7 +146,7 @@ Builder::Chars Normalize(const Builder::CharsMap &chars_map,
105
+ } // namespace
106
+
107
+ // static
108
+ -util::Status Builder::CompileCharsMap(const CharsMap &chars_map,
109
+ +absl::Status Builder::CompileCharsMap(const CharsMap &chars_map,
110
+ std::string *output) {
111
+ CHECK_OR_RETURN(output);
112
+ CHECK_OR_RETURN(!chars_map.empty());
113
+ @@ -212,7 +213,7 @@ util::Status Builder::CompileCharsMap(const CharsMap &chars_map,
114
+ }
115
+
116
+ // static
117
+ -util::Status Builder::DecompileCharsMap(absl::string_view blob,
118
+ +absl::Status Builder::DecompileCharsMap(absl::string_view blob,
119
+ Builder::CharsMap *chars_map) {
120
+ CHECK_OR_RETURN(chars_map);
121
+ chars_map->clear();
122
+ @@ -265,7 +266,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob,
123
+ }
124
+
125
+ // static
126
+ -util::Status Builder::GetPrecompiledCharsMap(const std::string &name,
127
+ +absl::Status Builder::GetPrecompiledCharsMap(const std::string &name,
128
+ std::string *output) {
129
+ CHECK_OR_RETURN(output);
130
+
131
+ @@ -282,12 +283,12 @@ util::Status Builder::GetPrecompiledCharsMap(const std::string &name,
132
+ return util::OkStatus();
133
+ }
134
+ }
135
+ - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
136
+ + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
137
+ << "No precompiled charsmap is found: " << name;
138
+ }
139
+
140
+ // static
141
+ -util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
142
+ +absl::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
143
+ #ifdef ENABLE_NFKC_COMPILE
144
+ LOG(INFO) << "Running BuildNFKCMap";
145
+
146
+ @@ -345,7 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
147
+ return util::OkStatus();
148
+ }
149
+
150
+ -util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
151
+ +absl::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
152
+ #ifdef ENABLE_NFKC_COMPILE
153
+ LOG(INFO) << "Running BuildNmtNFKCMap";
154
+
155
+ @@ -420,7 +421,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
156
+ }
157
+
158
+ // static
159
+ -util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
160
+ +absl::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
161
+ #ifdef ENABLE_NFKC_COMPILE
162
+ for (auto &c : *chars_map) {
163
+ std::vector<char32> trg;
164
+ @@ -445,7 +446,7 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
165
+ }
166
+
167
+ // static
168
+ -util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
169
+ +absl::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
170
+ #ifdef ENABLE_NFKC_COMPILE
171
+ CharsMap nfkc_map;
172
+ RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map));
173
+ @@ -460,7 +461,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
174
+ }
175
+
176
+ // static
177
+ -util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
178
+ +absl::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
179
+ #ifdef ENABLE_NFKC_COMPILE
180
+ CharsMap nfkc_map;
181
+ RETURN_IF_ERROR(Builder::BuildNmtNFKCMap(&nfkc_map));
182
+ @@ -475,7 +476,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
183
+ }
184
+
185
+ // static
186
+ -util::Status Builder::LoadCharsMap(absl::string_view filename,
187
+ +absl::Status Builder::LoadCharsMap(absl::string_view filename,
188
+ CharsMap *chars_map) {
189
+ LOG(INFO) << "Loading mapping file: " << filename.data();
190
+ CHECK_OR_RETURN(chars_map);
191
+ @@ -510,7 +511,7 @@ util::Status Builder::LoadCharsMap(absl::string_view filename,
192
+ }
193
+
194
+ // static
195
+ -util::Status Builder::SaveCharsMap(absl::string_view filename,
196
+ +absl::Status Builder::SaveCharsMap(absl::string_view filename,
197
+ const Builder::CharsMap &chars_map) {
198
+ auto output = filesystem::NewWritableFile(filename);
199
+ RETURN_IF_ERROR(output->status());
200
+ @@ -540,7 +541,7 @@ util::Status Builder::SaveCharsMap(absl::string_view filename,
201
+ }
202
+
203
+ // static
204
+ -util::Status Builder::RemoveRedundantMap(CharsMap *chars_map) {
205
+ +absl::Status Builder::RemoveRedundantMap(CharsMap *chars_map) {
206
+ CHECK_OR_RETURN(chars_map);
207
+
208
+ CharsMap new_chars_map;
209
+ diff --git a/src/builder.h b/src/builder.h
210
+ index 49d2884..8ad872c 100644
211
+ --- a/src/builder.h
212
+ +++ b/src/builder.h
213
+ @@ -22,7 +22,8 @@
214
+ #include "common.h"
215
+ #include "sentencepiece_model.pb.h"
216
+ #include "sentencepiece_processor.h"
217
+ -#include "third_party/absl/strings/string_view.h"
218
+ +#include "absl/strings/string_view.h"
219
+ +#include "absl/status/status.h"
220
+
221
+ namespace sentencepiece {
222
+ namespace normalizer {
223
+ @@ -43,15 +44,15 @@ class Builder {
224
+ // String-to-string mapping.
225
+ using CharsMap = std::map<Chars, Chars>;
226
+
227
+ - static util::Status CompileCharsMap(const CharsMap &chars_map,
228
+ + static absl::Status CompileCharsMap(const CharsMap &chars_map,
229
+ std::string *output);
230
+
231
+ // Decompiles `blob` into `chars_map`.
232
+ - static util::Status DecompileCharsMap(absl::string_view blob,
233
+ + static absl::Status DecompileCharsMap(absl::string_view blob,
234
+ CharsMap *chars_map);
235
+
236
+ // Returns a pre-compiled binary index with `name`.
237
+ - static util::Status GetPrecompiledCharsMap(const std::string &name,
238
+ + static absl::Status GetPrecompiledCharsMap(const std::string &name,
239
+ std::string *output);
240
+
241
+ // Makes a normalization mapping based on NFKC.
242
+ @@ -89,30 +90,30 @@ class Builder {
243
+ // normalizer is the goal of SentencePiece.
244
+ //
245
+ // TODO(taku): Make NFC, NFD, and NFKD mapping if necessary.
246
+ - static util::Status BuildNFKCMap(CharsMap *chars_map);
247
+ + static absl::Status BuildNFKCMap(CharsMap *chars_map);
248
+
249
+ // Makes an NFKC-based mapping with NMT specific modifications around
250
+ // whitespaces.
251
+ - static util::Status BuildNmtNFKCMap(CharsMap *chars_map);
252
+ + static absl::Status BuildNmtNFKCMap(CharsMap *chars_map);
253
+
254
+ // Merge Unicode case folding mapping into `chars_map`.
255
+ - static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
256
+ + static absl::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
257
+
258
+ // Makes NFKC with Unicode case folding.
259
+ - static util::Status BuildNFKC_CFMap(CharsMap *chars_map);
260
+ + static absl::Status BuildNFKC_CFMap(CharsMap *chars_map);
261
+
262
+ // Makes NMT NFKC with Unicode case folding.
263
+ - static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
264
+ + static absl::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
265
+
266
+ // Builds Chars map save in `filename`.
267
+ // Format:
268
+ // src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
269
+ // (src|trg)_ucharX must be a hex of Unicode code point.
270
+ - static util::Status LoadCharsMap(absl::string_view filename,
271
+ + static absl::Status LoadCharsMap(absl::string_view filename,
272
+ CharsMap *chars_map);
273
+
274
+ // Saves Chars map to `filename` as TSV.
275
+ - static util::Status SaveCharsMap(absl::string_view filename,
276
+ + static absl::Status SaveCharsMap(absl::string_view filename,
277
+ const CharsMap &chars_map);
278
+
279
+ private:
280
+ @@ -121,7 +122,7 @@ class Builder {
281
+ // Removes redundant rules from `chars_map`.
282
+ // When char_maps have "aa" => "bb" and "a" => "b", the first
283
+ // rule is not necessary since the second rule can cover the first rule.
284
+ - static util::Status RemoveRedundantMap(CharsMap *chars_map);
285
+ + static absl::Status RemoveRedundantMap(CharsMap *chars_map);
286
+ };
287
+ } // namespace normalizer
288
+ } // namespace sentencepiece
289
+ diff --git a/src/builder_test.cc b/src/builder_test.cc
290
+ index 4acb7b3..1dee5c7 100644
291
+ --- a/src/builder_test.cc
292
+ +++ b/src/builder_test.cc
293
+ @@ -18,7 +18,7 @@
294
+ #include "normalizer.h"
295
+ #include "sentencepiece_trainer.h"
296
+ #include "testharness.h"
297
+ -#include "third_party/absl/strings/str_cat.h"
298
+ +#include "absl/strings/str_cat.h"
299
+ #include "util.h"
300
+
301
+ namespace sentencepiece {
302
+ diff --git a/src/char_model_trainer.cc b/src/char_model_trainer.cc
303
+ index f438d78..4f4c603 100644
304
+ --- a/src/char_model_trainer.cc
305
+ +++ b/src/char_model_trainer.cc
306
+ @@ -16,12 +16,13 @@
307
+
308
+ #include "char_model.h"
309
+ #include "char_model_trainer.h"
310
+ +#include "absl/status/status.h"
311
+ #include "util.h"
312
+
313
+ namespace sentencepiece {
314
+ namespace character {
315
+
316
+ -util::Status Trainer::Train() {
317
+ +absl::Status Trainer::Train() {
318
+ RETURN_IF_ERROR(status());
319
+
320
+ CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
321
+ diff --git a/src/char_model_trainer.h b/src/char_model_trainer.h
322
+ index e563819..a5d021c 100644
323
+ --- a/src/char_model_trainer.h
324
+ +++ b/src/char_model_trainer.h
325
+ @@ -17,6 +17,7 @@
326
+
327
+ #include "sentencepiece_model.pb.h"
328
+ #include "trainer_interface.h"
329
+ +#include "absl/status/status.h"
330
+
331
+ namespace sentencepiece {
332
+ namespace character {
333
+ @@ -30,7 +31,7 @@ class Trainer : public TrainerInterface {
334
+ : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
335
+ denormalizer_spec) {}
336
+
337
+ - util::Status Train() override;
338
+ + absl::Status Train() override;
339
+ };
340
+ } // namespace character
341
+ } // namespace sentencepiece
342
+ diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc
343
+ index 8c2e4b7..e8b4979 100644
344
+ --- a/src/char_model_trainer_test.cc
345
+ +++ b/src/char_model_trainer_test.cc
346
+ @@ -19,8 +19,8 @@
347
+ #include "filesystem.h"
348
+ #include "sentencepiece_processor.h"
349
+ #include "testharness.h"
350
+ -#include "third_party/absl/strings/str_cat.h"
351
+ -#include "third_party/absl/strings/str_join.h"
352
+ +#include "absl/strings/str_cat.h"
353
+ +#include "absl/strings/str_join.h"
354
+ #include "util.h"
355
+
356
+ namespace sentencepiece {
357
+ diff --git a/src/common.h b/src/common.h
358
+ index 7595634..339f831 100644
359
+ --- a/src/common.h
360
+ +++ b/src/common.h
361
+ @@ -146,6 +146,7 @@ inline const char *BaseName(const char *path) {
362
+ } // namespace logging
363
+ } // namespace sentencepiece
364
+
365
+ +#ifndef LOG
366
+ #define LOG(severity) \
367
+ (::sentencepiece::logging::GetMinLogLevel() > \
368
+ ::sentencepiece::logging::LOG_##severity) \
369
+ @@ -156,6 +157,7 @@ inline const char *BaseName(const char *path) {
370
+ std::cerr << ::sentencepiece::logging::BaseName(__FILE__) << "(" \
371
+ << __LINE__ << ") " \
372
+ << "LOG(" << #severity << ") "
373
+ +#endif // LOG
374
+
375
+ #define CHECK(condition) \
376
+ (condition) ? 0 \
377
+ diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc
378
+ index c5a5188..e5db1d7 100644
379
+ --- a/src/compile_charsmap_main.cc
380
+ +++ b/src/compile_charsmap_main.cc
381
+ @@ -22,8 +22,9 @@
382
+ #include "filesystem.h"
383
+ #include "init.h"
384
+ #include "sentencepiece_processor.h"
385
+ -#include "third_party/absl/flags/flag.h"
386
+ -#include "third_party/absl/strings/string_view.h"
387
+ +#include "absl/flags/flag.h"
388
+ +#include "absl/strings/string_view.h"
389
+ +#include "absl/status/status.h"
390
+
391
+ using sentencepiece::normalizer::Builder;
392
+
393
+ @@ -160,7 +161,7 @@ int main(int argc, char **argv) {
394
+
395
+ const std::vector<std::pair<
396
+ std::string,
397
+ - std::function<sentencepiece::util::Status(Builder::CharsMap *)>>>
398
+ + std::function<absl::Status(Builder::CharsMap *)>>>
399
+ kRuleList = {{"nfkc", Builder::BuildNFKCMap},
400
+ {"nmt_nfkc", Builder::BuildNmtNFKCMap},
401
+ {"nfkc_cf", Builder::BuildNFKC_CFMap},
402
+ diff --git a/src/error.cc b/src/error.cc
403
+ index a226d98..ab4675d 100644
404
+ --- a/src/error.cc
405
+ +++ b/src/error.cc
406
+ @@ -20,8 +20,8 @@
407
+ #ifdef _USE_EXTERNAL_ABSL
408
+ // Naive workaround to define minloglevel on external absl package.
409
+ // We want to define them in other cc file.
410
+ -#include "third_party/absl/flags/flag.h"
411
+ -#include "third_party/absl/flags/parse.h"
412
+ +#include "absl/flags/flag.h"
413
+ +#include "absl/flags/parse.h"
414
+ ABSL_FLAG(int32, minloglevel, 0,
415
+ "Messages logged at a lower level than this don't actually.");
416
+ #endif
417
+ diff --git a/src/filesystem.cc b/src/filesystem.cc
418
+ index 833c8f7..9a1b6c9 100644
419
+ --- a/src/filesystem.cc
420
+ +++ b/src/filesystem.cc
421
+ @@ -15,7 +15,8 @@
422
+ #include <iostream>
423
+
424
+ #include "filesystem.h"
425
+ -#include "third_party/absl/memory/memory.h"
426
+ +#include "absl/status/status.h"
427
+ +#include "absl/memory/memory.h"
428
+ #include "util.h"
429
+
430
+ #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE)
431
+ @@ -36,7 +37,7 @@ class PosixReadableFile : public ReadableFile {
432
+ is_binary ? std::ios::binary | std::ios::in
433
+ : std::ios::in)) {
434
+ if (!*is_)
435
+ - status_ = util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
436
+ + status_ = util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
437
+ << "\"" << filename.data() << "\": " << util::StrError(errno);
438
+ }
439
+
440
+ @@ -44,7 +45,7 @@ class PosixReadableFile : public ReadableFile {
441
+ if (is_ != &std::cin) delete is_;
442
+ }
443
+
444
+ - util::Status status() const { return status_; }
445
+ + absl::Status status() const { return status_; }
446
+
447
+ bool ReadLine(std::string *line) {
448
+ return static_cast<bool>(std::getline(*is_, *line));
449
+ @@ -61,7 +62,7 @@ class PosixReadableFile : public ReadableFile {
450
+ }
451
+
452
+ private:
453
+ - util::Status status_;
454
+ + absl::Status status_;
455
+ std::istream *is_;
456
+ };
457
+
458
+ @@ -75,7 +76,7 @@ class PosixWritableFile : public WritableFile {
459
+ : std::ios::out)) {
460
+ if (!*os_)
461
+ status_ =
462
+ - util::StatusBuilder(util::StatusCode::kPermissionDenied, GTL_LOC)
463
+ + util::StatusBuilder(absl::StatusCode::kPermissionDenied, GTL_LOC)
464
+ << "\"" << filename.data() << "\": " << util::StrError(errno);
465
+ }
466
+
467
+ @@ -83,7 +84,7 @@ class PosixWritableFile : public WritableFile {
468
+ if (os_ != &std::cout) delete os_;
469
+ }
470
+
471
+ - util::Status status() const { return status_; }
472
+ + absl::Status status() const { return status_; }
473
+
474
+ bool Write(absl::string_view text) {
475
+ os_->write(text.data(), text.size());
476
+ @@ -93,7 +94,7 @@ class PosixWritableFile : public WritableFile {
477
+ bool WriteLine(absl::string_view text) { return Write(text) && Write("\n"); }
478
+
479
+ private:
480
+ - util::Status status_;
481
+ + absl::Status status_;
482
+ std::ostream *os_;
483
+ };
484
+
485
+ diff --git a/src/filesystem.h b/src/filesystem.h
486
+ index e572b4b..6e8e305 100644
487
+ --- a/src/filesystem.h
488
+ +++ b/src/filesystem.h
489
+ @@ -23,7 +23,8 @@
490
+
491
+ #include "common.h"
492
+ #include "sentencepiece_processor.h"
493
+ -#include "third_party/absl/strings/string_view.h"
494
+ +#include "absl/strings/string_view.h"
495
+ +#include "absl/status/status.h"
496
+
497
+ namespace sentencepiece {
498
+ namespace filesystem {
499
+ @@ -33,7 +34,7 @@ class ReadableFile {
500
+ explicit ReadableFile(absl::string_view filename, bool is_binary = false) {}
501
+ virtual ~ReadableFile() {}
502
+
503
+ - virtual util::Status status() const = 0;
504
+ + virtual absl::Status status() const = 0;
505
+ virtual bool ReadLine(std::string *line) = 0;
506
+ virtual bool ReadAll(std::string *line) = 0;
507
+ };
508
+ @@ -44,7 +45,7 @@ class WritableFile {
509
+ explicit WritableFile(absl::string_view filename, bool is_binary = false) {}
510
+ virtual ~WritableFile() {}
511
+
512
+ - virtual util::Status status() const = 0;
513
+ + virtual absl::Status status() const = 0;
514
+ virtual bool Write(absl::string_view text) = 0;
515
+ virtual bool WriteLine(absl::string_view text) = 0;
516
+ };
517
+ diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc
518
+ index 790e756..39ece99 100644
519
+ --- a/src/filesystem_test.cc
520
+ +++ b/src/filesystem_test.cc
521
+ @@ -14,7 +14,7 @@
522
+
523
+ #include "filesystem.h"
524
+ #include "testharness.h"
525
+ -#include "third_party/absl/strings/str_cat.h"
526
+ +#include "absl/strings/str_cat.h"
527
+ #include "util.h"
528
+
529
+ namespace sentencepiece {
530
+ diff --git a/src/init.h b/src/init.h
531
+ index 090a2d9..acfda8a 100644
532
+ --- a/src/init.h
533
+ +++ b/src/init.h
534
+ @@ -16,8 +16,8 @@
535
+ #define INIT_H_
536
+
537
+ #include "common.h"
538
+ -#include "third_party/absl/flags/flag.h"
539
+ -#include "third_party/absl/flags/parse.h"
540
+ +#include "absl/flags/flag.h"
541
+ +#include "absl/flags/parse.h"
542
+
543
+ ABSL_DECLARE_FLAG(int32, minloglevel);
544
+
545
+ diff --git a/src/model_factory.cc b/src/model_factory.cc
546
+ index be99501..040c00c 100644
547
+ --- a/src/model_factory.cc
548
+ +++ b/src/model_factory.cc
549
+ @@ -15,7 +15,7 @@
550
+ #include "bpe_model.h"
551
+ #include "char_model.h"
552
+ #include "model_factory.h"
553
+ -#include "third_party/absl/memory/memory.h"
554
+ +#include "absl/memory/memory.h"
555
+ #include "unigram_model.h"
556
+ #include "word_model.h"
557
+
558
+ diff --git a/src/model_interface.cc b/src/model_interface.cc
559
+ index c49be1e..22c6378 100644
560
+ --- a/src/model_interface.cc
561
+ +++ b/src/model_interface.cc
562
+ @@ -16,8 +16,8 @@
563
+
564
+ #include "model_interface.h"
565
+ #include "sentencepiece_model.pb.h"
566
+ -#include "third_party/absl/memory/memory.h"
567
+ -#include "third_party/absl/strings/str_format.h"
568
+ +#include "absl/memory/memory.h"
569
+ +#include "absl/strings/str_format.h"
570
+ #include "util.h"
571
+
572
+ namespace sentencepiece {
573
+ diff --git a/src/model_interface.h b/src/model_interface.h
574
+ index aef5b53..c7858fb 100644
575
+ --- a/src/model_interface.h
576
+ +++ b/src/model_interface.h
577
+ @@ -25,9 +25,10 @@
578
+ #include "normalizer.h"
579
+ #include "sentencepiece_model.pb.h"
580
+ #include "sentencepiece_processor.h"
581
+ -#include "third_party/absl/container/flat_hash_map.h"
582
+ -#include "third_party/absl/strings/string_view.h"
583
+ -#include "third_party/darts_clone/darts.h"
584
+ +#include "absl/container/flat_hash_map.h"
585
+ +#include "absl/strings/string_view.h"
586
+ +#include "absl/status/status.h"
587
+ +#include "include/darts.h"
588
+ #include "util.h"
589
+
590
+ namespace sentencepiece {
591
+ @@ -69,7 +70,7 @@ class ModelInterface {
592
+
593
+ // Returns Status.
594
+ // Encode/Decode functions are valid only when status is OK.
595
+ - virtual util::Status status() const { return status_; }
596
+ + virtual absl::Status status() const { return status_; }
597
+
598
+ virtual const ModelProto &model_proto() const { return *model_proto_; }
599
+
600
+ @@ -82,7 +83,7 @@ class ModelInterface {
601
+ // normally users do not need to call this function. This function is provided
602
+ // just in case that a user want to manually choose which encoder version to
603
+ // use.
604
+ - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) {
605
+ + virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version) {
606
+ encoder_version_ = encoder_version;
607
+ return util::OkStatus();
608
+ }
609
+ @@ -261,7 +262,7 @@ class ModelInterface {
610
+ EncoderVersion encoder_version_ = EncoderVersion::kOptimized;
611
+
612
+ // status.
613
+ - util::Status status_;
614
+ + absl::Status status_;
615
+ };
616
+ } // namespace sentencepiece
617
+ #endif // MODEL_INTERFACE_H_
618
+ diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc
619
+ index 69ee4e6..26a1e05 100644
620
+ --- a/src/model_interface_test.cc
621
+ +++ b/src/model_interface_test.cc
622
+ @@ -15,7 +15,7 @@
623
+ #include "model_factory.h"
624
+ #include "model_interface.h"
625
+ #include "testharness.h"
626
+ -#include "third_party/absl/container/flat_hash_map.h"
627
+ +#include "absl/container/flat_hash_map.h"
628
+ #include "util.h"
629
+
630
+ namespace sentencepiece {
631
+ diff --git a/src/normalizer.cc b/src/normalizer.cc
632
+ index 100b875..c553906 100644
633
+ --- a/src/normalizer.cc
634
+ +++ b/src/normalizer.cc
635
+ @@ -18,11 +18,12 @@
636
+ #include <vector>
637
+
638
+ #include "common.h"
639
+ -#include "third_party/absl/memory/memory.h"
640
+ -#include "third_party/absl/strings/match.h"
641
+ -#include "third_party/absl/strings/string_view.h"
642
+ -#include "third_party/absl/strings/strip.h"
643
+ -#include "third_party/darts_clone/darts.h"
644
+ +#include "absl/memory/memory.h"
645
+ +#include "absl/strings/match.h"
646
+ +#include "absl/strings/string_view.h"
647
+ +#include "absl/strings/strip.h"
648
+ +#include "absl/status/status.h"
649
+ +#include "include/darts.h"
650
+ #include "util.h"
651
+
652
+ namespace sentencepiece {
653
+ @@ -71,7 +72,7 @@ void Normalizer::Init() {
654
+ }
655
+ }
656
+
657
+ -util::Status Normalizer::Normalize(absl::string_view input,
658
+ +absl::Status Normalizer::Normalize(absl::string_view input,
659
+ std::string *normalized,
660
+ std::vector<size_t> *norm_to_orig) const {
661
+ norm_to_orig->clear();
662
+ @@ -274,7 +275,7 @@ std::string Normalizer::EncodePrecompiledCharsMap(
663
+ }
664
+
665
+ // static
666
+ -util::Status Normalizer::DecodePrecompiledCharsMap(
667
+ +absl::Status Normalizer::DecodePrecompiledCharsMap(
668
+ absl::string_view blob, absl::string_view *trie_blob,
669
+ absl::string_view *normalized, std::string *buffer) {
670
+ uint32 trie_blob_size = 0;
671
+ diff --git a/src/normalizer.h b/src/normalizer.h
672
+ index 622bbd2..21d1385 100644
673
+ --- a/src/normalizer.h
674
+ +++ b/src/normalizer.h
675
+ @@ -24,8 +24,9 @@
676
+ #include "common.h"
677
+ #include "sentencepiece_model.pb.h"
678
+ #include "sentencepiece_processor.h"
679
+ -#include "third_party/absl/strings/string_view.h"
680
+ -#include "third_party/darts_clone/darts.h"
681
+ +#include "absl/strings/string_view.h"
682
+ +#include "absl/status/status.h"
683
+ +#include "include/darts.h"
684
+ #include "util.h"
685
+
686
+ namespace sentencepiece {
687
+ @@ -75,7 +76,7 @@ class Normalizer {
688
+
689
+ // Returns Status.
690
+ // Normalizes function is valid only when status is OK.
691
+ - virtual util::Status status() const { return status_; }
692
+ + virtual absl::Status status() const { return status_; }
693
+
694
+ // Normalizes a plain utf8 string into an internal representation for
695
+ // Sentencepiece model. |norm_to_orig| stores the byte-alignment from
696
+ @@ -86,7 +87,7 @@ class Normalizer {
697
+ // - Adds a prefix space.
698
+ // - Replaces a space with a meta symbol.
699
+ // - Removing heading, tailing and other redundant spaces.
700
+ - virtual util::Status Normalize(absl::string_view input,
701
+ + virtual absl::Status Normalize(absl::string_view input,
702
+ std::string *normalized,
703
+ std::vector<size_t> *norm_to_orig) const;
704
+
705
+ @@ -121,7 +122,7 @@ class Normalizer {
706
+ absl::string_view normalized);
707
+
708
+ // Decodes blob into trie_blob and normalized string.
709
+ - static util::Status DecodePrecompiledCharsMap(absl::string_view blob,
710
+ + static absl::Status DecodePrecompiledCharsMap(absl::string_view blob,
711
+ absl::string_view *trie_blob,
712
+ absl::string_view *normalized,
713
+ std::string *buffer = nullptr);
714
+ @@ -153,7 +154,7 @@ class Normalizer {
715
+ #endif
716
+
717
+ // Normalizer's status.
718
+ - util::Status status_;
719
+ + absl::Status status_;
720
+ };
721
+ } // namespace normalizer
722
+ } // namespace sentencepiece
723
+ diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc
724
+ index 049658e..8021511 100644
725
+ --- a/src/pretokenizer_for_training.cc
726
+ +++ b/src/pretokenizer_for_training.cc
727
+ @@ -14,7 +14,7 @@
728
+ #include <string>
729
+
730
+ #include "pretokenizer_for_training.h"
731
+ -#include "third_party/absl/strings/str_replace.h"
732
+ +#include "absl/strings/str_replace.h"
733
+
734
+ namespace sentencepiece {
735
+ namespace pretokenizer {
736
+ diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h
737
+ index 2d3bc82..b4a6de3 100644
738
+ --- a/src/pretokenizer_for_training.h
739
+ +++ b/src/pretokenizer_for_training.h
740
+ @@ -21,7 +21,8 @@
741
+ #include "common.h"
742
+ #include "sentencepiece.pb.h"
743
+ #include "sentencepiece_processor.h"
744
+ -#include "third_party/absl/strings/string_view.h"
745
+ +#include "absl/strings/string_view.h"
746
+ +#include "absl/status/status.h"
747
+
748
+ namespace sentencepiece {
749
+ namespace pretokenizer {
750
+ @@ -30,7 +31,7 @@ class PretokenizerForTrainingInterface {
751
+ public:
752
+ PretokenizerForTrainingInterface() {}
753
+ virtual ~PretokenizerForTrainingInterface() {}
754
+ - virtual util::Status status() const = 0;
755
+ + virtual absl::Status status() const = 0;
756
+
757
+ // Puts kUPPBoundaryStr before and after the pre-tokenizer's segmentation
758
+ // when there are no spaces between these tokens.
759
+ diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc
760
+ index 80f4787..de89fe3 100644
761
+ --- a/src/pretokenizer_for_training_test.cc
762
+ +++ b/src/pretokenizer_for_training_test.cc
763
+ @@ -13,8 +13,9 @@
764
+ // limitations under the License.!
765
+ #include "pretokenizer_for_training.h"
766
+ #include "testharness.h"
767
+ -#include "third_party/absl/strings/str_cat.h"
768
+ +#include "absl/strings/str_cat.h"
769
+ #include "trainer_interface.h"
770
+ +#include "absl/status/status.h"
771
+
772
+ namespace sentencepiece {
773
+ namespace pretokenizer {
774
+ @@ -28,7 +29,7 @@ class MockPretokenizer : public PretokenizerForTrainingInterface {
775
+ return spt_;
776
+ }
777
+
778
+ - util::Status status() const override { return util::OkStatus(); }
779
+ + absl::Status status() const override { return util::OkStatus(); }
780
+
781
+ void SetOutput(const SentencePieceText &spt) { spt_ = spt; }
782
+
783
+ diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc
784
+ index 1e4e7a0..78ae527 100644
785
+ --- a/src/sentencepiece_processor.cc
786
+ +++ b/src/sentencepiece_processor.cc
787
+ @@ -23,14 +23,15 @@
788
+ #include "normalizer.h"
789
+ #include "sentencepiece.pb.h"
790
+ #include "sentencepiece_processor.h"
791
+ -#include "third_party/absl/memory/memory.h"
792
+ -#include "third_party/absl/strings/numbers.h"
793
+ -#include "third_party/absl/strings/str_cat.h"
794
+ -#include "third_party/absl/strings/str_join.h"
795
+ -#include "third_party/absl/strings/str_replace.h"
796
+ -#include "third_party/absl/strings/str_split.h"
797
+ -#include "third_party/absl/strings/string_view.h"
798
+ -#include "third_party/absl/strings/strip.h"
799
+ +#include "absl/memory/memory.h"
800
+ +#include "absl/strings/numbers.h"
801
+ +#include "absl/strings/str_cat.h"
802
+ +#include "absl/strings/str_join.h"
803
+ +#include "absl/strings/str_replace.h"
804
+ +#include "absl/strings/str_split.h"
805
+ +#include "absl/strings/string_view.h"
806
+ +#include "absl/strings/strip.h"
807
+ +#include "absl/status/status.h"
808
+ #include "unigram_model.h"
809
+ #include "util.h"
810
+
811
+ @@ -52,7 +53,7 @@ const char kReplacementCharacter[] = "\xef\xbf\xbd";
812
+ SentencePieceProcessor::SentencePieceProcessor() {}
813
+ SentencePieceProcessor::~SentencePieceProcessor() {}
814
+
815
+ -util::Status SentencePieceProcessor::Load(absl::string_view filename) {
816
+ +absl::Status SentencePieceProcessor::Load(absl::string_view filename) {
817
+ auto model_proto = absl::make_unique<ModelProto>();
818
+ RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get()));
819
+ return Load(std::move(model_proto));
820
+ @@ -62,13 +63,13 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) {
821
+ CHECK_OK(Load(filename));
822
+ }
823
+
824
+ -util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
825
+ +absl::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
826
+ auto model_proto_copy = absl::make_unique<ModelProto>();
827
+ *model_proto_copy = model_proto;
828
+ return Load(std::move(model_proto_copy));
829
+ }
830
+
831
+ -util::Status SentencePieceProcessor::LoadFromSerializedProto(
832
+ +absl::Status SentencePieceProcessor::LoadFromSerializedProto(
833
+ absl::string_view serialized) {
834
+ auto model_proto = absl::make_unique<ModelProto>();
835
+ CHECK_OR_RETURN(
836
+ @@ -76,7 +77,7 @@ util::Status SentencePieceProcessor::LoadFromSerializedProto(
837
+ return Load(std::move(model_proto));
838
+ }
839
+
840
+ -util::Status SentencePieceProcessor::Load(
841
+ +absl::Status SentencePieceProcessor::Load(
842
+ std::unique_ptr<ModelProto> model_proto) {
843
+ model_proto_ = std::move(model_proto);
844
+ model_ = ModelFactory::Create(*model_proto_);
845
+ @@ -117,7 +118,7 @@ util::Status SentencePieceProcessor::Load(
846
+ return util::OkStatus();
847
+ }
848
+
849
+ -util::Status SentencePieceProcessor::SetEncoderVersion(
850
+ +absl::Status SentencePieceProcessor::SetEncoderVersion(
851
+ EncoderVersion encoder_version) {
852
+ return model_->SetEncoderVersion(encoder_version);
853
+ }
854
+ @@ -126,17 +127,17 @@ EncoderVersion SentencePieceProcessor::GetEncoderVersion() const {
855
+ return model_->GetEncoderVersion();
856
+ }
857
+
858
+ -util::Status SentencePieceProcessor::SetEncodeExtraOptions(
859
+ +absl::Status SentencePieceProcessor::SetEncodeExtraOptions(
860
+ absl::string_view extra_options) {
861
+ return ParseExtraOptions(extra_options, &encode_extra_options_);
862
+ }
863
+
864
+ -util::Status SentencePieceProcessor::SetDecodeExtraOptions(
865
+ +absl::Status SentencePieceProcessor::SetDecodeExtraOptions(
866
+ absl::string_view extra_options) {
867
+ return ParseExtraOptions(extra_options, &decode_extra_options_);
868
+ }
869
+
870
+ -util::Status SentencePieceProcessor::status() const {
871
+ +absl::Status SentencePieceProcessor::status() const {
872
+ CHECK_OR_RETURN(model_) << "Model is not initialized.";
873
+ CHECK_OR_RETURN(normalizer_) << "Normalizer is not initialized.";
874
+ RETURN_IF_ERROR(model_->status());
875
+ @@ -144,7 +145,7 @@ util::Status SentencePieceProcessor::status() const {
876
+ return util::OkStatus();
877
+ }
878
+
879
+ -util::Status SentencePieceProcessor::SetVocabulary(
880
+ +absl::Status SentencePieceProcessor::SetVocabulary(
881
+ const std::vector<std::string> &valid_vocab) {
882
+ RETURN_IF_ERROR(status());
883
+
884
+ @@ -174,7 +175,7 @@ util::Status SentencePieceProcessor::SetVocabulary(
885
+ return util::OkStatus();
886
+ }
887
+
888
+ -util::Status SentencePieceProcessor::ResetVocabulary() {
889
+ +absl::Status SentencePieceProcessor::ResetVocabulary() {
890
+ RETURN_IF_ERROR(status());
891
+ for (auto &piece : *(model_proto_->mutable_pieces())) {
892
+ if (piece.type() == ModelProto::SentencePiece::UNUSED)
893
+ @@ -184,7 +185,7 @@ util::Status SentencePieceProcessor::ResetVocabulary() {
894
+ return util::OkStatus();
895
+ }
896
+
897
+ -util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
898
+ +absl::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
899
+ int threshold) {
900
+ auto input = filesystem::NewReadableFile(filename);
901
+ RETURN_IF_ERROR(input->status());
902
+ @@ -221,7 +222,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
903
+
904
+ //////////////////////////////////////////////////////////////
905
+ // Simple API.
906
+ -util::Status SentencePieceProcessor::Encode(
907
+ +absl::Status SentencePieceProcessor::Encode(
908
+ absl::string_view input, std::vector<std::string> *pieces) const {
909
+ CHECK_OR_RETURN_STATUS_STL(pieces);
910
+
911
+ @@ -234,7 +235,7 @@ util::Status SentencePieceProcessor::Encode(
912
+ return util::OkStatus();
913
+ }
914
+
915
+ -util::Status SentencePieceProcessor::Encode(absl::string_view input,
916
+ +absl::Status SentencePieceProcessor::Encode(absl::string_view input,
917
+ std::vector<int> *ids) const {
918
+ CHECK_OR_RETURN_STATUS_STL(ids);
919
+
920
+ @@ -247,7 +248,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input,
921
+ return util::OkStatus();
922
+ }
923
+
924
+ -util::Status SentencePieceProcessor::Decode(
925
+ +absl::Status SentencePieceProcessor::Decode(
926
+ const std::vector<std::string> &pieces, std::string *detokenized) const {
927
+ CHECK_OR_RETURN_STATUS_STL(detokenized);
928
+
929
+ @@ -258,7 +259,7 @@ util::Status SentencePieceProcessor::Decode(
930
+ return util::OkStatus();
931
+ }
932
+
933
+ -util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
934
+ +absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
935
+ std::string *detokenized) const {
936
+ CHECK_OR_RETURN_STATUS_STL(detokenized);
937
+
938
+ @@ -269,7 +270,7 @@ util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
939
+ return util::OkStatus();
940
+ }
941
+
942
+ -util::Status SentencePieceProcessor::NBestEncode(
943
+ +absl::Status SentencePieceProcessor::NBestEncode(
944
+ absl::string_view input, int nbest_size,
945
+ std::vector<std::vector<std::string>> *pieces) const {
946
+ CHECK_OR_RETURN_STATUS_STL(pieces);
947
+ @@ -287,7 +288,7 @@ util::Status SentencePieceProcessor::NBestEncode(
948
+ return util::OkStatus();
949
+ }
950
+
951
+ -util::Status SentencePieceProcessor::NBestEncode(
952
+ +absl::Status SentencePieceProcessor::NBestEncode(
953
+ absl::string_view input, int nbest_size,
954
+ std::vector<std::vector<int>> *ids) const {
955
+ CHECK_OR_RETURN_STATUS_STL(ids);
956
+ @@ -305,7 +306,7 @@ util::Status SentencePieceProcessor::NBestEncode(
957
+ return util::OkStatus();
958
+ }
959
+
960
+ -util::Status SentencePieceProcessor::SampleEncode(
961
+ +absl::Status SentencePieceProcessor::SampleEncode(
962
+ absl::string_view input, int nbest_size, float alpha,
963
+ std::vector<std::string> *pieces) const {
964
+ CHECK_OR_RETURN_STATUS_STL(pieces);
965
+ @@ -319,7 +320,7 @@ util::Status SentencePieceProcessor::SampleEncode(
966
+ return util::OkStatus();
967
+ }
968
+
969
+ -util::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
970
+ +absl::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
971
+ int nbest_size, float alpha,
972
+ std::vector<int> *ids) const {
973
+ CHECK_OR_RETURN_STATUS_STL(ids);
974
+ @@ -333,7 +334,7 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
975
+ return util::OkStatus();
976
+ }
977
+
978
+ -util::Status SentencePieceProcessor::PopulateSentencePieceText(
979
+ +absl::Status SentencePieceProcessor::PopulateSentencePieceText(
980
+ absl::string_view input, absl::string_view normalized,
981
+ const std::vector<size_t> &norm_to_orig, const EncodeResult &result,
982
+ SentencePieceText *spt) const {
983
+ @@ -424,7 +425,7 @@ util::Status SentencePieceProcessor::PopulateSentencePieceText(
984
+ return util::OkStatus();
985
+ } // namespace sentencepiece
986
+
987
+ -util::Status SentencePieceProcessor::Encode(absl::string_view input,
988
+ +absl::Status SentencePieceProcessor::Encode(absl::string_view input,
989
+ SentencePieceText *spt) const {
990
+ CHECK_OR_RETURN_STATUS_PROTO(spt);
991
+
992
+ @@ -439,7 +440,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input,
993
+ return util::OkStatus();
994
+ }
995
+
996
+ -util::Status SentencePieceProcessor::NBestEncode(
997
+ +absl::Status SentencePieceProcessor::NBestEncode(
998
+ absl::string_view input, int nbest_size,
999
+ NBestSentencePieceText *nbest_spt) const {
1000
+ CHECK_OR_RETURN_STATUS_PROTO(nbest_spt);
1001
+ @@ -464,7 +465,7 @@ util::Status SentencePieceProcessor::NBestEncode(
1002
+ return util::OkStatus();
1003
+ }
1004
+
1005
+ -util::Status SentencePieceProcessor::SampleEncode(
1006
+ +absl::Status SentencePieceProcessor::SampleEncode(
1007
+ absl::string_view input, int nbest_size, float alpha,
1008
+ SentencePieceText *spt) const {
1009
+ CHECK_OR_RETURN_STATUS_PROTO(spt);
1010
+ @@ -503,7 +504,7 @@ util::Status SentencePieceProcessor::SampleEncode(
1011
+ return util::OkStatus();
1012
+ }
1013
+
1014
+ -util::Status SentencePieceProcessor::SampleEncodeAndScore(
1015
+ +absl::Status SentencePieceProcessor::SampleEncodeAndScore(
1016
+ absl::string_view input, int samples, float theta, bool wor,
1017
+ bool include_best, NBestSentencePieceText *samples_spt) const {
1018
+ CHECK_OR_RETURN(model_->IsSampleEncodeAndScoreAvailable())
1019
+ @@ -527,7 +528,7 @@ util::Status SentencePieceProcessor::SampleEncodeAndScore(
1020
+ return util::OkStatus();
1021
+ }
1022
+
1023
+ -util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input,
1024
+ +absl::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input,
1025
+ float theta,
1026
+ float *entropy) const {
1027
+ CHECK_OR_RETURN(model_->IsCalculateEntropyAvailable())
1028
+ @@ -540,7 +541,7 @@ util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input,
1029
+ return util::OkStatus();
1030
+ }
1031
+
1032
+ -util::Status SentencePieceProcessor::Decode(
1033
+ +absl::Status SentencePieceProcessor::Decode(
1034
+ const std::vector<std::string> &pieces, SentencePieceText *spt) const {
1035
+ CHECK_OR_RETURN_STATUS_PROTO(spt);
1036
+
1037
+ @@ -591,7 +592,7 @@ util::Status SentencePieceProcessor::Decode(
1038
+ };
1039
+
1040
+ auto ProcessBytePieces = [&](int token_index_begin,
1041
+ - int token_index_end) -> util::Status {
1042
+ + int token_index_end) -> absl::Status {
1043
+ if (token_index_begin >= token_index_end) {
1044
+ return util::OkStatus();
1045
+ }
1046
+ @@ -661,14 +662,14 @@ util::Status SentencePieceProcessor::Decode(
1047
+ return util::OkStatus();
1048
+ }
1049
+
1050
+ -util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
1051
+ +absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
1052
+ SentencePieceText *spt) const {
1053
+ std::vector<std::string> pieces;
1054
+ const int num_pieces = GetPieceSize();
1055
+ pieces.reserve(ids.size());
1056
+ for (const int id : ids) {
1057
+ if (id < 0 || id >= num_pieces) {
1058
+ - return util::Status(util::StatusCode::kOutOfRange,
1059
+ + return absl::Status(absl::StatusCode::kOutOfRange,
1060
+ absl::StrCat("Invalid id: ", id));
1061
+ }
1062
+ pieces.emplace_back(IdToPiece(id));
1063
+ @@ -783,7 +784,7 @@ int SentencePieceProcessor::pad_id() const {
1064
+ }
1065
+
1066
+ // static
1067
+ -util::Status SentencePieceProcessor::ApplyExtraOptions(
1068
+ +absl::Status SentencePieceProcessor::ApplyExtraOptions(
1069
+ const std::vector<ExtraOption> &extra_options,
1070
+ SentencePieceText *spt) const {
1071
+ for (const auto &extra_option : extra_options) {
1072
+ @@ -818,7 +819,7 @@ util::Status SentencePieceProcessor::ApplyExtraOptions(
1073
+ }
1074
+
1075
+ // static
1076
+ -util::Status SentencePieceProcessor::ParseExtraOptions(
1077
+ +absl::Status SentencePieceProcessor::ParseExtraOptions(
1078
+ absl::string_view _extra_option,
1079
+ std::vector<SentencePieceProcessor::ExtraOption> *extra_options) const {
1080
+ absl::string_view extra_option(_extra_option.data(), _extra_option.size());
1081
+ @@ -877,7 +878,7 @@ void SetRandomGeneratorSeed(unsigned int seed);
1082
+
1083
+ namespace io {
1084
+
1085
+ -util::Status LoadModelProto(absl::string_view filename,
1086
+ +absl::Status LoadModelProto(absl::string_view filename,
1087
+ ModelProto *model_proto) {
1088
+ if (filename.empty()) {
1089
+ return util::NotFoundError("model file path should not be empty.");
1090
+ @@ -893,7 +894,7 @@ util::Status LoadModelProto(absl::string_view filename,
1091
+ return util::OkStatus();
1092
+ }
1093
+
1094
+ -util::Status SaveModelProto(absl::string_view filename,
1095
+ +absl::Status SaveModelProto(absl::string_view filename,
1096
+ const ModelProto &model_proto) {
1097
+ if (filename.empty()) {
1098
+ return util::NotFoundError("model file path should not be empty.");
1099
+ diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h
1100
+ index e8bd5f5..346fb0e 100644
1101
+ --- a/src/sentencepiece_processor.h
1102
+ +++ b/src/sentencepiece_processor.h
1103
+ @@ -20,9 +20,10 @@
1104
+ #include <string>
1105
+ #include <utility>
1106
+ #include <vector>
1107
+ +#include "absl/status/status.h"
1108
+
1109
+ #if defined(_USE_INTERNAL_STRING_VIEW)
1110
+ -#include "third_party/absl/strings/string_view.h"
1111
+ +#include "absl/strings/string_view.h"
1112
+ #elif defined(_USE_TF_STRING_VIEW)
1113
+ #include "absl/strings/string_view.h"
1114
+ #else
1115
+ @@ -185,7 +186,7 @@ class SentencePieceProcessor {
1116
+
1117
+ // Loads model from `filename`.
1118
+ // Returns false if `filename` cannot be loaded.
1119
+ - virtual util::Status Load(absl::string_view filename);
1120
+ + virtual absl::Status Load(absl::string_view filename);
1121
+
1122
+ // Loads model from `filename`.
1123
+ // Crash if `filename` cannot be loaded.
1124
+ @@ -193,24 +194,24 @@ class SentencePieceProcessor {
1125
+
1126
+ // Loads model from `model_proto`.
1127
+ // `model_proto` is copied.
1128
+ - virtual util::Status Load(const ModelProto &model_proto);
1129
+ + virtual absl::Status Load(const ModelProto &model_proto);
1130
+
1131
+ // Loads model from `model_proto`.
1132
+ // `model_proto` is moved.
1133
+ - virtual util::Status Load(std::unique_ptr<ModelProto> model_proto);
1134
+ + virtual absl::Status Load(std::unique_ptr<ModelProto> model_proto);
1135
+
1136
+ // Loads model from `serialized`, which is a string-serialized model proto.
1137
+ // Useful to load the model from a platform independent blob object.
1138
+ - virtual util::Status LoadFromSerializedProto(absl::string_view serialized);
1139
+ + virtual absl::Status LoadFromSerializedProto(absl::string_view serialized);
1140
+
1141
+ // Returns the status. Encode/Decode methods are valid when status is OK.
1142
+ - virtual util::Status status() const;
1143
+ + virtual absl::Status status() const;
1144
+
1145
+ // Sets encode extra_option sequence.
1146
+ - virtual util::Status SetEncodeExtraOptions(absl::string_view extra_option);
1147
+ + virtual absl::Status SetEncodeExtraOptions(absl::string_view extra_option);
1148
+
1149
+ // Sets decode extra_option sequence.
1150
+ - virtual util::Status SetDecodeExtraOptions(absl::string_view extra_option);
1151
+ + virtual absl::Status SetDecodeExtraOptions(absl::string_view extra_option);
1152
+
1153
+ //////////////////////////////////////////////////////////////
1154
+ // Vocabulary restriction.
1155
+ @@ -219,41 +220,41 @@ class SentencePieceProcessor {
1156
+
1157
+ // Restricts the vocabulary set.
1158
+ // The input sentences are encoded into the tokens in `valid_vocab`.
1159
+ - virtual util::Status SetVocabulary(
1160
+ + virtual absl::Status SetVocabulary(
1161
+ const std::vector<std::string> &valid_vocab);
1162
+
1163
+ // Reverts the vocabulary restriction.
1164
+ - virtual util::Status ResetVocabulary();
1165
+ + virtual absl::Status ResetVocabulary();
1166
+
1167
+ // Loads the valid vocabulary set from `filename` in TSV format.
1168
+ // Format: <token> <tab> <freq>.
1169
+ // Any token with frequency < threshold will be treated as OOV.
1170
+ - virtual util::Status LoadVocabulary(absl::string_view filename,
1171
+ + virtual absl::Status LoadVocabulary(absl::string_view filename,
1172
+ int threshold);
1173
+
1174
+ //////////////////////////////////////////////////////////////
1175
+ // Simple API.
1176
+ //
1177
+ // Given a UTF8 input, encodes it into a sequence of sentence pieces.
1178
+ - virtual util::Status Encode(absl::string_view input,
1179
+ + virtual absl::Status Encode(absl::string_view input,
1180
+ std::vector<std::string> *pieces) const;
1181
+
1182
+ // Given a UTF8 input, encodes it into a sequence of ids.
1183
+ - virtual util::Status Encode(absl::string_view input,
1184
+ + virtual absl::Status Encode(absl::string_view input,
1185
+ std::vector<int> *ids) const;
1186
+
1187
+ // Given a sequence of pieces, decodes it into a detokenized output.
1188
+ - virtual util::Status Decode(const std::vector<std::string> &pieces,
1189
+ + virtual absl::Status Decode(const std::vector<std::string> &pieces,
1190
+ std::string *detokenized) const;
1191
+
1192
+ // Given a sequence of ids, decodes it into a detokenized output.
1193
+ - virtual util::Status Decode(const std::vector<int> &ids,
1194
+ + virtual absl::Status Decode(const std::vector<int> &ids,
1195
+ std::string *detokenized) const;
1196
+
1197
+ // Sets the encoder version. Normally users do not need to call this function.
1198
+ // But they can call this fucntion just in case if they want to fall back to
1199
+ // the original encoder.
1200
+ - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version);
1201
+ + virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version);
1202
+
1203
+ // Returns the current encoder version in use.
1204
+ virtual EncoderVersion GetEncoderVersion() const;
1205
+ @@ -261,12 +262,12 @@ class SentencePieceProcessor {
1206
+ //////////////////////////////////////////////////////////////
1207
+ // NBest API.
1208
+ // Same as Encode, but returns nbest results.
1209
+ - virtual util::Status NBestEncode(
1210
+ + virtual absl::Status NBestEncode(
1211
+ absl::string_view input, int nbest_size,
1212
+ std::vector<std::vector<std::string>> *pieces) const;
1213
+
1214
+ // Same as Encode, but returns nbest results.
1215
+ - virtual util::Status NBestEncode(absl::string_view input, int nbest_size,
1216
+ + virtual absl::Status NBestEncode(absl::string_view input, int nbest_size,
1217
+ std::vector<std::vector<int>> *ids) const;
1218
+
1219
+ //////////////////////////////////////////////////////////////
1220
+ @@ -289,12 +290,12 @@ class SentencePieceProcessor {
1221
+ // in https://arxiv.org/abs/1910.13267
1222
+ // Nbest-based sampling is not supported so nbest_size parameter is ignored in
1223
+ // BPE.
1224
+ - virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
1225
+ + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size,
1226
+ float alpha,
1227
+ std::vector<std::string> *pieces) const;
1228
+
1229
+ // Same as above, but returns a sequence of ids.
1230
+ - virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
1231
+ + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size,
1232
+ float alpha, std::vector<int> *ids) const;
1233
+
1234
+ //////////////////////////////////////////////////////////////
1235
+ @@ -303,16 +304,16 @@ class SentencePieceProcessor {
1236
+ // and internal sentencepiece sequence.
1237
+ //
1238
+ // Given a UTF8 input, encodes it into SentencePieceText.
1239
+ - virtual util::Status Encode(absl::string_view input,
1240
+ + virtual absl::Status Encode(absl::string_view input,
1241
+ SentencePieceText *spt) const;
1242
+
1243
+ // Same as above, but returns NBestSentencePieceText.
1244
+ - virtual util::Status NBestEncode(absl::string_view input, int nbest_size,
1245
+ + virtual absl::Status NBestEncode(absl::string_view input, int nbest_size,
1246
+ NBestSentencePieceText *nbest_spt) const;
1247
+
1248
+ // Same as above, but samples one segmentation from the hypotheses
1249
+ // (Lattice).
1250
+ - virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
1251
+ + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size,
1252
+ float alpha, SentencePieceText *spt) const;
1253
+
1254
+ // Sample `samples` segmentations from the segmentation lattice.
1255
+ @@ -323,21 +324,21 @@ class SentencePieceProcessor {
1256
+ // If `include_best` is true, the best tokenization is always included in the
1257
+ // sample, and the remaining elements are sampled excluding the best.
1258
+ // This method is only available in Unigram mode.
1259
+ - virtual util::Status SampleEncodeAndScore(
1260
+ + virtual absl::Status SampleEncodeAndScore(
1261
+ absl::string_view input, int samples, float theta, bool wor,
1262
+ bool include_best, NBestSentencePieceText *samples_spt) const;
1263
+
1264
+ // Calculate entropy of possible tokenization.
1265
+ // Only available in unigram mode.
1266
+ - virtual util::Status CalculateEntropy(absl::string_view input, float theta,
1267
+ + virtual absl::Status CalculateEntropy(absl::string_view input, float theta,
1268
+ float *entropy) const;
1269
+
1270
+ // Given a sequence of pieces, decodes it into SentencePieceText.
1271
+ - virtual util::Status Decode(const std::vector<std::string> &pieces,
1272
+ + virtual absl::Status Decode(const std::vector<std::string> &pieces,
1273
+ SentencePieceText *spt) const;
1274
+
1275
+ // Given a sequence of ids, decodes it into SentencePieceText.
1276
+ - virtual util::Status Decode(const std::vector<int> &ids,
1277
+ + virtual absl::Status Decode(const std::vector<int> &ids,
1278
+ SentencePieceText *spt) const;
1279
+
1280
+ //////////////////////////////////////////////////////////////
1281
+ @@ -487,13 +488,13 @@ class SentencePieceProcessor {
1282
+ private:
1283
+ enum ExtraOption { REVERSE, BOS, EOS };
1284
+
1285
+ - util::Status ParseExtraOptions(absl::string_view extra_option,
1286
+ + absl::Status ParseExtraOptions(absl::string_view extra_option,
1287
+ std::vector<ExtraOption> *extra_options) const;
1288
+
1289
+ - util::Status ApplyExtraOptions(const std::vector<ExtraOption> &extra_options,
1290
+ + absl::Status ApplyExtraOptions(const std::vector<ExtraOption> &extra_options,
1291
+ SentencePieceText *spt) const;
1292
+
1293
+ - util::Status PopulateSentencePieceText(
1294
+ + absl::Status PopulateSentencePieceText(
1295
+ absl::string_view input, absl::string_view normalized,
1296
+ const std::vector<size_t> &norm_to_orig,
1297
+ const std::vector<std::pair<absl::string_view, int>> &result,
1298
+ @@ -526,10 +527,10 @@ namespace io {
1299
+ // io::LoadModelProto("//path/spm.model", model_proto.get());
1300
+ // SentencePieceProcessor sp;
1301
+ // CHECK_OK(sp.Load(std::move(model_proto)));
1302
+ -util::Status LoadModelProto(absl::string_view, ModelProto *model_proto);
1303
+ +absl::Status LoadModelProto(absl::string_view, ModelProto *model_proto);
1304
+
1305
+ // Saves `model_proto` as `filename`.
1306
+ -util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto);
1307
+ +absl::Status SaveModelProto(absl::string_view, const ModelProto &model_proto);
1308
+ } // namespace io
1309
+ #endif // SWIG
1310
+ } // namespace sentencepiece
1311
+ diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc
1312
+ index 373e73e..829c3d4 100644
1313
+ --- a/src/sentencepiece_processor_test.cc
1314
+ +++ b/src/sentencepiece_processor_test.cc
1315
+ @@ -23,10 +23,10 @@
1316
+ #include "sentencepiece_processor.h"
1317
+ #include "sentencepiece_trainer.h"
1318
+ #include "testharness.h"
1319
+ -#include "third_party/absl/container/flat_hash_map.h"
1320
+ -#include "third_party/absl/memory/memory.h"
1321
+ -#include "third_party/absl/strings/str_cat.h"
1322
+ -#include "third_party/absl/strings/string_view.h"
1323
+ +#include "absl/container/flat_hash_map.h"
1324
+ +#include "absl/memory/memory.h"
1325
+ +#include "absl/strings/str_cat.h"
1326
+ +#include "absl/strings/string_view.h"
1327
+ #include "util.h"
1328
+
1329
+ namespace sentencepiece {
1330
+ diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc
1331
+ index b9fe64f..5b33cd7 100644
1332
+ --- a/src/sentencepiece_trainer.cc
1333
+ +++ b/src/sentencepiece_trainer.cc
1334
+ @@ -22,12 +22,13 @@
1335
+ #include "sentencepiece_model.pb.h"
1336
+ #include "sentencepiece_trainer.h"
1337
+ #include "spec_parser.h"
1338
+ -#include "third_party/absl/flags/flag.h"
1339
+ -#include "third_party/absl/strings/numbers.h"
1340
+ -#include "third_party/absl/strings/str_cat.h"
1341
+ -#include "third_party/absl/strings/str_split.h"
1342
+ -#include "third_party/absl/strings/string_view.h"
1343
+ -#include "third_party/absl/strings/strip.h"
1344
+ +#include "absl/flags/flag.h"
1345
+ +#include "absl/strings/numbers.h"
1346
+ +#include "absl/strings/str_cat.h"
1347
+ +#include "absl/strings/str_split.h"
1348
+ +#include "absl/strings/string_view.h"
1349
+ +#include "absl/strings/strip.h"
1350
+ +#include "absl/status/status.h"
1351
+ #include "trainer_factory.h"
1352
+ #include "util.h"
1353
+
1354
+ @@ -37,7 +38,7 @@ static constexpr char kDefaultNormalizerName[] = "nmt_nfkc";
1355
+ } // namespace
1356
+
1357
+ // static
1358
+ -util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
1359
+ +absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
1360
+ SentenceIterator *sentence_iterator,
1361
+ std::string *serialized_model_proto) {
1362
+ NormalizerSpec normalizer_spec;
1363
+ @@ -45,7 +46,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
1364
+ serialized_model_proto);
1365
+ }
1366
+
1367
+ -util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
1368
+ +absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
1369
+ const NormalizerSpec &normalizer_spec,
1370
+ SentenceIterator *sentence_iterator,
1371
+ std::string *serialized_model_proto) {
1372
+ @@ -55,7 +56,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec,
1373
+ }
1374
+
1375
+ // static
1376
+ -util::Status SentencePieceTrainer::Train(
1377
+ +absl::Status SentencePieceTrainer::Train(
1378
+ const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec,
1379
+ const NormalizerSpec &denormalizer_spec,
1380
+ SentenceIterator *sentence_iterator, std::string *serialized_model_proto) {
1381
+ @@ -97,7 +98,7 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(absl::string_view name) {
1382
+ }
1383
+
1384
+ // static
1385
+ -util::Status SentencePieceTrainer::MergeSpecsFromArgs(
1386
+ +absl::Status SentencePieceTrainer::MergeSpecsFromArgs(
1387
+ absl::string_view args, TrainerSpec *trainer_spec,
1388
+ NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec) {
1389
+ CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null.";
1390
+ @@ -125,7 +126,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs(
1391
+ }
1392
+
1393
+ // static
1394
+ -util::Status SentencePieceTrainer::MergeSpecsFromArgs(
1395
+ +absl::Status SentencePieceTrainer::MergeSpecsFromArgs(
1396
+ const std::unordered_map<std::string, std::string> &kwargs,
1397
+ TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec,
1398
+ NormalizerSpec *denormalizer_spec) {
1399
+ @@ -171,7 +172,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs(
1400
+ }
1401
+
1402
+ // static
1403
+ -util::Status SentencePieceTrainer::Train(absl::string_view args,
1404
+ +absl::Status SentencePieceTrainer::Train(absl::string_view args,
1405
+ SentenceIterator *sentence_iterator,
1406
+ std::string *serialized_model_proto) {
1407
+ LOG(INFO) << "Running command: " << args.data();
1408
+ @@ -185,7 +186,7 @@ util::Status SentencePieceTrainer::Train(absl::string_view args,
1409
+ }
1410
+
1411
+ // static
1412
+ -util::Status SentencePieceTrainer::Train(
1413
+ +absl::Status SentencePieceTrainer::Train(
1414
+ const std::unordered_map<std::string, std::string> &kwargs,
1415
+ SentenceIterator *sentence_iterator, std::string *serialized_model_proto) {
1416
+ TrainerSpec trainer_spec;
1417
+ @@ -198,7 +199,7 @@ util::Status SentencePieceTrainer::Train(
1418
+ }
1419
+
1420
+ // static
1421
+ -util::Status SentencePieceTrainer::PopulateNormalizerSpec(
1422
+ +absl::Status SentencePieceTrainer::PopulateNormalizerSpec(
1423
+ NormalizerSpec *normalizer_spec, bool is_denormalizer) {
1424
+ CHECK_OR_RETURN(normalizer_spec);
1425
+
1426
+ @@ -226,7 +227,7 @@ util::Status SentencePieceTrainer::PopulateNormalizerSpec(
1427
+ }
1428
+
1429
+ // static
1430
+ -util::Status SentencePieceTrainer::PopulateModelTypeFromString(
1431
+ +absl::Status SentencePieceTrainer::PopulateModelTypeFromString(
1432
+ absl::string_view type, TrainerSpec *spec) {
1433
+ static const std::unordered_map<std::string, TrainerSpec::ModelType>
1434
+ kModelTypeMap = {{"unigram", TrainerSpec::UNIGRAM},
1435
+ @@ -239,7 +240,7 @@ util::Status SentencePieceTrainer::PopulateModelTypeFromString(
1436
+ return util::OkStatus();
1437
+ }
1438
+
1439
+ - return util::StatusBuilder(util::StatusCode::kInternal, GTL_LOC)
1440
+ + return util::StatusBuilder(absl::StatusCode::kInternal, GTL_LOC)
1441
+ << "\"" << type << "\" is not found in TrainerSpec";
1442
+ }
1443
+
1444
+ @@ -248,7 +249,7 @@ const pretokenizer::PretokenizerForTrainingInterface *g_pretokenizer = nullptr;
1445
+ } // namespace
1446
+
1447
+ // static
1448
+ -util::Status SentencePieceTrainer::SetPretokenizerForTraining(
1449
+ +absl::Status SentencePieceTrainer::SetPretokenizerForTraining(
1450
+ const pretokenizer::PretokenizerForTrainingInterface *pretokenizer) {
1451
+ g_pretokenizer = pretokenizer;
1452
+ return util::OkStatus();
1453
+ diff --git a/src/sentencepiece_trainer.h b/src/sentencepiece_trainer.h
1454
+ index bb74ab9..ec6cf93 100644
1455
+ --- a/src/sentencepiece_trainer.h
1456
+ +++ b/src/sentencepiece_trainer.h
1457
+ @@ -19,6 +19,7 @@
1458
+ #include <unordered_map>
1459
+
1460
+ #include "sentencepiece_processor.h"
1461
+ +#include "absl/status/status.h"
1462
+
1463
+ namespace sentencepiece {
1464
+
1465
+ @@ -46,7 +47,7 @@ class SentenceIterator {
1466
+ virtual bool done() const = 0;
1467
+ virtual void Next() = 0;
1468
+ virtual const std::string &value() const = 0;
1469
+ - virtual util::Status status() const = 0;
1470
+ + virtual absl::Status status() const = 0;
1471
+ };
1472
+
1473
+ class SentencePieceTrainer {
1474
+ @@ -54,14 +55,14 @@ class SentencePieceTrainer {
1475
+ // Trains SentencePiece model with `trainer_spec`.
1476
+ // Default `normalizer_spec` is used.
1477
+ // When `sentence_iterator` is passed, load sentences from the iterator.
1478
+ - static util::Status Train(const TrainerSpec &trainer_spec,
1479
+ + static absl::Status Train(const TrainerSpec &trainer_spec,
1480
+ SentenceIterator *sentence_iterator = nullptr,
1481
+ std::string *serialized_model_proto = nullptr);
1482
+
1483
+ // Trains SentencePiece model with `trainer_spec` and
1484
+ // `normalizer_spec`.
1485
+ // When `sentence_iterator` is passed, load sentences from the iterator.
1486
+ - static util::Status Train(const TrainerSpec &trainer_spec,
1487
+ + static absl::Status Train(const TrainerSpec &trainer_spec,
1488
+ const NormalizerSpec &normalizer_spec,
1489
+ SentenceIterator *sentence_iterator = nullptr,
1490
+ std::string *serialized_model_proto = nullptr);
1491
+ @@ -69,7 +70,7 @@ class SentencePieceTrainer {
1492
+ // Trains SentencePiece model with `trainer_spec`, `normalizer_spec`
1493
+ // and `denormalizer_spec`.
1494
+ // When `sentence_iterator` is passed, load sentences from the iterator.
1495
+ - static util::Status Train(const TrainerSpec &trainer_spec,
1496
+ + static absl::Status Train(const TrainerSpec &trainer_spec,
1497
+ const NormalizerSpec &normalizer_spec,
1498
+ const NormalizerSpec &denormalizer_spec,
1499
+ SentenceIterator *sentence_iterator = nullptr,
1500
+ @@ -78,13 +79,13 @@ class SentencePieceTrainer {
1501
+ // e.g.,
1502
+ // '--input=data --model_prefix=m --vocab_size=8192 model_type=unigram'
1503
+ // When `sentence_iterator` is passed, load sentences from the iterator.
1504
+ - static util::Status Train(absl::string_view args,
1505
+ + static absl::Status Train(absl::string_view args,
1506
+ SentenceIterator *sentence_iterator = nullptr,
1507
+ std::string *serialized_model_proto = nullptr);
1508
+
1509
+ // Trains SentencePiece model with mapin `kwargs`.
1510
+ // e.g., {{"input", "data"}, {"model_prefix, "m"}, {"vocab_size", "8192"}...}
1511
+ - static util::Status Train(
1512
+ + static absl::Status Train(
1513
+ const std::unordered_map<std::string, std::string> &kwargs,
1514
+ SentenceIterator *sentence_iterator = nullptr,
1515
+ std::string *serialized_model_proto = nullptr);
1516
+ @@ -96,19 +97,19 @@ class SentencePieceTrainer {
1517
+
1518
+ // Populates necessary fields (precompiled_charmap) from
1519
+ // `NormalizerSpec::name` or `NormalizerSpec::normalization_rule_tsv`.
1520
+ - static util::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec,
1521
+ + static absl::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec,
1522
+ bool is_denormalizer = false);
1523
+
1524
+ // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the
1525
+ // std::unordered_map in `kargs`.
1526
+ - static util::Status MergeSpecsFromArgs(
1527
+ + static absl::Status MergeSpecsFromArgs(
1528
+ const std::unordered_map<std::string, std::string> &kwargs,
1529
+ TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec,
1530
+ NormalizerSpec *denormalizer_spec);
1531
+
1532
+ // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the
1533
+ // command line flags in `args`.
1534
+ - static util::Status MergeSpecsFromArgs(absl::string_view args,
1535
+ + static absl::Status MergeSpecsFromArgs(absl::string_view args,
1536
+ TrainerSpec *trainer_spec,
1537
+ NormalizerSpec *normalizer_spec,
1538
+ NormalizerSpec *denormalizer_spec);
1539
+ @@ -116,7 +117,7 @@ class SentencePieceTrainer {
1540
+ // Injects global pre-tokenizer that are applied in training time.
1541
+ // Pretokenizer is only used for extracting pieces.
1542
+ // TODO(taku): It would be better to inject per `trainer_spec`.
1543
+ - static util::Status SetPretokenizerForTraining(
1544
+ + static absl::Status SetPretokenizerForTraining(
1545
+ const pretokenizer::PretokenizerForTrainingInterface *pretokenizer);
1546
+
1547
+ // Returns the current pretokenizer. if no pretokenizer is defined, returns
1548
+ @@ -129,17 +130,17 @@ class SentencePieceTrainer {
1549
+ // with comma-separated values. `field_name` must not be a nested message.
1550
+ // The body of these functions are automatically generated with
1551
+ // data/gen_spec_parser.pl
1552
+ - static util::Status SetProtoField(const std::string &name,
1553
+ + static absl::Status SetProtoField(const std::string &name,
1554
+ const std::string &value,
1555
+ TrainerSpec *message);
1556
+
1557
+ - static util::Status SetProtoField(const std::string &name,
1558
+ + static absl::Status SetProtoField(const std::string &name,
1559
+ const std::string &value,
1560
+ NormalizerSpec *message);
1561
+
1562
+ // Populates model type from string representation, e.g., "bpe".
1563
+ // Supported model: "unigram", "bpe", "word", "char".
1564
+ - static util::Status PopulateModelTypeFromString(absl::string_view type,
1565
+ + static absl::Status PopulateModelTypeFromString(absl::string_view type,
1566
+ TrainerSpec *trainer_spec);
1567
+
1568
+ private:
1569
+ diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc
1570
+ index e44e66b..00c8d08 100644
1571
+ --- a/src/sentencepiece_trainer_test.cc
1572
+ +++ b/src/sentencepiece_trainer_test.cc
1573
+ @@ -16,7 +16,8 @@
1574
+ #include "sentencepiece_model.pb.h"
1575
+ #include "sentencepiece_trainer.h"
1576
+ #include "testharness.h"
1577
+ -#include "third_party/absl/strings/str_cat.h"
1578
+ +#include "absl/strings/str_cat.h"
1579
+ +#include "absl/status/status.h"
1580
+ #include "util.h"
1581
+
1582
+ namespace sentencepiece {
1583
+ @@ -109,7 +110,7 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) {
1584
+ bool done() const override { return idx_ == vec_.size(); }
1585
+ void Next() override { ++idx_; }
1586
+ const std::string &value() const override { return vec_[idx_]; }
1587
+ - util::Status status() const override { return util::OkStatus(); }
1588
+ + absl::Status status() const override { return util::OkStatus(); }
1589
+
1590
+ private:
1591
+ std::vector<std::string> vec_;
1592
+ diff --git a/src/spec_parser.h b/src/spec_parser.h
1593
+ index 2c5a95b..259c45d 100644
1594
+ --- a/src/spec_parser.h
1595
+ +++ b/src/spec_parser.h
1596
+ @@ -19,8 +19,9 @@
1597
+ #include <vector>
1598
+
1599
+ #include "sentencepiece_processor.h"
1600
+ -#include "third_party/absl/strings/ascii.h"
1601
+ -#include "third_party/absl/strings/str_split.h"
1602
+ +#include "absl/strings/ascii.h"
1603
+ +#include "absl/strings/str_split.h"
1604
+ +#include "absl/status/status.h"
1605
+ #include "util.h"
1606
+
1607
+ namespace sentencepiece {
1608
+ @@ -49,7 +50,7 @@ namespace sentencepiece {
1609
+ if (name == #param_name) { \
1610
+ int32 v; \
1611
+ if (!string_util::lexical_cast(value, &v)) \
1612
+ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
1613
+ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
1614
+ << "cannot parse \"" << value << "\" as int."; \
1615
+ message->set_##param_name(v); \
1616
+ return util::OkStatus(); \
1617
+ @@ -59,7 +60,7 @@ namespace sentencepiece {
1618
+ if (name == #param_name) { \
1619
+ uint64 v; \
1620
+ if (!string_util::lexical_cast(value, &v)) \
1621
+ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
1622
+ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
1623
+ << "cannot parse \"" << value << "\" as int."; \
1624
+ message->set_##param_name(v); \
1625
+ return util::OkStatus(); \
1626
+ @@ -69,7 +70,7 @@ namespace sentencepiece {
1627
+ if (name == #param_name) { \
1628
+ double v; \
1629
+ if (!string_util::lexical_cast(value, &v)) \
1630
+ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
1631
+ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
1632
+ << "cannot parse \"" << value << "\" as int."; \
1633
+ message->set_##param_name(v); \
1634
+ return util::OkStatus(); \
1635
+ @@ -79,7 +80,7 @@ namespace sentencepiece {
1636
+ if (name == #param_name) { \
1637
+ bool v; \
1638
+ if (!string_util::lexical_cast(value.empty() ? "true" : value, &v)) \
1639
+ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
1640
+ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
1641
+ << "cannot parse \"" << value << "\" as bool."; \
1642
+ message->set_##param_name(v); \
1643
+ return util::OkStatus(); \
1644
+ @@ -89,7 +90,7 @@ namespace sentencepiece {
1645
+ if (name == #param_name) { \
1646
+ const auto it = map_name.find(absl::AsciiStrToUpper(value)); \
1647
+ if (it == map_name.end()) \
1648
+ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \
1649
+ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \
1650
+ << "unknown enumeration value of \"" << value << "\" as " \
1651
+ << #map_name; \
1652
+ message->set_##param_name(it->second); \
1653
+ @@ -186,7 +187,7 @@ inline std::string PrintProto(const NormalizerSpec &message,
1654
+ return os.str();
1655
+ }
1656
+
1657
+ -util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
1658
+ +absl::Status SentencePieceTrainer::SetProtoField(const std::string &name,
1659
+ const std::string &value,
1660
+ TrainerSpec *message) {
1661
+ CHECK_OR_RETURN(message);
1662
+ @@ -239,11 +240,11 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
1663
+ PARSE_STRING(pad_piece);
1664
+ PARSE_STRING(unk_surface);
1665
+
1666
+ - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
1667
+ + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
1668
+ << "unknown field name \"" << name << "\" in TrainerSpec.";
1669
+ }
1670
+
1671
+ -util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
1672
+ +absl::Status SentencePieceTrainer::SetProtoField(const std::string &name,
1673
+ const std::string &value,
1674
+ NormalizerSpec *message) {
1675
+ CHECK_OR_RETURN(message);
1676
+ @@ -255,7 +256,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
1677
+ PARSE_BOOL(escape_whitespaces);
1678
+ PARSE_STRING(normalization_rule_tsv);
1679
+
1680
+ - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
1681
+ + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
1682
+ << "unknown field name \"" << name << "\" in NormalizerSpec.";
1683
+ }
1684
+
1685
+ diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc
1686
+ index 3382ddc..9dda65c 100644
1687
+ --- a/src/spm_decode_main.cc
1688
+ +++ b/src/spm_decode_main.cc
1689
+ @@ -21,8 +21,8 @@
1690
+ #include "init.h"
1691
+ #include "sentencepiece.pb.h"
1692
+ #include "sentencepiece_processor.h"
1693
+ -#include "third_party/absl/flags/flag.h"
1694
+ -#include "third_party/absl/strings/str_split.h"
1695
+ +#include "absl/flags/flag.h"
1696
+ +#include "absl/strings/str_split.h"
1697
+ #include "util.h"
1698
+
1699
+ ABSL_FLAG(std::string, model, "", "model file name");
1700
+ diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc
1701
+ index 4d12a38..29b7458 100644
1702
+ --- a/src/spm_encode_main.cc
1703
+ +++ b/src/spm_encode_main.cc
1704
+ @@ -21,10 +21,10 @@
1705
+ #include "init.h"
1706
+ #include "sentencepiece.pb.h"
1707
+ #include "sentencepiece_processor.h"
1708
+ -#include "third_party/absl/container/flat_hash_map.h"
1709
+ -#include "third_party/absl/flags/flag.h"
1710
+ -#include "third_party/absl/strings/str_cat.h"
1711
+ -#include "third_party/absl/strings/str_join.h"
1712
+ +#include "absl/container/flat_hash_map.h"
1713
+ +#include "absl/flags/flag.h"
1714
+ +#include "absl/strings/str_cat.h"
1715
+ +#include "absl/strings/str_join.h"
1716
+ #include "trainer_interface.h"
1717
+
1718
+ ABSL_FLAG(std::string, model, "", "model file name");
1719
+ diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc
1720
+ index b5d93cb..70a65c1 100644
1721
+ --- a/src/spm_export_vocab_main.cc
1722
+ +++ b/src/spm_export_vocab_main.cc
1723
+ @@ -20,7 +20,7 @@
1724
+ #include "init.h"
1725
+ #include "sentencepiece_model.pb.h"
1726
+ #include "sentencepiece_processor.h"
1727
+ -#include "third_party/absl/flags/flag.h"
1728
+ +#include "absl/flags/flag.h"
1729
+
1730
+ ABSL_FLAG(std::string, output, "", "Output filename");
1731
+ ABSL_FLAG(std::string, model, "", "input model file name");
1732
+ diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc
1733
+ index 96da360..8c541b8 100644
1734
+ --- a/src/spm_normalize_main.cc
1735
+ +++ b/src/spm_normalize_main.cc
1736
+ @@ -21,7 +21,7 @@
1737
+ #include "sentencepiece_model.pb.h"
1738
+ #include "sentencepiece_processor.h"
1739
+ #include "sentencepiece_trainer.h"
1740
+ -#include "third_party/absl/flags/flag.h"
1741
+ +#include "absl/flags/flag.h"
1742
+
1743
+ ABSL_FLAG(std::string, model, "", "Model file name");
1744
+ ABSL_FLAG(bool, use_internal_normalization, false,
1745
+ diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc
1746
+ index baf8dbf..ba1e811 100644
1747
+ --- a/src/spm_train_main.cc
1748
+ +++ b/src/spm_train_main.cc
1749
+ @@ -18,10 +18,10 @@
1750
+ #include "init.h"
1751
+ #include "sentencepiece_model.pb.h"
1752
+ #include "sentencepiece_trainer.h"
1753
+ -#include "third_party/absl/flags/flag.h"
1754
+ -#include "third_party/absl/strings/ascii.h"
1755
+ -#include "third_party/absl/strings/str_join.h"
1756
+ -#include "third_party/absl/strings/str_split.h"
1757
+ +#include "absl/flags/flag.h"
1758
+ +#include "absl/strings/ascii.h"
1759
+ +#include "absl/strings/str_join.h"
1760
+ +#include "absl/strings/str_split.h"
1761
+ #include "util.h"
1762
+
1763
+ using sentencepiece::NormalizerSpec;
1764
+ diff --git a/src/testharness.cc b/src/testharness.cc
1765
+ index f6b1efe..daf2d14 100644
1766
+ --- a/src/testharness.cc
1767
+ +++ b/src/testharness.cc
1768
+ @@ -26,7 +26,7 @@
1769
+ #include <vector>
1770
+
1771
+ #include "common.h"
1772
+ -#include "third_party/absl/strings/str_cat.h"
1773
+ +#include "absl/strings/str_cat.h"
1774
+ #include "util.h"
1775
+
1776
+ namespace sentencepiece {
1777
+ diff --git a/src/testharness.h b/src/testharness.h
1778
+ index 9879b06..98317ad 100644
1779
+ --- a/src/testharness.h
1780
+ +++ b/src/testharness.h
1781
+ @@ -21,9 +21,9 @@
1782
+ #include <string>
1783
+
1784
+ #include "common.h"
1785
+ -#include "third_party/absl/flags/flag.h"
1786
+ -#include "third_party/absl/flags/parse.h"
1787
+ -#include "third_party/absl/strings/string_view.h"
1788
+ +#include "absl/flags/flag.h"
1789
+ +#include "absl/flags/parse.h"
1790
+ +#include "absl/strings/string_view.h"
1791
+
1792
+ ABSL_DECLARE_FLAG(std::string, test_tmpdir);
1793
+ ABSL_DECLARE_FLAG(std::string, test_srcdir);
1794
+ diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc
1795
+ index d1d2541..ff594d0 100644
1796
+ --- a/src/trainer_factory.cc
1797
+ +++ b/src/trainer_factory.cc
1798
+ @@ -14,7 +14,7 @@
1799
+
1800
+ #include "bpe_model_trainer.h"
1801
+ #include "char_model_trainer.h"
1802
+ -#include "third_party/absl/memory/memory.h"
1803
+ +#include "absl/memory/memory.h"
1804
+ #include "trainer_factory.h"
1805
+ #include "unigram_model_trainer.h"
1806
+ #include "word_model_trainer.h"
1807
+ diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc
1808
+ index a3a4b74..e6a2587 100644
1809
+ --- a/src/trainer_interface.cc
1810
+ +++ b/src/trainer_interface.cc
1811
+ @@ -26,13 +26,14 @@
1812
+ #include "normalizer.h"
1813
+ #include "sentencepiece_processor.h"
1814
+ #include "sentencepiece_trainer.h"
1815
+ -#include "third_party/absl/container/flat_hash_map.h"
1816
+ -#include "third_party/absl/memory/memory.h"
1817
+ -#include "third_party/absl/strings/numbers.h"
1818
+ -#include "third_party/absl/strings/str_cat.h"
1819
+ -#include "third_party/absl/strings/str_format.h"
1820
+ -#include "third_party/absl/strings/str_join.h"
1821
+ -#include "third_party/absl/strings/str_split.h"
1822
+ +#include "absl/container/flat_hash_map.h"
1823
+ +#include "absl/memory/memory.h"
1824
+ +#include "absl/strings/numbers.h"
1825
+ +#include "absl/strings/str_cat.h"
1826
+ +#include "absl/strings/str_format.h"
1827
+ +#include "absl/strings/str_join.h"
1828
+ +#include "absl/strings/str_split.h"
1829
+ +#include "absl/status/status.h"
1830
+ #include "trainer_interface.h"
1831
+ #include "unicode_script.h"
1832
+ #include "util.h"
1833
+ @@ -49,7 +50,7 @@ const char32 TrainerInterface::kUPPBoundaryChar = L'\u0009';
1834
+ const char TrainerInterface::kUPPBoundaryStr[] = "\t";
1835
+
1836
+ namespace {
1837
+ -util::Status VerifySpec(const TrainerSpec &trainer_spec) {
1838
+ +absl::Status VerifySpec(const TrainerSpec &trainer_spec) {
1839
+ CHECK_GT_OR_RETURN(trainer_spec.vocab_size(), 0);
1840
+
1841
+ if (trainer_spec.model_type() == TrainerSpec::UNIGRAM ||
1842
+ @@ -164,7 +165,7 @@ bool MultiFileSentenceIterator::done() const {
1843
+ return (!read_done_ && file_index_ == files_.size());
1844
+ }
1845
+
1846
+ -util::Status MultiFileSentenceIterator::status() const {
1847
+ +absl::Status MultiFileSentenceIterator::status() const {
1848
+ CHECK_OR_RETURN(fp_);
1849
+ return fp_->status();
1850
+ }
1851
+ @@ -296,7 +297,7 @@ bool TrainerInterface::IsValidSentencePiece(
1852
+ return true;
1853
+ }
1854
+
1855
+ -util::Status TrainerInterface::LoadSentences() {
1856
+ +absl::Status TrainerInterface::LoadSentences() {
1857
+ RETURN_IF_ERROR(status());
1858
+ CHECK_OR_RETURN(sentences_.empty());
1859
+ CHECK_OR_RETURN(required_chars_.empty());
1860
+ @@ -537,7 +538,7 @@ void TrainerInterface::SplitSentencesByWhitespace() {
1861
+ LOG(INFO) << "Done! " << sentences_.size();
1862
+ }
1863
+
1864
+ -util::Status TrainerInterface::Serialize(ModelProto *model_proto) const {
1865
+ +absl::Status TrainerInterface::Serialize(ModelProto *model_proto) const {
1866
+ RETURN_IF_ERROR(status());
1867
+
1868
+ // Duplicated sentencepiece is not allowed.
1869
+ @@ -611,7 +612,7 @@ util::Status TrainerInterface::Serialize(ModelProto *model_proto) const {
1870
+ return util::OkStatus();
1871
+ }
1872
+
1873
+ -util::Status TrainerInterface::SaveModel(absl::string_view filename) const {
1874
+ +absl::Status TrainerInterface::SaveModel(absl::string_view filename) const {
1875
+ LOG(INFO) << "Saving model: " << filename;
1876
+ ModelProto model_proto;
1877
+ RETURN_IF_ERROR(Serialize(&model_proto));
1878
+ @@ -622,7 +623,7 @@ util::Status TrainerInterface::SaveModel(absl::string_view filename) const {
1879
+ return util::OkStatus();
1880
+ }
1881
+
1882
+ -util::Status TrainerInterface::SaveVocab(absl::string_view filename) const {
1883
+ +absl::Status TrainerInterface::SaveVocab(absl::string_view filename) const {
1884
+ LOG(INFO) << "Saving vocabs: " << filename;
1885
+ ModelProto model_proto;
1886
+ RETURN_IF_ERROR(Serialize(&model_proto));
1887
+ @@ -644,7 +645,7 @@ util::Status TrainerInterface::SaveVocab(absl::string_view filename) const {
1888
+ return util::OkStatus();
1889
+ }
1890
+
1891
+ -util::Status TrainerInterface::Save() const {
1892
+ +absl::Status TrainerInterface::Save() const {
1893
+ if (output_model_proto_) {
1894
+ RETURN_IF_ERROR(Serialize(output_model_proto_));
1895
+ } else {
1896
+ @@ -654,7 +655,7 @@ util::Status TrainerInterface::Save() const {
1897
+ return util::OkStatus();
1898
+ }
1899
+
1900
+ -util::Status TrainerInterface::InitMetaPieces() {
1901
+ +absl::Status TrainerInterface::InitMetaPieces() {
1902
+ CHECK_OR_RETURN(meta_pieces_.empty());
1903
+ bool has_unk = false;
1904
+
1905
+ diff --git a/src/trainer_interface.h b/src/trainer_interface.h
1906
+ index f66d59a..b4fbc7b 100644
1907
+ --- a/src/trainer_interface.h
1908
+ +++ b/src/trainer_interface.h
1909
+ @@ -27,7 +27,8 @@
1910
+ #include "sentencepiece_model.pb.h"
1911
+ #include "sentencepiece_processor.h"
1912
+ #include "sentencepiece_trainer.h"
1913
+ -#include "third_party/absl/container/flat_hash_map.h"
1914
+ +#include "absl/container/flat_hash_map.h"
1915
+ +#include "absl/status/status.h"
1916
+ #include "util.h"
1917
+
1918
+ namespace sentencepiece {
1919
+ @@ -57,7 +58,7 @@ class MultiFileSentenceIterator : public SentenceIterator {
1920
+ bool done() const override;
1921
+ void Next() override;
1922
+ const std::string &value() const override { return value_; }
1923
+ - util::Status status() const override;
1924
+ + absl::Status status() const override;
1925
+
1926
+ private:
1927
+ void TryRead();
1928
+ @@ -90,16 +91,16 @@ class TrainerInterface {
1929
+
1930
+ // Loads sentence from `sentence_iterator` and stores the model
1931
+ // to `output_model_proto`.
1932
+ - virtual util::Status Train(SentenceIterator *sentence_iterator,
1933
+ + virtual absl::Status Train(SentenceIterator *sentence_iterator,
1934
+ ModelProto *output_model_proto) {
1935
+ sentence_iterator_ = sentence_iterator;
1936
+ output_model_proto_ = output_model_proto;
1937
+ return Train();
1938
+ }
1939
+
1940
+ - virtual util::Status Train() { return status(); }
1941
+ + virtual absl::Status Train() { return status(); }
1942
+
1943
+ - virtual util::Status status() const { return status_; }
1944
+ + virtual absl::Status status() const { return status_; }
1945
+
1946
+ FRIEND_TEST(TrainerInterfaceTest, IsValidSentencePieceTest);
1947
+ FRIEND_TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest);
1948
+ @@ -115,7 +116,7 @@ class TrainerInterface {
1949
+
1950
+ // Loads all sentences from spec.input() or SentenceIterator.
1951
+ // It loads at most input_sentence_size sentences.
1952
+ - util::Status LoadSentences();
1953
+ + absl::Status LoadSentences();
1954
+
1955
+ // Splits all sentencecs by whitespaces and
1956
+ // replace the |sentences_| with tokenized string.
1957
+ @@ -125,7 +126,7 @@ class TrainerInterface {
1958
+ void SplitSentencesByWhitespace();
1959
+
1960
+ // Save model files into spec.model_prefix().
1961
+ - util::Status Save() const;
1962
+ + absl::Status Save() const;
1963
+
1964
+ // Set of characters which must be included in the final vocab.
1965
+ // The value of this map stores the frequency.
1966
+ @@ -152,7 +153,7 @@ class TrainerInterface {
1967
+ meta_pieces_;
1968
+
1969
+ // Detect errors on initialization.
1970
+ - util::Status status_;
1971
+ + absl::Status status_;
1972
+
1973
+ // Loads sentences from SentenceIterator if not null.
1974
+ SentenceIterator *sentence_iterator_ = nullptr;
1975
+ @@ -162,19 +163,19 @@ class TrainerInterface {
1976
+
1977
+ private:
1978
+ // Serialize final_pieces_ to |model_proto|.
1979
+ - util::Status Serialize(ModelProto *model_proto) const;
1980
+ + absl::Status Serialize(ModelProto *model_proto) const;
1981
+
1982
+ // Saves the best sentence split with the current model for debugging.
1983
+ - util::Status SaveSplits(absl::string_view filename) const;
1984
+ + absl::Status SaveSplits(absl::string_view filename) const;
1985
+
1986
+ // Saves model file.
1987
+ - util::Status SaveModel(absl::string_view filename) const;
1988
+ + absl::Status SaveModel(absl::string_view filename) const;
1989
+
1990
+ // Saves vocabulary file for NMT.
1991
+ - util::Status SaveVocab(absl::string_view filename) const;
1992
+ + absl::Status SaveVocab(absl::string_view filename) const;
1993
+
1994
+ // Initializes `meta_pieces_` from TrainerSpec.
1995
+ - util::Status InitMetaPieces();
1996
+ + absl::Status InitMetaPieces();
1997
+
1998
+ // Randomly sampled raw sentences for self-testing.
1999
+ std::vector<std::string> self_test_samples_;
2000
+ diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc
2001
+ index 70a51ad..d7f3f0c 100644
2002
+ --- a/src/trainer_interface_test.cc
2003
+ +++ b/src/trainer_interface_test.cc
2004
+ @@ -16,8 +16,8 @@
2005
+
2006
+ #include "filesystem.h"
2007
+ #include "testharness.h"
2008
+ -#include "third_party/absl/strings/str_cat.h"
2009
+ -#include "third_party/absl/strings/str_format.h"
2010
+ +#include "absl/strings/str_cat.h"
2011
+ +#include "absl/strings/str_format.h"
2012
+ #include "trainer_interface.h"
2013
+ #include "util.h"
2014
+
2015
+ diff --git a/src/unicode_script.cc b/src/unicode_script.cc
2016
+ index 583dc30..11b24dc 100644
2017
+ --- a/src/unicode_script.cc
2018
+ +++ b/src/unicode_script.cc
2019
+ @@ -14,7 +14,7 @@
2020
+
2021
+ #include <unordered_map>
2022
+
2023
+ -#include "third_party/absl/container/flat_hash_map.h"
2024
+ +#include "absl/container/flat_hash_map.h"
2025
+ #include "unicode_script.h"
2026
+ #include "unicode_script_map.h"
2027
+ #include "util.h"
2028
+ diff --git a/src/unicode_script_map.h b/src/unicode_script_map.h
2029
+ index f2e67e9..f1b8299 100644
2030
+ --- a/src/unicode_script_map.h
2031
+ +++ b/src/unicode_script_map.h
2032
+ @@ -14,7 +14,7 @@
2033
+
2034
+ #ifndef UNICODE_SCRIPT_DATA_H_
2035
+ #define UNICODE_SCRIPT_DATA_H_
2036
+ -#include "third_party/absl/container/flat_hash_map.h"
2037
+ +#include "absl/container/flat_hash_map.h"
2038
+ namespace sentencepiece {
2039
+ namespace unicode_script {
2040
+ namespace {
2041
+ diff --git a/src/unicode_script_test.cc b/src/unicode_script_test.cc
2042
+ index ab33565..e0b1c4d 100644
2043
+ --- a/src/unicode_script_test.cc
2044
+ +++ b/src/unicode_script_test.cc
2045
+ @@ -14,7 +14,7 @@
2046
+
2047
+ #include "common.h"
2048
+ #include "testharness.h"
2049
+ -#include "third_party/absl/strings/string_view.h"
2050
+ +#include "absl/strings/string_view.h"
2051
+ #include "unicode_script.h"
2052
+ #include "util.h"
2053
+
2054
+ diff --git a/src/unigram_model.cc b/src/unigram_model.cc
2055
+ index 3b99060..9c72fb9 100644
2056
+ --- a/src/unigram_model.cc
2057
+ +++ b/src/unigram_model.cc
2058
+ @@ -22,9 +22,9 @@
2059
+ #include <utility>
2060
+ #include <vector>
2061
+
2062
+ -#include "third_party/absl/memory/memory.h"
2063
+ -#include "third_party/absl/strings/str_split.h"
2064
+ -#include "third_party/absl/strings/string_view.h"
2065
+ +#include "absl/memory/memory.h"
2066
+ +#include "absl/strings/str_split.h"
2067
+ +#include "absl/strings/string_view.h"
2068
+ #include "unigram_model.h"
2069
+ #include "util.h"
2070
+
2071
+ diff --git a/src/unigram_model.h b/src/unigram_model.h
2072
+ index 448e489..9062f12 100644
2073
+ --- a/src/unigram_model.h
2074
+ +++ b/src/unigram_model.h
2075
+ @@ -24,7 +24,7 @@
2076
+ #include "freelist.h"
2077
+ #include "model_interface.h"
2078
+ #include "sentencepiece_model.pb.h"
2079
+ -#include "third_party/darts_clone/darts.h"
2080
+ +#include "include/darts.h"
2081
+
2082
+ namespace sentencepiece {
2083
+ namespace unigram {
2084
+ diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc
2085
+ index f93b21c..808e907 100644
2086
+ --- a/src/unigram_model_test.cc
2087
+ +++ b/src/unigram_model_test.cc
2088
+ @@ -22,8 +22,8 @@
2089
+ #include "sentencepiece_model.pb.h"
2090
+ #include "sentencepiece_processor.h"
2091
+ #include "testharness.h"
2092
+ -#include "third_party/absl/strings/str_cat.h"
2093
+ -#include "third_party/absl/strings/str_join.h"
2094
+ +#include "absl/strings/str_cat.h"
2095
+ +#include "absl/strings/str_join.h"
2096
+ #include "util.h"
2097
+
2098
+ namespace sentencepiece {
2099
+ diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc
2100
+ index 9615040..7d16bd2 100644
2101
+ --- a/src/unigram_model_trainer.cc
2102
+ +++ b/src/unigram_model_trainer.cc
2103
+ @@ -25,8 +25,9 @@
2104
+ #include "normalizer.h"
2105
+ #include "pretokenizer_for_training.h"
2106
+ #include "sentencepiece_trainer.h"
2107
+ -#include "third_party/absl/container/flat_hash_map.h"
2108
+ -#include "third_party/absl/memory/memory.h"
2109
+ +#include "absl/container/flat_hash_map.h"
2110
+ +#include "absl/memory/memory.h"
2111
+ +#include "absl/status/status.h"
2112
+ #include "third_party/esaxx/esa.hxx" // Suffix array library.
2113
+ #include "unicode_script.h"
2114
+ #include "unigram_model_trainer.h"
2115
+ @@ -463,7 +464,7 @@ TrainerModel::SentencePieces Trainer::FinalizeSentencePieces(
2116
+ return Sorted(final_sentencepieces);
2117
+ }
2118
+
2119
+ -util::Status Trainer::Train() {
2120
+ +absl::Status Trainer::Train() {
2121
+ RETURN_IF_ERROR(status());
2122
+
2123
+ CHECK_EQ_OR_RETURN(TrainerSpec::UNIGRAM, trainer_spec_.model_type());
2124
+ diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h
2125
+ index 91fbeb4..d41967d 100644
2126
+ --- a/src/unigram_model_trainer.h
2127
+ +++ b/src/unigram_model_trainer.h
2128
+ @@ -21,7 +21,8 @@
2129
+ #include <vector>
2130
+
2131
+ #include "sentencepiece_model.pb.h"
2132
+ -#include "third_party/absl/strings/string_view.h"
2133
+ +#include "absl/strings/string_view.h"
2134
+ +#include "absl/status/status.h"
2135
+ #include "trainer_interface.h"
2136
+ #include "unigram_model.h"
2137
+ #include "util.h"
2138
+ @@ -68,7 +69,7 @@ class Trainer : public TrainerInterface {
2139
+ : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
2140
+ denormalizer_spec) {}
2141
+
2142
+ - util::Status Train() override;
2143
+ + absl::Status Train() override;
2144
+
2145
+ private:
2146
+ FRIEND_TEST(TrainerTest, IsValidSentencePieceTest);
2147
+ diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc
2148
+ index ffe515e..fdb25f6 100644
2149
+ --- a/src/unigram_model_trainer_test.cc
2150
+ +++ b/src/unigram_model_trainer_test.cc
2151
+ @@ -16,8 +16,8 @@
2152
+ #include "sentencepiece_processor.h"
2153
+ #include "sentencepiece_trainer.h"
2154
+ #include "testharness.h"
2155
+ -#include "third_party/absl/strings/str_cat.h"
2156
+ -#include "third_party/absl/strings/str_join.h"
2157
+ +#include "absl/strings/str_cat.h"
2158
+ +#include "absl/strings/str_join.h"
2159
+ #include "unigram_model_trainer.h"
2160
+ #include "util.h"
2161
+
2162
+ diff --git a/src/util.h b/src/util.h
2163
+ index 0d15863..7122c7c 100644
2164
+ --- a/src/util.h
2165
+ +++ b/src/util.h
2166
+ @@ -30,7 +30,8 @@
2167
+
2168
+ #include "common.h"
2169
+ #include "sentencepiece_processor.h"
2170
+ -#include "third_party/absl/strings/string_view.h"
2171
+ +#include "absl/strings/string_view.h"
2172
+ +#include "absl/status/status.h"
2173
+
2174
+ #ifdef SPM_NO_THREADLOCAL
2175
+ #include <pthread.h>
2176
+ @@ -359,14 +360,14 @@ std::string StrError(int errnum);
2177
+
2178
+ std::vector<std::string> StrSplitAsCSV(absl::string_view text);
2179
+
2180
+ -inline Status OkStatus() { return Status(); }
2181
+ +inline absl::Status OkStatus() { return absl::Status(); }
2182
+
2183
+ #define DECLARE_ERROR(FUNC) \
2184
+ - inline util::Status FUNC##Error(absl::string_view str) { \
2185
+ - return util::Status(StatusCode::k##FUNC, str.data()); \
2186
+ + inline absl::Status FUNC##Error(absl::string_view str) { \
2187
+ + return absl::Status(absl::StatusCode::k##FUNC, str.data()); \
2188
+ } \
2189
+ - inline bool Is##FUNC(const util::Status &status) { \
2190
+ - return status.code() == StatusCode::k##FUNC; \
2191
+ + inline bool Is##FUNC(const absl::Status &status) { \
2192
+ + return status.code() ==absl::StatusCode::k##FUNC; \
2193
+ }
2194
+
2195
+ DECLARE_ERROR(Cancelled)
2196
+ @@ -390,8 +391,8 @@ DECLARE_ERROR(Unauthenticated)
2197
+
2198
+ class StatusBuilder {
2199
+ public:
2200
+ - explicit StatusBuilder(StatusCode code) : code_(code) {}
2201
+ - explicit StatusBuilder(StatusCode code, int loc) : code_(code) {}
2202
+ + explicit StatusBuilder(absl::StatusCode code) : code_(code) {}
2203
+ + explicit StatusBuilder(absl::StatusCode code, int loc) : code_(code) {}
2204
+
2205
+ template <typename T>
2206
+ StatusBuilder &operator<<(const T &value) {
2207
+ @@ -399,10 +400,10 @@ class StatusBuilder {
2208
+ return *this;
2209
+ }
2210
+
2211
+ - operator Status() const { return Status(code_, os_.str()); }
2212
+ + operator absl::Status() const { return absl::Status(code_, os_.str()); }
2213
+
2214
+ private:
2215
+ - StatusCode code_;
2216
+ + absl::StatusCode code_;
2217
+ std::ostringstream os_;
2218
+ };
2219
+
2220
+ @@ -410,7 +411,7 @@ class StatusBuilder {
2221
+ if (condition) { \
2222
+ } else /* NOLINT */ \
2223
+ return ::sentencepiece::util::StatusBuilder( \
2224
+ - ::sentencepiece::util::StatusCode::kInternal) \
2225
+ + ::absl::StatusCode::kInternal) \
2226
+ << __FILE__ << "(" << __LINE__ << ") [" << #condition << "] "
2227
+
2228
+ #define CHECK_EQ_OR_RETURN(a, b) CHECK_OR_RETURN((a) == (b))
2229
+ diff --git a/src/util_test.cc b/src/util_test.cc
2230
+ index 71d006f..67290dc 100644
2231
+ --- a/src/util_test.cc
2232
+ +++ b/src/util_test.cc
2233
+ @@ -16,7 +16,8 @@
2234
+
2235
+ #include "filesystem.h"
2236
+ #include "testharness.h"
2237
+ -#include "third_party/absl/strings/str_cat.h"
2238
+ +#include "absl/strings/str_cat.h"
2239
+ +#include "absl/status/status.h"
2240
+ #include "util.h"
2241
+
2242
+ namespace sentencepiece {
2243
+ @@ -376,27 +377,27 @@ TEST(UtilTest, STLDeleteELementsTest) {
2244
+ }
2245
+
2246
+ TEST(UtilTest, StatusTest) {
2247
+ - const util::Status ok;
2248
+ + const absl::Status ok;
2249
+ EXPECT_TRUE(ok.ok());
2250
+ - EXPECT_EQ(util::StatusCode::kOk, ok.code());
2251
+ + EXPECT_EQ(absl::StatusCode::kOk, ok.code());
2252
+ EXPECT_EQ(std::string(""), ok.message());
2253
+
2254
+ - const util::Status s1(util::StatusCode::kUnknown, "unknown");
2255
+ - const util::Status s2(util::StatusCode::kUnknown, std::string("unknown"));
2256
+ + const absl::Status s1(absl::StatusCode::kUnknown, "unknown");
2257
+ + const absl::Status s2(absl::StatusCode::kUnknown, std::string("unknown"));
2258
+
2259
+ - EXPECT_EQ(util::StatusCode::kUnknown, s1.code());
2260
+ - EXPECT_EQ(util::StatusCode::kUnknown, s2.code());
2261
+ + EXPECT_EQ(absl::StatusCode::kUnknown, s1.code());
2262
+ + EXPECT_EQ(absl::StatusCode::kUnknown, s2.code());
2263
+ EXPECT_EQ(std::string("unknown"), s1.message());
2264
+ EXPECT_EQ(std::string("unknown"), s2.message());
2265
+
2266
+ auto ok2 = util::OkStatus();
2267
+ EXPECT_TRUE(ok2.ok());
2268
+ - EXPECT_EQ(util::StatusCode::kOk, ok2.code());
2269
+ + EXPECT_EQ(absl::StatusCode::kOk, ok2.code());
2270
+ EXPECT_EQ(std::string(""), ok2.message());
2271
+
2272
+ util::OkStatus().IgnoreError();
2273
+ for (int i = 1; i <= 16; ++i) {
2274
+ - util::Status s(static_cast<util::StatusCode>(i), "message");
2275
+ + absl::Status s(static_cast<absl::StatusCode>(i), "message");
2276
+ EXPECT_TRUE(s.ToString().find("message") != std::string::npos)
2277
+ << s.ToString();
2278
+ }
2279
+ diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc
2280
+ index 0b8b062..bc1f86b 100644
2281
+ --- a/src/word_model_trainer.cc
2282
+ +++ b/src/word_model_trainer.cc
2283
+ @@ -15,8 +15,9 @@
2284
+ #include <cmath>
2285
+ #include <string>
2286
+
2287
+ -#include "third_party/absl/container/flat_hash_map.h"
2288
+ -#include "third_party/absl/strings/string_view.h"
2289
+ +#include "absl/container/flat_hash_map.h"
2290
+ +#include "absl/strings/string_view.h"
2291
+ +#include "absl/status/status.h"
2292
+ #include "util.h"
2293
+ #include "word_model.h"
2294
+ #include "word_model_trainer.h"
2295
+ @@ -24,7 +25,7 @@
2296
+ namespace sentencepiece {
2297
+ namespace word {
2298
+
2299
+ -util::Status Trainer::Train() {
2300
+ +absl::Status Trainer::Train() {
2301
+ RETURN_IF_ERROR(status());
2302
+
2303
+ CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
2304
+ diff --git a/src/word_model_trainer.h b/src/word_model_trainer.h
2305
+ index 76f8f32..436e595 100644
2306
+ --- a/src/word_model_trainer.h
2307
+ +++ b/src/word_model_trainer.h
2308
+ @@ -17,6 +17,7 @@
2309
+
2310
+ #include "sentencepiece_model.pb.h"
2311
+ #include "trainer_interface.h"
2312
+ +#include "absl/status/status.h"
2313
+
2314
+ namespace sentencepiece {
2315
+ namespace word {
2316
+ @@ -34,7 +35,7 @@ class Trainer : public TrainerInterface {
2317
+ : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
2318
+ denormalizer_spec) {}
2319
+
2320
+ - util::Status Train() override;
2321
+ + absl::Status Train() override;
2322
+ };
2323
+ } // namespace word
2324
+ } // namespace sentencepiece
2325
+ diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc
2326
+ index c4a8bc6..366810f 100644
2327
+ --- a/src/word_model_trainer_test.cc
2328
+ +++ b/src/word_model_trainer_test.cc
2329
+ @@ -18,8 +18,8 @@
2330
+ #include "filesystem.h"
2331
+ #include "sentencepiece_processor.h"
2332
+ #include "testharness.h"
2333
+ -#include "third_party/absl/strings/str_cat.h"
2334
+ -#include "third_party/absl/strings/str_join.h"
2335
+ +#include "absl/strings/str_cat.h"
2336
+ +#include "absl/strings/str_join.h"
2337
+ #include "util.h"
2338
+ #include "word_model_trainer.h"
2339
+
gemma.cpp/build/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *
2
+ !.gitignore
3
+ !.hgignore
gemma.cpp/build/CMakeCache.txt ADDED
@@ -0,0 +1,982 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is the CMakeCache file.
2
+ # For build in directory: /content/gemma.cpp/build
3
+ # It was generated by CMake: /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake
4
+ # You can edit this file to change values found and used by cmake.
5
+ # If you do not want to change any of the values, simply exit the editor.
6
+ # If you do want to change a value, simply edit, save, and exit the editor.
7
+ # The syntax for the file is as follows:
8
+ # KEY:TYPE=VALUE
9
+ # KEY is the name of a variable in the cache.
10
+ # TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
11
+ # VALUE is the current value for the KEY.
12
+
13
+ ########################
14
+ # EXTERNAL cache entries
15
+ ########################
16
+
17
+ //Build a 32 bit version of the library.
18
+ BENCHMARK_BUILD_32_BITS:BOOL=OFF
19
+
20
+ //Flags used by the C++ compiler during coverage builds.
21
+ BENCHMARK_CXX_FLAGS_COVERAGE:STRING=-g
22
+
23
+ //Allow the downloading and in-tree building of unmet dependencies
24
+ BENCHMARK_DOWNLOAD_DEPENDENCIES:BOOL=OFF
25
+
26
+ //Enable building and running the assembly tests
27
+ BENCHMARK_ENABLE_ASSEMBLY_TESTS:BOOL=OFF
28
+
29
+ //Build documentation with Doxygen.
30
+ BENCHMARK_ENABLE_DOXYGEN:BOOL=OFF
31
+
32
+ //Enable the use of exceptions in the benchmark library.
33
+ BENCHMARK_ENABLE_EXCEPTIONS:BOOL=ON
34
+
35
+ //Enable building the unit tests which depend on gtest
36
+ BENCHMARK_ENABLE_GTEST_TESTS:BOOL=ON
37
+
38
+ //Enable installation of benchmark. (Projects embedding benchmark
39
+ // may want to turn this OFF.)
40
+ BENCHMARK_ENABLE_INSTALL:BOOL=ON
41
+
42
+ //Enable performance counters provided by libpfm
43
+ BENCHMARK_ENABLE_LIBPFM:BOOL=OFF
44
+
45
+ //Enable link time optimisation of the benchmark library.
46
+ BENCHMARK_ENABLE_LTO:BOOL=OFF
47
+
48
+ //Enable testing of the benchmark library.
49
+ BENCHMARK_ENABLE_TESTING:BOOL=ON
50
+
51
+ //Build Release candidates with -Werror.
52
+ BENCHMARK_ENABLE_WERROR:BOOL=ON
53
+
54
+ //Flags used for linking binaries during coverage builds.
55
+ BENCHMARK_EXE_LINKER_FLAGS_COVERAGE:STRING=
56
+
57
+ //Build Release candidates with -Werror regardless of compiler
58
+ // issues.
59
+ BENCHMARK_FORCE_WERROR:BOOL=OFF
60
+
61
+ //Enable installation of documentation.
62
+ BENCHMARK_INSTALL_DOCS:BOOL=ON
63
+
64
+ //Flags used by the shared libraries linker during coverage builds.
65
+ BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE:STRING=
66
+
67
+ //Use bundled GoogleTest. If disabled, the find_package(GTest)
68
+ // will be used.
69
+ BENCHMARK_USE_BUNDLED_GTEST:BOOL=ON
70
+
71
+ //Build and test using libc++ as the standard library.
72
+ BENCHMARK_USE_LIBCXX:BOOL=OFF
73
+
74
+ //Builds the googlemock subproject
75
+ BUILD_GMOCK:BOOL=ON
76
+
77
+ //Build shared libraries
78
+ BUILD_SHARED_LIBS:BOOL=OFF
79
+
80
+ //Build the testing tree.
81
+ BUILD_TESTING:BOOL=ON
82
+
83
+ //Path to a program.
84
+ CMAKE_ADDR2LINE:FILEPATH=/usr/bin/addr2line
85
+
86
+ //Path to a program.
87
+ CMAKE_AR:FILEPATH=/usr/bin/ar
88
+
89
+ //Choose the type of build, options are: None Debug Release RelWithDebInfo
90
+ // MinSizeRel ...
91
+ CMAKE_BUILD_TYPE:STRING=
92
+
93
+ //Enable/Disable color output during build.
94
+ CMAKE_COLOR_MAKEFILE:BOOL=ON
95
+
96
+ //CXX compiler
97
+ CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++
98
+
99
+ //A wrapper around 'ar' adding the appropriate '--plugin' option
100
+ // for the GCC compiler
101
+ CMAKE_CXX_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11
102
+
103
+ //A wrapper around 'ranlib' adding the appropriate '--plugin' option
104
+ // for the GCC compiler
105
+ CMAKE_CXX_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11
106
+
107
+ //Flags used by the CXX compiler during all build types.
108
+ CMAKE_CXX_FLAGS:STRING=
109
+
110
+ //Flags used by the CXX compiler during DEBUG builds.
111
+ CMAKE_CXX_FLAGS_DEBUG:STRING=-g
112
+
113
+ //Flags used by the CXX compiler during MINSIZEREL builds.
114
+ CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
115
+
116
+ //Flags used by the CXX compiler during RELEASE builds.
117
+ CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
118
+
119
+ //Flags used by the CXX compiler during RELWITHDEBINFO builds.
120
+ CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
121
+
122
+ //C compiler
123
+ CMAKE_C_COMPILER:FILEPATH=/usr/bin/cc
124
+
125
+ //A wrapper around 'ar' adding the appropriate '--plugin' option
126
+ // for the GCC compiler
127
+ CMAKE_C_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11
128
+
129
+ //A wrapper around 'ranlib' adding the appropriate '--plugin' option
130
+ // for the GCC compiler
131
+ CMAKE_C_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11
132
+
133
+ //Flags used by the C compiler during all build types.
134
+ CMAKE_C_FLAGS:STRING=
135
+
136
+ //Flags used by the C compiler during DEBUG builds.
137
+ CMAKE_C_FLAGS_DEBUG:STRING=-g
138
+
139
+ //Flags used by the C compiler during MINSIZEREL builds.
140
+ CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
141
+
142
+ //Flags used by the C compiler during RELEASE builds.
143
+ CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
144
+
145
+ //Flags used by the C compiler during RELWITHDEBINFO builds.
146
+ CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
147
+
148
+ //Path to a program.
149
+ CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND
150
+
151
+ //Flags used by the linker during all build types.
152
+ CMAKE_EXE_LINKER_FLAGS:STRING=
153
+
154
+ //Flags used by the linker during DEBUG builds.
155
+ CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=
156
+
157
+ //Flags used by the linker during MINSIZEREL builds.
158
+ CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=
159
+
160
+ //Flags used by the linker during RELEASE builds.
161
+ CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=
162
+
163
+ //Flags used by the linker during RELWITHDEBINFO builds.
164
+ CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
165
+
166
+ //Enable/Disable output of compile commands during generation.
167
+ CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=
168
+
169
+ //Value Computed by CMake.
170
+ CMAKE_FIND_PACKAGE_REDIRECTS_DIR:STATIC=/content/gemma.cpp/build/CMakeFiles/pkgRedirects
171
+
172
+ //User executables (bin)
173
+ CMAKE_INSTALL_BINDIR:PATH=bin
174
+
175
+ //Read-only architecture-independent data (DATAROOTDIR)
176
+ CMAKE_INSTALL_DATADIR:PATH=
177
+
178
+ //Read-only architecture-independent data root (share)
179
+ CMAKE_INSTALL_DATAROOTDIR:PATH=share
180
+
181
+ //Documentation root (DATAROOTDIR/doc/PROJECT_NAME)
182
+ CMAKE_INSTALL_DOCDIR:PATH=
183
+
184
+ //C header files (include)
185
+ CMAKE_INSTALL_INCLUDEDIR:PATH=include
186
+
187
+ //Info documentation (DATAROOTDIR/info)
188
+ CMAKE_INSTALL_INFODIR:PATH=
189
+
190
+ //Object code libraries (lib)
191
+ CMAKE_INSTALL_LIBDIR:PATH=lib
192
+
193
+ //Program executables (libexec)
194
+ CMAKE_INSTALL_LIBEXECDIR:PATH=libexec
195
+
196
+ //Locale-dependent data (DATAROOTDIR/locale)
197
+ CMAKE_INSTALL_LOCALEDIR:PATH=
198
+
199
+ //Modifiable single-machine data (var)
200
+ CMAKE_INSTALL_LOCALSTATEDIR:PATH=var
201
+
202
+ //Man documentation (DATAROOTDIR/man)
203
+ CMAKE_INSTALL_MANDIR:PATH=
204
+
205
+ //C header files for non-gcc (/usr/include)
206
+ CMAKE_INSTALL_OLDINCLUDEDIR:PATH=/usr/include
207
+
208
+ //Install path prefix, prepended onto install directories.
209
+ CMAKE_INSTALL_PREFIX:PATH=/usr/local
210
+
211
+ //Run-time variable data (LOCALSTATEDIR/run)
212
+ CMAKE_INSTALL_RUNSTATEDIR:PATH=
213
+
214
+ //System admin executables (sbin)
215
+ CMAKE_INSTALL_SBINDIR:PATH=sbin
216
+
217
+ //Modifiable architecture-independent data (com)
218
+ CMAKE_INSTALL_SHAREDSTATEDIR:PATH=com
219
+
220
+ //Read-only single-machine data (etc)
221
+ CMAKE_INSTALL_SYSCONFDIR:PATH=etc
222
+
223
+ //Path to a program.
224
+ CMAKE_LINKER:FILEPATH=/usr/bin/ld
225
+
226
+ //Path to a program.
227
+ CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake
228
+
229
+ //Flags used by the linker during the creation of modules during
230
+ // all build types.
231
+ CMAKE_MODULE_LINKER_FLAGS:STRING=
232
+
233
+ //Flags used by the linker during the creation of modules during
234
+ // DEBUG builds.
235
+ CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=
236
+
237
+ //Flags used by the linker during the creation of modules during
238
+ // MINSIZEREL builds.
239
+ CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=
240
+
241
+ //Flags used by the linker during the creation of modules during
242
+ // RELEASE builds.
243
+ CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=
244
+
245
+ //Flags used by the linker during the creation of modules during
246
+ // RELWITHDEBINFO builds.
247
+ CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
248
+
249
+ //Path to a program.
250
+ CMAKE_NM:FILEPATH=/usr/bin/nm
251
+
252
+ //Path to a program.
253
+ CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy
254
+
255
+ //Path to a program.
256
+ CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump
257
+
258
+ //Value Computed by CMake
259
+ CMAKE_PROJECT_DESCRIPTION:STATIC=
260
+
261
+ //Value Computed by CMake
262
+ CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
263
+
264
+ //Value Computed by CMake
265
+ CMAKE_PROJECT_NAME:STATIC=gemma
266
+
267
+ //Value Computed by CMake
268
+ CMAKE_PROJECT_VERSION:STATIC=1.2.0
269
+
270
+ //Value Computed by CMake
271
+ CMAKE_PROJECT_VERSION_MAJOR:STATIC=1
272
+
273
+ //Value Computed by CMake
274
+ CMAKE_PROJECT_VERSION_MINOR:STATIC=2
275
+
276
+ //Value Computed by CMake
277
+ CMAKE_PROJECT_VERSION_PATCH:STATIC=0
278
+
279
+ //Value Computed by CMake
280
+ CMAKE_PROJECT_VERSION_TWEAK:STATIC=
281
+
282
+ //Path to a program.
283
+ CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib
284
+
285
+ //Path to a program.
286
+ CMAKE_READELF:FILEPATH=/usr/bin/readelf
287
+
288
+ //Flags used by the linker during the creation of shared libraries
289
+ // during all build types.
290
+ CMAKE_SHARED_LINKER_FLAGS:STRING=
291
+
292
+ //Flags used by the linker during the creation of shared libraries
293
+ // during DEBUG builds.
294
+ CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=
295
+
296
+ //Flags used by the linker during the creation of shared libraries
297
+ // during MINSIZEREL builds.
298
+ CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=
299
+
300
+ //Flags used by the linker during the creation of shared libraries
301
+ // during RELEASE builds.
302
+ CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=
303
+
304
+ //Flags used by the linker during the creation of shared libraries
305
+ // during RELWITHDEBINFO builds.
306
+ CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=
307
+
308
+ //If set, runtime paths are not added when installing shared libraries,
309
+ // but are added when building.
310
+ CMAKE_SKIP_INSTALL_RPATH:BOOL=NO
311
+
312
+ //If set, runtime paths are not added when using shared libraries.
313
+ CMAKE_SKIP_RPATH:BOOL=NO
314
+
315
+ //Flags used by the linker during the creation of static libraries
316
+ // during all build types.
317
+ CMAKE_STATIC_LINKER_FLAGS:STRING=
318
+
319
+ //Flags used by the linker during the creation of static libraries
320
+ // during DEBUG builds.
321
+ CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING=
322
+
323
+ //Flags used by the linker during the creation of static libraries
324
+ // during MINSIZEREL builds.
325
+ CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING=
326
+
327
+ //Flags used by the linker during the creation of static libraries
328
+ // during RELEASE builds.
329
+ CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING=
330
+
331
+ //Flags used by the linker during the creation of static libraries
332
+ // during RELWITHDEBINFO builds.
333
+ CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING=
334
+
335
+ //Path to a program.
336
+ CMAKE_STRIP:FILEPATH=/usr/bin/strip
337
+
338
+ //Path to a program.
339
+ CMAKE_TAPI:FILEPATH=CMAKE_TAPI-NOTFOUND
340
+
341
+ //If this value is on, makefiles will be generated without the
342
+ // .SILENT directive, and all commands will be echoed to the console
343
+ // during the make. This is useful for debugging only. With Visual
344
+ // Studio IDE projects all commands are done without /nologo.
345
+ CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE
346
+
347
+ //Path to the coverage program that CTest uses for performing coverage
348
+ // inspection
349
+ COVERAGE_COMMAND:FILEPATH=/usr/bin/gcov
350
+
351
+ //Extra command line flags to pass to the coverage tool
352
+ COVERAGE_EXTRA_FLAGS:STRING=-l
353
+
354
+ //How many times to retry timed-out CTest submissions.
355
+ CTEST_SUBMIT_RETRY_COUNT:STRING=3
356
+
357
+ //How long to wait between timed-out CTest submissions.
358
+ CTEST_SUBMIT_RETRY_DELAY:STRING=5
359
+
360
+ //OFF
361
+ CXXFEATURECHECK_DEBUG:BOOL=OFF
362
+
363
+ //Maximum time allowed before CTest will kill the test.
364
+ DART_TESTING_TIMEOUT:STRING=1500
365
+
366
+ //Directory under which to collect all populated content
367
+ FETCHCONTENT_BASE_DIR:PATH=/content/gemma.cpp/build/_deps
368
+
369
+ //Disables all attempts to download or update content and assumes
370
+ // source dirs already exist
371
+ FETCHCONTENT_FULLY_DISCONNECTED:BOOL=OFF
372
+
373
+ //Enables QUIET option for all content population
374
+ FETCHCONTENT_QUIET:BOOL=ON
375
+
376
+ //When not empty, overrides where to find pre-populated content
377
+ // for benchmark
378
+ FETCHCONTENT_SOURCE_DIR_BENCHMARK:PATH=
379
+
380
+ //When not empty, overrides where to find pre-populated content
381
+ // for highway
382
+ FETCHCONTENT_SOURCE_DIR_HIGHWAY:PATH=
383
+
384
+ //When not empty, overrides where to find pre-populated content
385
+ // for json
386
+ FETCHCONTENT_SOURCE_DIR_JSON:PATH=
387
+
388
+ //When not empty, overrides where to find pre-populated content
389
+ // for sentencepiece
390
+ FETCHCONTENT_SOURCE_DIR_SENTENCEPIECE:PATH=
391
+
392
+ //Enables UPDATE_DISCONNECTED behavior for all content population
393
+ FETCHCONTENT_UPDATES_DISCONNECTED:BOOL=OFF
394
+
395
+ //Enables UPDATE_DISCONNECTED behavior just for population of benchmark
396
+ FETCHCONTENT_UPDATES_DISCONNECTED_BENCHMARK:BOOL=OFF
397
+
398
+ //Enables UPDATE_DISCONNECTED behavior just for population of highway
399
+ FETCHCONTENT_UPDATES_DISCONNECTED_HIGHWAY:BOOL=OFF
400
+
401
+ //Enables UPDATE_DISCONNECTED behavior just for population of json
402
+ FETCHCONTENT_UPDATES_DISCONNECTED_JSON:BOOL=OFF
403
+
404
+ //Enables UPDATE_DISCONNECTED behavior just for population of sentencepiece
405
+ FETCHCONTENT_UPDATES_DISCONNECTED_SENTENCEPIECE:BOOL=OFF
406
+
407
+ //Enable Gemma tests
408
+ GEMMA_ENABLE_TESTS:BOOL=OFF
409
+
410
+ //Path to a program.
411
+ GITCOMMAND:FILEPATH=/usr/bin/git
412
+
413
+ //Git command line client
414
+ GIT_EXECUTABLE:FILEPATH=/usr/bin/git
415
+
416
+ //Set copts for Armv7 with NEON (requires vfpv4)?
417
+ HWY_CMAKE_ARM7:BOOL=OFF
418
+
419
+ //Change to header-only?
420
+ HWY_CMAKE_HEADER_ONLY:BOOL=OFF
421
+
422
+ //Set copts for RISCV with RVV?
423
+ HWY_CMAKE_RVV:BOOL=ON
424
+
425
+ //Set SSE2 as baseline for 32-bit x86?
426
+ HWY_CMAKE_SSE2:BOOL=OFF
427
+
428
+ //Include contrib/
429
+ HWY_ENABLE_CONTRIB:BOOL=ON
430
+
431
+ //Build examples
432
+ HWY_ENABLE_EXAMPLES:BOOL=ON
433
+
434
+ //Install library
435
+ HWY_ENABLE_INSTALL:BOOL=ON
436
+
437
+ //Enable HWY tests
438
+ HWY_ENABLE_TESTS:BOOL=ON
439
+
440
+ //Ignore BUILD_SHARED_LIBS
441
+ HWY_FORCE_STATIC_LIBS:BOOL=OFF
442
+
443
+ //Use pre-installed googletest?
444
+ HWY_SYSTEM_GTEST:BOOL=OFF
445
+
446
+ //Add -Werror flag?
447
+ HWY_WARNINGS_ARE_ERRORS:BOOL=OFF
448
+
449
+ //Enable installation of googletest. (Projects embedding googletest
450
+ // may want to turn this OFF.)
451
+ INSTALL_GTEST:BOOL=ON
452
+
453
+ //Build the unit tests when BUILD_TESTING is enabled.
454
+ JSON_BuildTests:BOOL=OFF
455
+
456
+ //Enable CI build targets.
457
+ JSON_CI:BOOL=OFF
458
+
459
+ //Use extended diagnostic messages.
460
+ JSON_Diagnostics:BOOL=OFF
461
+
462
+ //Disable default integer enum serialization.
463
+ JSON_DisableEnumSerialization:BOOL=OFF
464
+
465
+ //Place use-defined string literals in the global namespace.
466
+ JSON_GlobalUDLs:BOOL=ON
467
+
468
+ //Enable implicit conversions.
469
+ JSON_ImplicitConversions:BOOL=ON
470
+
471
+ //Install CMake targets during install step.
472
+ JSON_Install:BOOL=OFF
473
+
474
+ //Enable legacy discarded value comparison.
475
+ JSON_LegacyDiscardedValueComparison:BOOL=OFF
476
+
477
+ //Use non-amalgamated version of the library.
478
+ JSON_MultipleHeaders:BOOL=ON
479
+
480
+ //Include as system headers (skip for clang-tidy).
481
+ JSON_SystemInclude:BOOL=OFF
482
+
483
+ //Path to a program.
484
+ LLVM_FILECHECK_EXE:FILEPATH=LLVM_FILECHECK_EXE-NOTFOUND
485
+
486
+ //Command to build the project
487
+ MAKECOMMAND:STRING=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake --build . --config "${CTEST_CONFIGURATION_TYPE}"
488
+
489
+ //Path to the memory checking command, used for memory error detection.
490
+ MEMORYCHECK_COMMAND:FILEPATH=/usr/local/cuda/bin/compute-sanitizer
491
+
492
+ //File that contains suppressions for the memory checker
493
+ MEMORYCHECK_SUPPRESSIONS_FILE:FILEPATH=
494
+
495
+ //Name of the computer/site where compile is being run
496
+ SITE:STRING=4ac239cc9fe6
497
+
498
+ //Provider of absl library
499
+ SPM_ABSL_PROVIDER:STRING=internal
500
+
501
+ //Builds test binaries.
502
+ SPM_BUILD_TEST:BOOL=OFF
503
+
504
+ //Runs gcov to test coverage.
505
+ SPM_COVERAGE:BOOL=OFF
506
+
507
+ //Override system processor
508
+ SPM_CROSS_SYSTEM_PROCESSOR,:BOOL=OFF
509
+
510
+ //Use /MT flag in MSVC build
511
+ SPM_ENABLE_MSVC_MT_BUILD,:BOOL=OFF
512
+
513
+ //Enables NFKC compile
514
+ SPM_ENABLE_NFKC_COMPILE:BOOL=OFF
515
+
516
+ //Builds shared libaries in addition to static libraries.
517
+ SPM_ENABLE_SHARED:BOOL=ON
518
+
519
+ //Enable TCMalloc if available.
520
+ SPM_ENABLE_TCMALLOC:BOOL=ON
521
+
522
+ //Makes a tensorflow compatible shared file.
523
+ SPM_ENABLE_TENSORFLOW_SHARED:BOOL=OFF
524
+
525
+ //Disable thread_local operator
526
+ SPM_NO_THREADLOCAL:BOOL=OFF
527
+
528
+ //Provider of protobuf library
529
+ SPM_PROTOBUF_PROVIDER:STRING=internal
530
+
531
+ //Link static library of TCMALLOC.
532
+ SPM_TCMALLOC_STATIC:BOOL=OFF
533
+
534
+ //Path to a library.
535
+ TCMALLOC_LIB:FILEPATH=TCMALLOC_LIB-NOTFOUND
536
+
537
+ //Value Computed by CMake
538
+ benchmark_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/benchmark-build
539
+
540
+ //Value Computed by CMake
541
+ benchmark_IS_TOP_LEVEL:STATIC=OFF
542
+
543
+ //Value Computed by CMake
544
+ benchmark_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/benchmark-src
545
+
546
+ //Value Computed by CMake
547
+ gemma_BINARY_DIR:STATIC=/content/gemma.cpp/build
548
+
549
+ //Value Computed by CMake
550
+ gemma_IS_TOP_LEVEL:STATIC=ON
551
+
552
+ //Value Computed by CMake
553
+ gemma_SOURCE_DIR:STATIC=/content/gemma.cpp
554
+
555
+ //Value Computed by CMake
556
+ gmock_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-build/googlemock
557
+
558
+ //Value Computed by CMake
559
+ gmock_IS_TOP_LEVEL:STATIC=OFF
560
+
561
+ //Dependencies for the target
562
+ gmock_LIB_DEPENDS:STATIC=general;gtest;
563
+
564
+ //Value Computed by CMake
565
+ gmock_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-src/googlemock
566
+
567
+ //Build all of Google Mock's own tests.
568
+ gmock_build_tests:BOOL=OFF
569
+
570
+ //Dependencies for the target
571
+ gmock_main_LIB_DEPENDS:STATIC=general;gmock;
572
+
573
+ //Value Computed by CMake
574
+ googletest-distribution_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-build
575
+
576
+ //Value Computed by CMake
577
+ googletest-distribution_IS_TOP_LEVEL:STATIC=OFF
578
+
579
+ //Value Computed by CMake
580
+ googletest-distribution_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-src
581
+
582
+ //Value Computed by CMake
583
+ gtest_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-build/googletest
584
+
585
+ //Value Computed by CMake
586
+ gtest_IS_TOP_LEVEL:STATIC=OFF
587
+
588
+ //Value Computed by CMake
589
+ gtest_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build/googletest-src/googletest
590
+
591
+ //Build gtest's sample programs.
592
+ gtest_build_samples:BOOL=OFF
593
+
594
+ //Build all of gtest's own tests.
595
+ gtest_build_tests:BOOL=OFF
596
+
597
+ //Disable uses of pthreads in gtest.
598
+ gtest_disable_pthreads:BOOL=OFF
599
+
600
+ //Use shared (DLL) run-time lib even when Google Test is built
601
+ // as static lib.
602
+ gtest_force_shared_crt:BOOL=ON
603
+
604
+ //Build gtest with internal symbols hidden in shared libraries.
605
+ gtest_hide_internal_symbols:BOOL=OFF
606
+
607
+ //Dependencies for the target
608
+ gtest_main_LIB_DEPENDS:STATIC=general;gtest;
609
+
610
+ //Value Computed by CMake
611
+ hwy_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-build
612
+
613
+ //Value Computed by CMake
614
+ hwy_IS_TOP_LEVEL:STATIC=OFF
615
+
616
+ //Value Computed by CMake
617
+ hwy_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/highway-src
618
+
619
+ //Dependencies for the target
620
+ hwy_contrib_LIB_DEPENDS:STATIC=general;hwy;
621
+
622
+ //Dependencies for the target
623
+ hwy_test_LIB_DEPENDS:STATIC=general;hwy;
624
+
625
+ //Dependencies for the target
626
+ libgemma_LIB_DEPENDS:STATIC=general;hwy;general;hwy_contrib;general;sentencepiece-static;
627
+
628
+ //Value Computed by CMake
629
+ nlohmann_json_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/json-build
630
+
631
+ //Value Computed by CMake
632
+ nlohmann_json_IS_TOP_LEVEL:STATIC=OFF
633
+
634
+ //Value Computed by CMake
635
+ nlohmann_json_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/json-src
636
+
637
+ //Value Computed by CMake
638
+ sentencepiece_BINARY_DIR:STATIC=/content/gemma.cpp/build/_deps/sentencepiece-build
639
+
640
+ //Value Computed by CMake
641
+ sentencepiece_IS_TOP_LEVEL:STATIC=OFF
642
+
643
+ //Value Computed by CMake
644
+ sentencepiece_SOURCE_DIR:STATIC=/content/gemma.cpp/build/_deps/sentencepiece-src
645
+
646
+ //Dependencies for the target
647
+ sentencepiece_train_LIB_DEPENDS:STATIC=general;sentencepiece;
648
+
649
+
650
+ ########################
651
+ # INTERNAL cache entries
652
+ ########################
653
+
654
+ //Test ATOMICS_LOCK_FREE_INSTRUCTIONS
655
+ ATOMICS_LOCK_FREE_INSTRUCTIONS:INTERNAL=1
656
+ //ADVANCED property for variable: BENCHMARK_CXX_FLAGS_COVERAGE
657
+ BENCHMARK_CXX_FLAGS_COVERAGE-ADVANCED:INTERNAL=1
658
+ //ADVANCED property for variable: BENCHMARK_EXE_LINKER_FLAGS_COVERAGE
659
+ BENCHMARK_EXE_LINKER_FLAGS_COVERAGE-ADVANCED:INTERNAL=1
660
+ //Test BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG
661
+ BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG:INTERNAL=1
662
+ //Test BENCHMARK_HAS_O3_FLAG
663
+ BENCHMARK_HAS_O3_FLAG:INTERNAL=1
664
+ //Test BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH
665
+ BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH:INTERNAL=1
666
+ //Test BENCHMARK_HAS_WNO_ODR
667
+ BENCHMARK_HAS_WNO_ODR:INTERNAL=1
668
+ //ADVANCED property for variable: BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE
669
+ BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE-ADVANCED:INTERNAL=1
670
+ //ADVANCED property for variable: BUILD_SHARED_LIBS
671
+ BUILD_SHARED_LIBS-ADVANCED:INTERNAL=1
672
+ //ADVANCED property for variable: CMAKE_ADDR2LINE
673
+ CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
674
+ //ADVANCED property for variable: CMAKE_AR
675
+ CMAKE_AR-ADVANCED:INTERNAL=1
676
+ //This is the directory where this CMakeCache.txt was created
677
+ CMAKE_CACHEFILE_DIR:INTERNAL=/content/gemma.cpp/build
678
+ //Major version of cmake used to create the current loaded cache
679
+ CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
680
+ //Minor version of cmake used to create the current loaded cache
681
+ CMAKE_CACHE_MINOR_VERSION:INTERNAL=27
682
+ //Patch version of cmake used to create the current loaded cache
683
+ CMAKE_CACHE_PATCH_VERSION:INTERNAL=9
684
+ //ADVANCED property for variable: CMAKE_COLOR_MAKEFILE
685
+ CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1
686
+ //Path to CMake executable.
687
+ CMAKE_COMMAND:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake
688
+ //Path to cpack program executable.
689
+ CMAKE_CPACK_COMMAND:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/cpack
690
+ //ADVANCED property for variable: CMAKE_CTEST_COMMAND
691
+ CMAKE_CTEST_COMMAND-ADVANCED:INTERNAL=1
692
+ //Path to ctest program executable.
693
+ CMAKE_CTEST_COMMAND:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/bin/ctest
694
+ //ADVANCED property for variable: CMAKE_CXX_COMPILER
695
+ CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
696
+ //ADVANCED property for variable: CMAKE_CXX_COMPILER_AR
697
+ CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1
698
+ //ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB
699
+ CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1
700
+ //ADVANCED property for variable: CMAKE_CXX_FLAGS
701
+ CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1
702
+ //ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG
703
+ CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1
704
+ //ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL
705
+ CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
706
+ //ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE
707
+ CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1
708
+ //ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO
709
+ CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
710
+ //Test CMAKE_CXX_LINK_NO_PIE_SUPPORTED
711
+ CMAKE_CXX_LINK_NO_PIE_SUPPORTED:INTERNAL=1
712
+ //Test CMAKE_CXX_LINK_PIE_SUPPORTED
713
+ CMAKE_CXX_LINK_PIE_SUPPORTED:INTERNAL=1
714
+ //ADVANCED property for variable: CMAKE_C_COMPILER
715
+ CMAKE_C_COMPILER-ADVANCED:INTERNAL=1
716
+ //ADVANCED property for variable: CMAKE_C_COMPILER_AR
717
+ CMAKE_C_COMPILER_AR-ADVANCED:INTERNAL=1
718
+ //ADVANCED property for variable: CMAKE_C_COMPILER_RANLIB
719
+ CMAKE_C_COMPILER_RANLIB-ADVANCED:INTERNAL=1
720
+ //ADVANCED property for variable: CMAKE_C_FLAGS
721
+ CMAKE_C_FLAGS-ADVANCED:INTERNAL=1
722
+ //ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG
723
+ CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1
724
+ //ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL
725
+ CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
726
+ //ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE
727
+ CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1
728
+ //ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO
729
+ CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
730
+ //ADVANCED property for variable: CMAKE_DLLTOOL
731
+ CMAKE_DLLTOOL-ADVANCED:INTERNAL=1
732
+ //Executable file format
733
+ CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
734
+ //ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS
735
+ CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1
736
+ //ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG
737
+ CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
738
+ //ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL
739
+ CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
740
+ //ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE
741
+ CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
742
+ //ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
743
+ CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
744
+ //ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS
745
+ CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1
746
+ //Name of external makefile project generator.
747
+ CMAKE_EXTRA_GENERATOR:INTERNAL=
748
+ //Name of generator.
749
+ CMAKE_GENERATOR:INTERNAL=Unix Makefiles
750
+ //Generator instance identifier.
751
+ CMAKE_GENERATOR_INSTANCE:INTERNAL=
752
+ //Name of generator platform.
753
+ CMAKE_GENERATOR_PLATFORM:INTERNAL=
754
+ //Name of generator toolset.
755
+ CMAKE_GENERATOR_TOOLSET:INTERNAL=
756
+ //Test CMAKE_HAVE_LIBC_PTHREAD
757
+ CMAKE_HAVE_LIBC_PTHREAD:INTERNAL=1
758
+ //Source directory with the top level CMakeLists.txt file for this
759
+ // project
760
+ CMAKE_HOME_DIRECTORY:INTERNAL=/content/gemma.cpp
761
+ //ADVANCED property for variable: CMAKE_INSTALL_BINDIR
762
+ CMAKE_INSTALL_BINDIR-ADVANCED:INTERNAL=1
763
+ //ADVANCED property for variable: CMAKE_INSTALL_DATADIR
764
+ CMAKE_INSTALL_DATADIR-ADVANCED:INTERNAL=1
765
+ //ADVANCED property for variable: CMAKE_INSTALL_DATAROOTDIR
766
+ CMAKE_INSTALL_DATAROOTDIR-ADVANCED:INTERNAL=1
767
+ //ADVANCED property for variable: CMAKE_INSTALL_DOCDIR
768
+ CMAKE_INSTALL_DOCDIR-ADVANCED:INTERNAL=1
769
+ //ADVANCED property for variable: CMAKE_INSTALL_INCLUDEDIR
770
+ CMAKE_INSTALL_INCLUDEDIR-ADVANCED:INTERNAL=1
771
+ //ADVANCED property for variable: CMAKE_INSTALL_INFODIR
772
+ CMAKE_INSTALL_INFODIR-ADVANCED:INTERNAL=1
773
+ //ADVANCED property for variable: CMAKE_INSTALL_LIBDIR
774
+ CMAKE_INSTALL_LIBDIR-ADVANCED:INTERNAL=1
775
+ //ADVANCED property for variable: CMAKE_INSTALL_LIBEXECDIR
776
+ CMAKE_INSTALL_LIBEXECDIR-ADVANCED:INTERNAL=1
777
+ //ADVANCED property for variable: CMAKE_INSTALL_LOCALEDIR
778
+ CMAKE_INSTALL_LOCALEDIR-ADVANCED:INTERNAL=1
779
+ //ADVANCED property for variable: CMAKE_INSTALL_LOCALSTATEDIR
780
+ CMAKE_INSTALL_LOCALSTATEDIR-ADVANCED:INTERNAL=1
781
+ //ADVANCED property for variable: CMAKE_INSTALL_MANDIR
782
+ CMAKE_INSTALL_MANDIR-ADVANCED:INTERNAL=1
783
+ //ADVANCED property for variable: CMAKE_INSTALL_OLDINCLUDEDIR
784
+ CMAKE_INSTALL_OLDINCLUDEDIR-ADVANCED:INTERNAL=1
785
+ //ADVANCED property for variable: CMAKE_INSTALL_RUNSTATEDIR
786
+ CMAKE_INSTALL_RUNSTATEDIR-ADVANCED:INTERNAL=1
787
+ //ADVANCED property for variable: CMAKE_INSTALL_SBINDIR
788
+ CMAKE_INSTALL_SBINDIR-ADVANCED:INTERNAL=1
789
+ //ADVANCED property for variable: CMAKE_INSTALL_SHAREDSTATEDIR
790
+ CMAKE_INSTALL_SHAREDSTATEDIR-ADVANCED:INTERNAL=1
791
+ //Install .so files without execute permission.
792
+ CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1
793
+ //ADVANCED property for variable: CMAKE_INSTALL_SYSCONFDIR
794
+ CMAKE_INSTALL_SYSCONFDIR-ADVANCED:INTERNAL=1
795
+ //ADVANCED property for variable: CMAKE_LINKER
796
+ CMAKE_LINKER-ADVANCED:INTERNAL=1
797
+ //ADVANCED property for variable: CMAKE_MAKE_PROGRAM
798
+ CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
799
+ //ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS
800
+ CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1
801
+ //ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG
802
+ CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
803
+ //ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL
804
+ CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
805
+ //ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE
806
+ CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
807
+ //ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
808
+ CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
809
+ //ADVANCED property for variable: CMAKE_NM
810
+ CMAKE_NM-ADVANCED:INTERNAL=1
811
+ //number of local generators
812
+ CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=12
813
+ //ADVANCED property for variable: CMAKE_OBJCOPY
814
+ CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
815
+ //ADVANCED property for variable: CMAKE_OBJDUMP
816
+ CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
817
+ //Platform information initialized
818
+ CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
819
+ //ADVANCED property for variable: CMAKE_RANLIB
820
+ CMAKE_RANLIB-ADVANCED:INTERNAL=1
821
+ //ADVANCED property for variable: CMAKE_READELF
822
+ CMAKE_READELF-ADVANCED:INTERNAL=1
823
+ //Path to CMake installation.
824
+ CMAKE_ROOT:INTERNAL=/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27
825
+ //ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS
826
+ CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1
827
+ //ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG
828
+ CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
829
+ //ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
830
+ CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
831
+ //ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE
832
+ CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
833
+ //ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
834
+ CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
835
+ //ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH
836
+ CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1
837
+ //ADVANCED property for variable: CMAKE_SKIP_RPATH
838
+ CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1
839
+ //ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS
840
+ CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1
841
+ //ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG
842
+ CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
843
+ //ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL
844
+ CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
845
+ //ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE
846
+ CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
847
+ //ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
848
+ CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
849
+ //ADVANCED property for variable: CMAKE_STRIP
850
+ CMAKE_STRIP-ADVANCED:INTERNAL=1
851
+ //ADVANCED property for variable: CMAKE_TAPI
852
+ CMAKE_TAPI-ADVANCED:INTERNAL=1
853
+ //uname command
854
+ CMAKE_UNAME:INTERNAL=/usr/bin/uname
855
+ //ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE
856
+ CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
857
+ //Result of TRY_COMPILE
858
+ COMPILE_HAVE_GNU_POSIX_REGEX:INTERNAL=FALSE
859
+ //Result of TRY_COMPILE
860
+ COMPILE_HAVE_POSIX_REGEX:INTERNAL=TRUE
861
+ //Result of TRY_COMPILE
862
+ COMPILE_HAVE_PTHREAD_AFFINITY:INTERNAL=TRUE
863
+ //Result of TRY_COMPILE
864
+ COMPILE_HAVE_STD_REGEX:INTERNAL=TRUE
865
+ //Result of TRY_COMPILE
866
+ COMPILE_HAVE_STEADY_CLOCK:INTERNAL=TRUE
867
+ //ADVANCED property for variable: COVERAGE_COMMAND
868
+ COVERAGE_COMMAND-ADVANCED:INTERNAL=1
869
+ //ADVANCED property for variable: COVERAGE_EXTRA_FLAGS
870
+ COVERAGE_EXTRA_FLAGS-ADVANCED:INTERNAL=1
871
+ //ADVANCED property for variable: CTEST_SUBMIT_RETRY_COUNT
872
+ CTEST_SUBMIT_RETRY_COUNT-ADVANCED:INTERNAL=1
873
+ //ADVANCED property for variable: CTEST_SUBMIT_RETRY_DELAY
874
+ CTEST_SUBMIT_RETRY_DELAY-ADVANCED:INTERNAL=1
875
+ //ADVANCED property for variable: DART_TESTING_TIMEOUT
876
+ DART_TESTING_TIMEOUT-ADVANCED:INTERNAL=1
877
+ //Details about finding Git
878
+ FIND_PACKAGE_MESSAGE_DETAILS_Git:INTERNAL=[/usr/bin/git][v2.34.1()]
879
+ //Details about finding Python
880
+ FIND_PACKAGE_MESSAGE_DETAILS_Python:INTERNAL=[/usr/local/bin/python][cfound components: Interpreter ][v3.10.12()]
881
+ //Details about finding Threads
882
+ FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()]
883
+ //ADVANCED property for variable: GITCOMMAND
884
+ GITCOMMAND-ADVANCED:INTERNAL=1
885
+ //ADVANCED property for variable: GIT_EXECUTABLE
886
+ GIT_EXECUTABLE-ADVANCED:INTERNAL=1
887
+ //Have include asm/hwcap.h
888
+ HAVE_ASM_HWCAP_H:INTERNAL=
889
+ //Test HAVE_CXX_FLAG_COVERAGE
890
+ HAVE_CXX_FLAG_COVERAGE:INTERNAL=1
891
+ //Test HAVE_CXX_FLAG_FSTRICT_ALIASING
892
+ HAVE_CXX_FLAG_FSTRICT_ALIASING:INTERNAL=1
893
+ //Test HAVE_CXX_FLAG_PEDANTIC
894
+ HAVE_CXX_FLAG_PEDANTIC:INTERNAL=1
895
+ //Test HAVE_CXX_FLAG_PEDANTIC_ERRORS
896
+ HAVE_CXX_FLAG_PEDANTIC_ERRORS:INTERNAL=1
897
+ //Test HAVE_CXX_FLAG_WALL
898
+ HAVE_CXX_FLAG_WALL:INTERNAL=1
899
+ //Test HAVE_CXX_FLAG_WD654
900
+ HAVE_CXX_FLAG_WD654:INTERNAL=
901
+ //Test HAVE_CXX_FLAG_WERROR
902
+ HAVE_CXX_FLAG_WERROR:INTERNAL=1
903
+ //Test HAVE_CXX_FLAG_WEXTRA
904
+ HAVE_CXX_FLAG_WEXTRA:INTERNAL=1
905
+ //Test HAVE_CXX_FLAG_WFLOAT_EQUAL
906
+ HAVE_CXX_FLAG_WFLOAT_EQUAL:INTERNAL=1
907
+ //Test HAVE_CXX_FLAG_WNO_DEPRECATED
908
+ HAVE_CXX_FLAG_WNO_DEPRECATED:INTERNAL=1
909
+ //Test HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS
910
+ HAVE_CXX_FLAG_WNO_DEPRECATED_DECLARATIONS:INTERNAL=1
911
+ //Test HAVE_CXX_FLAG_WOLD_STYLE_CAST
912
+ HAVE_CXX_FLAG_WOLD_STYLE_CAST:INTERNAL=1
913
+ //Test HAVE_CXX_FLAG_WSHADOW
914
+ HAVE_CXX_FLAG_WSHADOW:INTERNAL=1
915
+ //Test HAVE_CXX_FLAG_WSHORTEN_64_TO_32
916
+ HAVE_CXX_FLAG_WSHORTEN_64_TO_32:INTERNAL=
917
+ //Test HAVE_CXX_FLAG_WSTRICT_ALIASING
918
+ HAVE_CXX_FLAG_WSTRICT_ALIASING:INTERNAL=1
919
+ //Test HAVE_CXX_FLAG_WTHREAD_SAFETY
920
+ HAVE_CXX_FLAG_WTHREAD_SAFETY:INTERNAL=
921
+ //Have library rt
922
+ HAVE_LIB_RT:INTERNAL=1
923
+ //Have include sys/auxv.h
924
+ HAVE_SYS_AUXV_H:INTERNAL=1
925
+ //Test HWY_EMSCRIPTEN
926
+ HWY_EMSCRIPTEN:INTERNAL=
927
+ //ADVANCED property for variable: HWY_FORCE_STATIC_LIBS
928
+ HWY_FORCE_STATIC_LIBS-ADVANCED:INTERNAL=1
929
+ //Test HWY_RISCV
930
+ HWY_RISCV:INTERNAL=
931
+ //ADVANCED property for variable: MAKECOMMAND
932
+ MAKECOMMAND-ADVANCED:INTERNAL=1
933
+ //ADVANCED property for variable: MEMORYCHECK_COMMAND
934
+ MEMORYCHECK_COMMAND-ADVANCED:INTERNAL=1
935
+ //ADVANCED property for variable: MEMORYCHECK_SUPPRESSIONS_FILE
936
+ MEMORYCHECK_SUPPRESSIONS_FILE-ADVANCED:INTERNAL=1
937
+ NLOHMANN_JSON_CONFIG_INSTALL_DIR:INTERNAL=share/cmake/nlohmann_json
938
+ //Result of try_run()
939
+ RUN_HAVE_POSIX_REGEX:INTERNAL=0
940
+ //Result of try_run()
941
+ RUN_HAVE_PTHREAD_AFFINITY:INTERNAL=0
942
+ //Result of try_run()
943
+ RUN_HAVE_STD_REGEX:INTERNAL=0
944
+ //Result of try_run()
945
+ RUN_HAVE_STEADY_CLOCK:INTERNAL=0
946
+ //ADVANCED property for variable: SITE
947
+ SITE-ADVANCED:INTERNAL=1
948
+ //STRINGS property for variable: SPM_ABSL_PROVIDER
949
+ SPM_ABSL_PROVIDER-STRINGS:INTERNAL=internal;module;package
950
+ //STRINGS property for variable: SPM_PROTOBUF_PROVIDER
951
+ SPM_PROTOBUF_PROVIDER-STRINGS:INTERNAL=internal;package
952
+ //linker supports push/pop state
953
+ _CMAKE_LINKER_PUSHPOP_STATE_SUPPORTED:INTERNAL=TRUE
954
+ //CMAKE_INSTALL_PREFIX during last run
955
+ _GNUInstallDirs_LAST_CMAKE_INSTALL_PREFIX:INTERNAL=/usr/local
956
+ //Compiler reason failure
957
+ _Python_Compiler_REASON_FAILURE:INTERNAL=
958
+ //Development reason failure
959
+ _Python_Development_REASON_FAILURE:INTERNAL=
960
+ //Path to a program.
961
+ _Python_EXECUTABLE:INTERNAL=/usr/local/bin/python
962
+ //Python Properties
963
+ _Python_INTERPRETER_PROPERTIES:INTERNAL=Python;3;10;12;64;32;;cpython-310-x86_64-linux-gnu;abi3;/usr/lib/python3.10;/usr/lib/python3.10;/usr/local/lib/python3.10/dist-packages;/usr/local/lib/python3.10/dist-packages
964
+ _Python_INTERPRETER_SIGNATURE:INTERNAL=fe70fcc9cb393d9a6babd75ef083d11f
965
+ //NumPy reason failure
966
+ _Python_NumPy_REASON_FAILURE:INTERNAL=
967
+ cmake_package_name:INTERNAL=GTest
968
+ generated_dir:INTERNAL=/content/gemma.cpp/build/_deps/highway-build/googletest-build/googletest/generated
969
+ //ADVANCED property for variable: gmock_build_tests
970
+ gmock_build_tests-ADVANCED:INTERNAL=1
971
+ //ADVANCED property for variable: gtest_build_samples
972
+ gtest_build_samples-ADVANCED:INTERNAL=1
973
+ //ADVANCED property for variable: gtest_build_tests
974
+ gtest_build_tests-ADVANCED:INTERNAL=1
975
+ //ADVANCED property for variable: gtest_disable_pthreads
976
+ gtest_disable_pthreads-ADVANCED:INTERNAL=1
977
+ //ADVANCED property for variable: gtest_force_shared_crt
978
+ gtest_force_shared_crt-ADVANCED:INTERNAL=1
979
+ //ADVANCED property for variable: gtest_hide_internal_symbols
980
+ gtest_hide_internal_symbols-ADVANCED:INTERNAL=1
981
+ targets_export_name:INTERNAL=GTestTargets
982
+
gemma.cpp/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ set(CMAKE_C_COMPILER "/usr/bin/cc")
2
+ set(CMAKE_C_COMPILER_ARG1 "")
3
+ set(CMAKE_C_COMPILER_ID "GNU")
4
+ set(CMAKE_C_COMPILER_VERSION "11.4.0")
5
+ set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
6
+ set(CMAKE_C_COMPILER_WRAPPER "")
7
+ set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
8
+ set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
9
+ set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
10
+ set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
11
+ set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
12
+ set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
13
+ set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
14
+ set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
15
+
16
+ set(CMAKE_C_PLATFORM_ID "Linux")
17
+ set(CMAKE_C_SIMULATE_ID "")
18
+ set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
19
+ set(CMAKE_C_SIMULATE_VERSION "")
20
+
21
+
22
+
23
+
24
+ set(CMAKE_AR "/usr/bin/ar")
25
+ set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
26
+ set(CMAKE_RANLIB "/usr/bin/ranlib")
27
+ set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
28
+ set(CMAKE_LINKER "/usr/bin/ld")
29
+ set(CMAKE_MT "")
30
+ set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
31
+ set(CMAKE_COMPILER_IS_GNUCC 1)
32
+ set(CMAKE_C_COMPILER_LOADED 1)
33
+ set(CMAKE_C_COMPILER_WORKS TRUE)
34
+ set(CMAKE_C_ABI_COMPILED TRUE)
35
+
36
+ set(CMAKE_C_COMPILER_ENV_VAR "CC")
37
+
38
+ set(CMAKE_C_COMPILER_ID_RUN 1)
39
+ set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
40
+ set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
41
+ set(CMAKE_C_LINKER_PREFERENCE 10)
42
+ set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE)
43
+
44
+ # Save compiler ABI information.
45
+ set(CMAKE_C_SIZEOF_DATA_PTR "8")
46
+ set(CMAKE_C_COMPILER_ABI "ELF")
47
+ set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN")
48
+ set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
49
+
50
+ if(CMAKE_C_SIZEOF_DATA_PTR)
51
+ set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
52
+ endif()
53
+
54
+ if(CMAKE_C_COMPILER_ABI)
55
+ set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
56
+ endif()
57
+
58
+ if(CMAKE_C_LIBRARY_ARCHITECTURE)
59
+ set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
60
+ endif()
61
+
62
+ set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
63
+ if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)
64
+ set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
65
+ endif()
66
+
67
+
68
+
69
+
70
+
71
+ set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
72
+ set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s")
73
+ set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
74
+ set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
gemma.cpp/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ set(CMAKE_CXX_COMPILER "/usr/bin/c++")
2
+ set(CMAKE_CXX_COMPILER_ARG1 "")
3
+ set(CMAKE_CXX_COMPILER_ID "GNU")
4
+ set(CMAKE_CXX_COMPILER_VERSION "11.4.0")
5
+ set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
6
+ set(CMAKE_CXX_COMPILER_WRAPPER "")
7
+ set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
8
+ set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
9
+ set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23")
10
+ set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters")
11
+ set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
12
+ set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
13
+ set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17")
14
+ set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20")
15
+ set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23")
16
+
17
+ set(CMAKE_CXX_PLATFORM_ID "Linux")
18
+ set(CMAKE_CXX_SIMULATE_ID "")
19
+ set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU")
20
+ set(CMAKE_CXX_SIMULATE_VERSION "")
21
+
22
+
23
+
24
+
25
+ set(CMAKE_AR "/usr/bin/ar")
26
+ set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11")
27
+ set(CMAKE_RANLIB "/usr/bin/ranlib")
28
+ set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
29
+ set(CMAKE_LINKER "/usr/bin/ld")
30
+ set(CMAKE_MT "")
31
+ set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
32
+ set(CMAKE_COMPILER_IS_GNUCXX 1)
33
+ set(CMAKE_CXX_COMPILER_LOADED 1)
34
+ set(CMAKE_CXX_COMPILER_WORKS TRUE)
35
+ set(CMAKE_CXX_ABI_COMPILED TRUE)
36
+
37
+ set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
38
+
39
+ set(CMAKE_CXX_COMPILER_ID_RUN 1)
40
+ set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m)
41
+ set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
42
+
43
+ foreach (lang C OBJC OBJCXX)
44
+ if (CMAKE_${lang}_COMPILER_ID_RUN)
45
+ foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
46
+ list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
47
+ endforeach()
48
+ endif()
49
+ endforeach()
50
+
51
+ set(CMAKE_CXX_LINKER_PREFERENCE 30)
52
+ set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
53
+ set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE)
54
+
55
+ # Save compiler ABI information.
56
+ set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
57
+ set(CMAKE_CXX_COMPILER_ABI "ELF")
58
+ set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN")
59
+ set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
60
+
61
+ if(CMAKE_CXX_SIZEOF_DATA_PTR)
62
+ set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
63
+ endif()
64
+
65
+ if(CMAKE_CXX_COMPILER_ABI)
66
+ set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
67
+ endif()
68
+
69
+ if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
70
+ set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
71
+ endif()
72
+
73
+ set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
74
+ if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
75
+ set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
76
+ endif()
77
+
78
+
79
+
80
+
81
+
82
+ set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
83
+ set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
84
+ set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
85
+ set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f1901373878efd64fb8d123f266ec93db00a3523087d52afa0fff59401a75ce
3
+ size 15968
gemma.cpp/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:706369b9a080132db3bd9f26616f30a752f9376201eb47cfb747ef4b34d7120e
3
+ size 15992
gemma.cpp/build/CMakeFiles/3.27.9/CMakeSystem.cmake ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ set(CMAKE_HOST_SYSTEM "Linux-6.1.85+")
2
+ set(CMAKE_HOST_SYSTEM_NAME "Linux")
3
+ set(CMAKE_HOST_SYSTEM_VERSION "6.1.85+")
4
+ set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
5
+
6
+
7
+
8
+ set(CMAKE_SYSTEM "Linux-6.1.85+")
9
+ set(CMAKE_SYSTEM_NAME "Linux")
10
+ set(CMAKE_SYSTEM_VERSION "6.1.85+")
11
+ set(CMAKE_SYSTEM_PROCESSOR "x86_64")
12
+
13
+ set(CMAKE_CROSSCOMPILING "FALSE")
14
+
15
+ set(CMAKE_SYSTEM_LOADED 1)
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/CMakeCCompilerId.c ADDED
@@ -0,0 +1,866 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifdef __cplusplus
2
+ # error "A C++ compiler has been selected for C."
3
+ #endif
4
+
5
+ #if defined(__18CXX)
6
+ # define ID_VOID_MAIN
7
+ #endif
8
+ #if defined(__CLASSIC_C__)
9
+ /* cv-qualifiers did not exist in K&R C */
10
+ # define const
11
+ # define volatile
12
+ #endif
13
+
14
+ #if !defined(__has_include)
15
+ /* If the compiler does not have __has_include, pretend the answer is
16
+ always no. */
17
+ # define __has_include(x) 0
18
+ #endif
19
+
20
+
21
+ /* Version number components: V=Version, R=Revision, P=Patch
22
+ Version date components: YYYY=Year, MM=Month, DD=Day */
23
+
24
+ #if defined(__INTEL_COMPILER) || defined(__ICC)
25
+ # define COMPILER_ID "Intel"
26
+ # if defined(_MSC_VER)
27
+ # define SIMULATE_ID "MSVC"
28
+ # endif
29
+ # if defined(__GNUC__)
30
+ # define SIMULATE_ID "GNU"
31
+ # endif
32
+ /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
33
+ except that a few beta releases use the old format with V=2021. */
34
+ # if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
35
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
36
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
37
+ # if defined(__INTEL_COMPILER_UPDATE)
38
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
39
+ # else
40
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
41
+ # endif
42
+ # else
43
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
44
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
45
+ /* The third version component from --version is an update index,
46
+ but no macro is provided for it. */
47
+ # define COMPILER_VERSION_PATCH DEC(0)
48
+ # endif
49
+ # if defined(__INTEL_COMPILER_BUILD_DATE)
50
+ /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
51
+ # define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
52
+ # endif
53
+ # if defined(_MSC_VER)
54
+ /* _MSC_VER = VVRR */
55
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
56
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
57
+ # endif
58
+ # if defined(__GNUC__)
59
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
60
+ # elif defined(__GNUG__)
61
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
62
+ # endif
63
+ # if defined(__GNUC_MINOR__)
64
+ # define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
65
+ # endif
66
+ # if defined(__GNUC_PATCHLEVEL__)
67
+ # define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
68
+ # endif
69
+
70
+ #elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
71
+ # define COMPILER_ID "IntelLLVM"
72
+ #if defined(_MSC_VER)
73
+ # define SIMULATE_ID "MSVC"
74
+ #endif
75
+ #if defined(__GNUC__)
76
+ # define SIMULATE_ID "GNU"
77
+ #endif
78
+ /* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
79
+ * later. Look for 6 digit vs. 8 digit version number to decide encoding.
80
+ * VVVV is no smaller than the current year when a version is released.
81
+ */
82
+ #if __INTEL_LLVM_COMPILER < 1000000L
83
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
84
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
85
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10)
86
+ #else
87
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
88
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
89
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100)
90
+ #endif
91
+ #if defined(_MSC_VER)
92
+ /* _MSC_VER = VVRR */
93
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
94
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
95
+ #endif
96
+ #if defined(__GNUC__)
97
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
98
+ #elif defined(__GNUG__)
99
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
100
+ #endif
101
+ #if defined(__GNUC_MINOR__)
102
+ # define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
103
+ #endif
104
+ #if defined(__GNUC_PATCHLEVEL__)
105
+ # define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
106
+ #endif
107
+
108
+ #elif defined(__PATHCC__)
109
+ # define COMPILER_ID "PathScale"
110
+ # define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
111
+ # define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
112
+ # if defined(__PATHCC_PATCHLEVEL__)
113
+ # define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
114
+ # endif
115
+
116
+ #elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
117
+ # define COMPILER_ID "Embarcadero"
118
+ # define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
119
+ # define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
120
+ # define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)
121
+
122
+ #elif defined(__BORLANDC__)
123
+ # define COMPILER_ID "Borland"
124
+ /* __BORLANDC__ = 0xVRR */
125
+ # define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
126
+ # define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
127
+
128
+ #elif defined(__WATCOMC__) && __WATCOMC__ < 1200
129
+ # define COMPILER_ID "Watcom"
130
+ /* __WATCOMC__ = VVRR */
131
+ # define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
132
+ # define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
133
+ # if (__WATCOMC__ % 10) > 0
134
+ # define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
135
+ # endif
136
+
137
+ #elif defined(__WATCOMC__)
138
+ # define COMPILER_ID "OpenWatcom"
139
+ /* __WATCOMC__ = VVRP + 1100 */
140
+ # define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
141
+ # define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
142
+ # if (__WATCOMC__ % 10) > 0
143
+ # define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
144
+ # endif
145
+
146
+ #elif defined(__SUNPRO_C)
147
+ # define COMPILER_ID "SunPro"
148
+ # if __SUNPRO_C >= 0x5100
149
+ /* __SUNPRO_C = 0xVRRP */
150
+ # define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12)
151
+ # define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF)
152
+ # define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF)
153
+ # else
154
+ /* __SUNPRO_CC = 0xVRP */
155
+ # define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8)
156
+ # define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF)
157
+ # define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF)
158
+ # endif
159
+
160
+ #elif defined(__HP_cc)
161
+ # define COMPILER_ID "HP"
162
+ /* __HP_cc = VVRRPP */
163
+ # define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000)
164
+ # define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100)
165
+ # define COMPILER_VERSION_PATCH DEC(__HP_cc % 100)
166
+
167
+ #elif defined(__DECC)
168
+ # define COMPILER_ID "Compaq"
169
+ /* __DECC_VER = VVRRTPPPP */
170
+ # define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000)
171
+ # define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000 % 100)
172
+ # define COMPILER_VERSION_PATCH DEC(__DECC_VER % 10000)
173
+
174
+ #elif defined(__IBMC__) && defined(__COMPILER_VER__)
175
+ # define COMPILER_ID "zOS"
176
+ /* __IBMC__ = VRP */
177
+ # define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
178
+ # define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
179
+ # define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10)
180
+
181
+ #elif defined(__open_xl__) && defined(__clang__)
182
+ # define COMPILER_ID "IBMClang"
183
+ # define COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
184
+ # define COMPILER_VERSION_MINOR DEC(__open_xl_release__)
185
+ # define COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
186
+ # define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
187
+
188
+
189
+ #elif defined(__ibmxl__) && defined(__clang__)
190
+ # define COMPILER_ID "XLClang"
191
+ # define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
192
+ # define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
193
+ # define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
194
+ # define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
195
+
196
+
197
+ #elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800
198
+ # define COMPILER_ID "XL"
199
+ /* __IBMC__ = VRP */
200
+ # define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
201
+ # define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
202
+ # define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10)
203
+
204
+ #elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800
205
+ # define COMPILER_ID "VisualAge"
206
+ /* __IBMC__ = VRP */
207
+ # define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
208
+ # define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
209
+ # define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10)
210
+
211
+ #elif defined(__NVCOMPILER)
212
+ # define COMPILER_ID "NVHPC"
213
+ # define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
214
+ # define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
215
+ # if defined(__NVCOMPILER_PATCHLEVEL__)
216
+ # define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
217
+ # endif
218
+
219
+ #elif defined(__PGI)
220
+ # define COMPILER_ID "PGI"
221
+ # define COMPILER_VERSION_MAJOR DEC(__PGIC__)
222
+ # define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
223
+ # if defined(__PGIC_PATCHLEVEL__)
224
+ # define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
225
+ # endif
226
+
227
+ #elif defined(_CRAYC)
228
+ # define COMPILER_ID "Cray"
229
+ # define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
230
+ # define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
231
+
232
+ #elif defined(__TI_COMPILER_VERSION__)
233
+ # define COMPILER_ID "TI"
234
+ /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
235
+ # define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
236
+ # define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
237
+ # define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)
238
+
239
+ #elif defined(__CLANG_FUJITSU)
240
+ # define COMPILER_ID "FujitsuClang"
241
+ # define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
242
+ # define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
243
+ # define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
244
+ # define COMPILER_VERSION_INTERNAL_STR __clang_version__
245
+
246
+
247
+ #elif defined(__FUJITSU)
248
+ # define COMPILER_ID "Fujitsu"
249
+ # if defined(__FCC_version__)
250
+ # define COMPILER_VERSION __FCC_version__
251
+ # elif defined(__FCC_major__)
252
+ # define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
253
+ # define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
254
+ # define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
255
+ # endif
256
+ # if defined(__fcc_version)
257
+ # define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
258
+ # elif defined(__FCC_VERSION)
259
+ # define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
260
+ # endif
261
+
262
+
263
+ #elif defined(__ghs__)
264
+ # define COMPILER_ID "GHS"
265
+ /* __GHS_VERSION_NUMBER = VVVVRP */
266
+ # ifdef __GHS_VERSION_NUMBER
267
+ # define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
268
+ # define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
269
+ # define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
270
+ # endif
271
+
272
+ #elif defined(__TASKING__)
273
+ # define COMPILER_ID "Tasking"
274
+ # define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
275
+ # define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
276
+ # define COMPILER_VERSION_INTERNAL DEC(__VERSION__)
277
+
278
+ #elif defined(__TINYC__)
279
+ # define COMPILER_ID "TinyCC"
280
+
281
+ #elif defined(__BCC__)
282
+ # define COMPILER_ID "Bruce"
283
+
284
+ #elif defined(__SCO_VERSION__)
285
+ # define COMPILER_ID "SCO"
286
+
287
+ #elif defined(__ARMCC_VERSION) && !defined(__clang__)
288
+ # define COMPILER_ID "ARMCC"
289
+ #if __ARMCC_VERSION >= 1000000
290
+ /* __ARMCC_VERSION = VRRPPPP */
291
+ # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
292
+ # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
293
+ # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
294
+ #else
295
+ /* __ARMCC_VERSION = VRPPPP */
296
+ # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
297
+ # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
298
+ # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
299
+ #endif
300
+
301
+
302
+ #elif defined(__clang__) && defined(__apple_build_version__)
303
+ # define COMPILER_ID "AppleClang"
304
+ # if defined(_MSC_VER)
305
+ # define SIMULATE_ID "MSVC"
306
+ # endif
307
+ # define COMPILER_VERSION_MAJOR DEC(__clang_major__)
308
+ # define COMPILER_VERSION_MINOR DEC(__clang_minor__)
309
+ # define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
310
+ # if defined(_MSC_VER)
311
+ /* _MSC_VER = VVRR */
312
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
313
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
314
+ # endif
315
+ # define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
316
+
317
+ #elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
318
+ # define COMPILER_ID "ARMClang"
319
+ # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
320
+ # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
321
+ # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100 % 100)
322
+ # define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
323
+
324
+ #elif defined(__clang__)
325
+ # define COMPILER_ID "Clang"
326
+ # if defined(_MSC_VER)
327
+ # define SIMULATE_ID "MSVC"
328
+ # endif
329
+ # define COMPILER_VERSION_MAJOR DEC(__clang_major__)
330
+ # define COMPILER_VERSION_MINOR DEC(__clang_minor__)
331
+ # define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
332
+ # if defined(_MSC_VER)
333
+ /* _MSC_VER = VVRR */
334
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
335
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
336
+ # endif
337
+
338
+ #elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
339
+ # define COMPILER_ID "LCC"
340
+ # define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
341
+ # define COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
342
+ # if defined(__LCC_MINOR__)
343
+ # define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
344
+ # endif
345
+ # if defined(__GNUC__) && defined(__GNUC_MINOR__)
346
+ # define SIMULATE_ID "GNU"
347
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
348
+ # define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
349
+ # if defined(__GNUC_PATCHLEVEL__)
350
+ # define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
351
+ # endif
352
+ # endif
353
+
354
+ #elif defined(__GNUC__)
355
+ # define COMPILER_ID "GNU"
356
+ # define COMPILER_VERSION_MAJOR DEC(__GNUC__)
357
+ # if defined(__GNUC_MINOR__)
358
+ # define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
359
+ # endif
360
+ # if defined(__GNUC_PATCHLEVEL__)
361
+ # define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
362
+ # endif
363
+
364
+ #elif defined(_MSC_VER)
365
+ # define COMPILER_ID "MSVC"
366
+ /* _MSC_VER = VVRR */
367
+ # define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
368
+ # define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
369
+ # if defined(_MSC_FULL_VER)
370
+ # if _MSC_VER >= 1400
371
+ /* _MSC_FULL_VER = VVRRPPPPP */
372
+ # define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
373
+ # else
374
+ /* _MSC_FULL_VER = VVRRPPPP */
375
+ # define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
376
+ # endif
377
+ # endif
378
+ # if defined(_MSC_BUILD)
379
+ # define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
380
+ # endif
381
+
382
+ #elif defined(_ADI_COMPILER)
383
+ # define COMPILER_ID "ADSP"
384
+ #if defined(__VERSIONNUM__)
385
+ /* __VERSIONNUM__ = 0xVVRRPPTT */
386
+ # define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
387
+ # define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
388
+ # define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
389
+ # define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
390
+ #endif
391
+
392
+ #elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
393
+ # define COMPILER_ID "IAR"
394
+ # if defined(__VER__) && defined(__ICCARM__)
395
+ # define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
396
+ # define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
397
+ # define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
398
+ # define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
399
+ # elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
400
+ # define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
401
+ # define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
402
+ # define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
403
+ # define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
404
+ # endif
405
+
406
+ #elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC)
407
+ # define COMPILER_ID "SDCC"
408
+ # if defined(__SDCC_VERSION_MAJOR)
409
+ # define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR)
410
+ # define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR)
411
+ # define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH)
412
+ # else
413
+ /* SDCC = VRP */
414
+ # define COMPILER_VERSION_MAJOR DEC(SDCC/100)
415
+ # define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10)
416
+ # define COMPILER_VERSION_PATCH DEC(SDCC % 10)
417
+ # endif
418
+
419
+
420
+ /* These compilers are either not known or too old to define an
421
+ identification macro. Try to identify the platform and guess that
422
+ it is the native compiler. */
423
+ #elif defined(__hpux) || defined(__hpua)
424
+ # define COMPILER_ID "HP"
425
+
426
+ #else /* unknown compiler */
427
+ # define COMPILER_ID ""
428
+ #endif
429
+
430
+ /* Build each marker from adjacent string literals so that the complete
431
+ marker text is formed only by the compiler and this source text itself
432
+ is never matched. Store it in a pointer rather than an array because
433
+ some compilers emit instructions to fill an array instead of static data. */
434
+ char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
435
+ #ifdef SIMULATE_ID /* set above when the compiler imitates another one, e.g. "MSVC" or "GNU" */
436
+ char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
437
+ #endif
438
+
439
+ #ifdef __QNXNTO__
440
+ char const* qnxnto = "INFO" ":" "qnxnto[]"; /* value-less marker: records only that __QNXNTO__ is predefined */
441
+ #endif
442
+
443
+ #if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
444
+ char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; /* fixed marker, emitted whenever the macro above is predefined */
445
+ #endif
446
+
447
+ #define STRINGIFY_HELPER(X) #X /* stringizes X exactly as spelled; X is not macro-expanded here */
448
+ #define STRINGIFY(X) STRINGIFY_HELPER(X) /* extra level so X is macro-expanded first, then stringized */
449
+
450
+ /* Identify known platforms by name. The first matching branch wins, so the order of the tests matters. */
451
+ #if defined(__linux) || defined(__linux__) || defined(linux)
452
+ # define PLATFORM_ID "Linux"
453
+
454
+ #elif defined(__MSYS__)
455
+ # define PLATFORM_ID "MSYS"
456
+
457
+ #elif defined(__CYGWIN__)
458
+ # define PLATFORM_ID "Cygwin"
459
+
460
+ #elif defined(__MINGW32__)
461
+ # define PLATFORM_ID "MinGW"
462
+
463
+ #elif defined(__APPLE__)
464
+ # define PLATFORM_ID "Darwin"
465
+
466
+ #elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
467
+ # define PLATFORM_ID "Windows"
468
+
469
+ #elif defined(__FreeBSD__) || defined(__FreeBSD)
470
+ # define PLATFORM_ID "FreeBSD"
471
+
472
+ #elif defined(__NetBSD__) || defined(__NetBSD)
473
+ # define PLATFORM_ID "NetBSD"
474
+
475
+ #elif defined(__OpenBSD__) || defined(__OPENBSD)
476
+ # define PLATFORM_ID "OpenBSD"
477
+
478
+ #elif defined(__sun) || defined(sun)
479
+ # define PLATFORM_ID "SunOS"
480
+
481
+ #elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
482
+ # define PLATFORM_ID "AIX"
483
+
484
+ #elif defined(__hpux) || defined(__hpux__)
485
+ # define PLATFORM_ID "HP-UX"
486
+
487
+ #elif defined(__HAIKU__)
488
+ # define PLATFORM_ID "Haiku"
489
+
490
+ #elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
491
+ # define PLATFORM_ID "BeOS"
492
+
493
+ #elif defined(__QNX__) || defined(__QNXNTO__)
494
+ # define PLATFORM_ID "QNX"
495
+
496
+ #elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
497
+ # define PLATFORM_ID "Tru64"
498
+
499
+ #elif defined(__riscos) || defined(__riscos__)
500
+ # define PLATFORM_ID "RISCos"
501
+
502
+ #elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
503
+ # define PLATFORM_ID "SINIX"
504
+
505
+ #elif defined(__UNIX_SV__)
506
+ # define PLATFORM_ID "UNIX_SV"
507
+
508
+ #elif defined(__bsdos__)
509
+ # define PLATFORM_ID "BSDOS"
510
+
511
+ #elif defined(_MPRAS) || defined(MPRAS)
512
+ # define PLATFORM_ID "MP-RAS"
513
+
514
+ #elif defined(__osf) || defined(__osf__)
515
+ # define PLATFORM_ID "OSF1"
516
+
517
+ #elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
518
+ # define PLATFORM_ID "SCO_SV"
519
+
520
+ #elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
521
+ # define PLATFORM_ID "ULTRIX"
522
+
523
+ #elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
524
+ # define PLATFORM_ID "Xenix"
525
+
526
+ #elif defined(__WATCOMC__)
527
+ # if defined(__LINUX__)
528
+ # define PLATFORM_ID "Linux"
529
+
530
+ # elif defined(__DOS__)
531
+ # define PLATFORM_ID "DOS"
532
+
533
+ # elif defined(__OS2__)
534
+ # define PLATFORM_ID "OS2"
535
+
536
+ # elif defined(__WINDOWS__)
537
+ # define PLATFORM_ID "Windows3x"
538
+
539
+ # elif defined(__VXWORKS__)
540
+ # define PLATFORM_ID "VxWorks"
541
+
542
+ # else /* unknown platform */
543
+ # define PLATFORM_ID /* defined empty: string concatenation then yields an empty platform field */
544
+ # endif
545
+
546
+ #elif defined(__INTEGRITY)
547
+ # if defined(INT_178B)
548
+ # define PLATFORM_ID "Integrity178"
549
+
550
+ # else /* regular Integrity */
551
+ # define PLATFORM_ID "Integrity"
552
+ # endif
553
+
554
+ # elif defined(_ADI_COMPILER) /* despite the indentation this continues the OUTER chain: the nested #if was closed by the # endif above */
555
+ # define PLATFORM_ID "ADSP"
556
+
557
+ #else /* unknown platform */
558
+ # define PLATFORM_ID /* defined empty: string concatenation then yields an empty platform field */
559
+
560
+ #endif
561
+
562
+ /* For the Windows compilers MSVC and Intel we can determine the
563
+ target architecture of the compiler being used. These compilers
564
+ have no flag that changes the architecture; it is fixed by which
565
+ compiler executable is being run.
566
+ */
567
+ #if defined(_WIN32) && defined(_MSC_VER)
568
+ # if defined(_M_IA64)
569
+ # define ARCHITECTURE_ID "IA64"
570
+
571
+ # elif defined(_M_ARM64EC)
572
+ # define ARCHITECTURE_ID "ARM64EC"
573
+
574
+ # elif defined(_M_X64) || defined(_M_AMD64)
575
+ # define ARCHITECTURE_ID "x64"
576
+
577
+ # elif defined(_M_IX86)
578
+ # define ARCHITECTURE_ID "X86"
579
+
580
+ # elif defined(_M_ARM64)
581
+ # define ARCHITECTURE_ID "ARM64"
582
+
583
+ # elif defined(_M_ARM)
584
+ # if _M_ARM == 4
585
+ # define ARCHITECTURE_ID "ARMV4I"
586
+ # elif _M_ARM == 5
587
+ # define ARCHITECTURE_ID "ARMV5I"
588
+ # else
589
+ # define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) /* appends the expanded value of _M_ARM, e.g. 7 gives "ARMV7" */
590
+ # endif
591
+
592
+ # elif defined(_M_MIPS)
593
+ # define ARCHITECTURE_ID "MIPS"
594
+
595
+ # elif defined(_M_SH)
596
+ # define ARCHITECTURE_ID "SHx"
597
+
598
+ # else /* unknown architecture */
599
+ # define ARCHITECTURE_ID ""
600
+ # endif
601
+
602
+ #elif defined(__WATCOMC__)
603
+ # if defined(_M_I86)
604
+ # define ARCHITECTURE_ID "I86"
605
+
606
+ # elif defined(_M_IX86)
607
+ # define ARCHITECTURE_ID "X86"
608
+
609
+ # else /* unknown architecture */
610
+ # define ARCHITECTURE_ID ""
611
+ # endif
612
+
613
+ #elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
614
+ # if defined(__ICCARM__)
615
+ # define ARCHITECTURE_ID "ARM"
616
+
617
+ # elif defined(__ICCRX__)
618
+ # define ARCHITECTURE_ID "RX"
619
+
620
+ # elif defined(__ICCRH850__)
621
+ # define ARCHITECTURE_ID "RH850"
622
+
623
+ # elif defined(__ICCRL78__)
624
+ # define ARCHITECTURE_ID "RL78"
625
+
626
+ # elif defined(__ICCRISCV__)
627
+ # define ARCHITECTURE_ID "RISCV"
628
+
629
+ # elif defined(__ICCAVR__)
630
+ # define ARCHITECTURE_ID "AVR"
631
+
632
+ # elif defined(__ICC430__)
633
+ # define ARCHITECTURE_ID "MSP430"
634
+
635
+ # elif defined(__ICCV850__)
636
+ # define ARCHITECTURE_ID "V850"
637
+
638
+ # elif defined(__ICC8051__)
639
+ # define ARCHITECTURE_ID "8051"
640
+
641
+ # elif defined(__ICCSTM8__)
642
+ # define ARCHITECTURE_ID "STM8"
643
+
644
+ # else /* unknown architecture */
645
+ # define ARCHITECTURE_ID ""
646
+ # endif
647
+
648
+ #elif defined(__ghs__)
649
+ # if defined(__PPC64__)
650
+ # define ARCHITECTURE_ID "PPC64"
651
+
652
+ # elif defined(__ppc__)
653
+ # define ARCHITECTURE_ID "PPC"
654
+
655
+ # elif defined(__ARM__)
656
+ # define ARCHITECTURE_ID "ARM"
657
+
658
+ # elif defined(__x86_64__)
659
+ # define ARCHITECTURE_ID "x64"
660
+
661
+ # elif defined(__i386__)
662
+ # define ARCHITECTURE_ID "X86"
663
+
664
+ # else /* unknown architecture */
665
+ # define ARCHITECTURE_ID ""
666
+ # endif
667
+
668
+ #elif defined(__TI_COMPILER_VERSION__)
669
+ # if defined(__TI_ARM__)
670
+ # define ARCHITECTURE_ID "ARM"
671
+
672
+ # elif defined(__MSP430__)
673
+ # define ARCHITECTURE_ID "MSP430"
674
+
675
+ # elif defined(__TMS320C28XX__)
676
+ # define ARCHITECTURE_ID "TMS320C28x"
677
+
678
+ # elif defined(__TMS320C6X__) || defined(_TMS320C6X)
679
+ # define ARCHITECTURE_ID "TMS320C6x"
680
+
681
+ # else /* unknown architecture */
682
+ # define ARCHITECTURE_ID ""
683
+ # endif
684
+
685
+ # elif defined(__ADSPSHARC__) /* despite the indentation this continues the OUTER chain: the nested #if was closed by the # endif above */
686
+ # define ARCHITECTURE_ID "SHARC"
687
+
688
+ # elif defined(__ADSPBLACKFIN__) /* also a branch of the outer chain */
689
+ # define ARCHITECTURE_ID "Blackfin"
690
+
691
+ #elif defined(__TASKING__)
692
+
693
+ # if defined(__CTC__) || defined(__CPTC__)
694
+ # define ARCHITECTURE_ID "TriCore"
695
+
696
+ # elif defined(__CMCS__)
697
+ # define ARCHITECTURE_ID "MCS"
698
+
699
+ # elif defined(__CARM__)
700
+ # define ARCHITECTURE_ID "ARM"
701
+
702
+ # elif defined(__CARC__)
703
+ # define ARCHITECTURE_ID "ARC"
704
+
705
+ # elif defined(__C51__)
706
+ # define ARCHITECTURE_ID "8051"
707
+
708
+ # elif defined(__CPCP__)
709
+ # define ARCHITECTURE_ID "PCP"
710
+
711
+ # else
712
+ # define ARCHITECTURE_ID ""
713
+ # endif
714
+
715
+ #else
716
+ # define ARCHITECTURE_ID /* defined empty (not ""): concatenation still yields an empty arch field */
717
+ #endif
718
+
719
+ /* Convert integer n to eight comma-separated character expressions, one per decimal digit, most significant first and zero-padded; digits above the 10^7 place are dropped. The result is a list for a char array initializer, not a string literal. */
720
+ #define DEC(n) \
721
+ ('0' + (((n) / 10000000)%10)), \
722
+ ('0' + (((n) / 1000000)%10)), \
723
+ ('0' + (((n) / 100000)%10)), \
724
+ ('0' + (((n) / 10000)%10)), \
725
+ ('0' + (((n) / 1000)%10)), \
726
+ ('0' + (((n) / 100)%10)), \
727
+ ('0' + (((n) / 10)%10)), \
728
+ ('0' + ((n) % 10))
729
+
730
+ /* Convert integer n to eight comma-separated character expressions, one per 4-bit nibble, most significant first. Each is '0' plus the nibble value, so a nibble above 9 produces the characters that follow '9' rather than 'A'-'F'; the output only reads as digits when every nibble is 0-9. */
731
+ #define HEX(n) \
732
+ ('0' + ((n)>>28 & 0xF)), \
733
+ ('0' + ((n)>>24 & 0xF)), \
734
+ ('0' + ((n)>>20 & 0xF)), \
735
+ ('0' + ((n)>>16 & 0xF)), \
736
+ ('0' + ((n)>>12 & 0xF)), \
737
+ ('0' + ((n)>>8 & 0xF)), \
738
+ ('0' + ((n)>>4 & 0xF)), \
739
+ ('0' + ((n) & 0xF))
740
+
741
+ /* First choice: COMPILER_VERSION is already a string literal, so splice it into the marker by literal concatenation. */
742
+ #ifdef COMPILER_VERSION
743
+ char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
744
+
745
+ /* Otherwise spell the marker out character by character: each COMPILER_VERSION_* component expands to a comma-separated list of characters. A component is emitted only when every component before it is also defined (the #ifdefs are nested). */
746
+ #elif defined(COMPILER_VERSION_MAJOR)
747
+ char const info_version[] = {
748
+ 'I', 'N', 'F', 'O', ':',
749
+ 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
750
+ COMPILER_VERSION_MAJOR,
751
+ # ifdef COMPILER_VERSION_MINOR
752
+ '.', COMPILER_VERSION_MINOR,
753
+ # ifdef COMPILER_VERSION_PATCH
754
+ '.', COMPILER_VERSION_PATCH,
755
+ # ifdef COMPILER_VERSION_TWEAK
756
+ '.', COMPILER_VERSION_TWEAK,
757
+ # endif
758
+ # endif
759
+ # endif
760
+ ']','\0'}; /* explicit terminator: this is a character list, not a string literal */
761
+ #endif
762
+
763
+ /* Marker for the compiler's internal version number: a character array when the value is numeric (COMPILER_VERSION_INTERNAL), or a concatenated string when the compiler supplies a string literal (COMPILER_VERSION_INTERNAL_STR). */
764
+ #ifdef COMPILER_VERSION_INTERNAL
765
+ char const info_version_internal[] = {
766
+ 'I', 'N', 'F', 'O', ':',
767
+ 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
768
+ 'i','n','t','e','r','n','a','l','[',
769
+ COMPILER_VERSION_INTERNAL,']','\0'};
770
+ #elif defined(COMPILER_VERSION_INTERNAL_STR)
771
+ char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
772
+ #endif
773
+
774
+ /* Character-array marker for the version of the simulated compiler. Components after MAJOR are optional and nested: each one is emitted only when every component before it is also defined. */
775
+ #ifdef SIMULATE_VERSION_MAJOR
776
+ char const info_simulate_version[] = {
777
+ 'I', 'N', 'F', 'O', ':',
778
+ 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
779
+ SIMULATE_VERSION_MAJOR,
780
+ # ifdef SIMULATE_VERSION_MINOR
781
+ '.', SIMULATE_VERSION_MINOR,
782
+ # ifdef SIMULATE_VERSION_PATCH
783
+ '.', SIMULATE_VERSION_PATCH,
784
+ # ifdef SIMULATE_VERSION_TWEAK
785
+ '.', SIMULATE_VERSION_TWEAK,
786
+ # endif
787
+ # endif
788
+ # endif
789
+ ']','\0'}; /* explicit terminator: this is a character list, not a string literal */
790
+ #endif
791
+
792
+ /* Build each marker from adjacent string literals so that the complete
793
+ marker text is formed only by the compiler and this source text itself
794
+ is never matched. Store it in a pointer rather than an array because
795
+ some compilers emit instructions to fill an array instead of static data. */
796
+ char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; /* PLATFORM_ID may be defined empty, leaving the brackets empty */
797
+ char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; /* ARCHITECTURE_ID may be "" or defined empty, leaving the brackets empty */
798
+
799
+
800
+ /* Map the compiler's default __STDC_VERSION__ to a two-digit C standard name and publish it in the standard_default marker. */
801
+ #if !defined(__STDC__) && !defined(__clang__)
802
+ # if defined(_MSC_VER) || defined(__ibmxl__) || defined(__IBMC__)
803
+ # define C_VERSION "90"
804
+ # else
805
+ # define C_VERSION /* defined empty: the marker then carries no standard */
806
+ # endif
807
+ #elif __STDC_VERSION__ > 201710L /* anything newer than the C17 value is reported as "23" */
808
+ # define C_VERSION "23"
809
+ #elif __STDC_VERSION__ >= 201710L
810
+ # define C_VERSION "17"
811
+ #elif __STDC_VERSION__ >= 201000L /* NOTE(review): threshold is below the official 201112L, presumably to accept pre-release C11 values; confirm */
812
+ # define C_VERSION "11"
813
+ #elif __STDC_VERSION__ >= 199901L
814
+ # define C_VERSION "99"
815
+ #else /* __STDC_VERSION__ is undefined (evaluates as 0 in #if) or older than C99 */
816
+ # define C_VERSION "90"
817
+ #endif
818
+ const char* info_language_standard_default =
819
+ "INFO" ":" "standard_default[" C_VERSION "]";
820
+ /* Marker reporting "ON" when one of the listed compiler families is in use and __STRICT_ANSI__ is not defined, otherwise "OFF". The value is spliced in between the opening and closing pieces of the string. */
821
+ const char* info_language_extensions_default = "INFO" ":" "extensions_default["
822
+ #if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) || \
823
+ defined(__TI_COMPILER_VERSION__)) && \
824
+ !defined(__STRICT_ANSI__)
825
+ "ON"
826
+ #else
827
+ "OFF"
828
+ #endif
829
+ "]";
830
+
831
+ /*--------------------------------------------------------------------------*/
832
+ /* Entry point. Reads one character of every info_* marker at a run-time index (argc) and returns the sum, presumably so that neither the compiler nor the linker can drop the markers as unused. Each guarded read uses the same condition under which its marker is defined. */
833
+ #ifdef ID_VOID_MAIN
834
+ void main() {}
835
+ #else
836
+ # if defined(__CLASSIC_C__)
837
+ int main(argc, argv) int argc; char *argv[]; /* K&R-style definition for pre-ANSI compilers */
838
+ # else
839
+ int main(int argc, char* argv[])
840
+ # endif
841
+ {
842
+ int require = 0;
843
+ require += info_compiler[argc];
844
+ require += info_platform[argc];
845
+ require += info_arch[argc];
846
+ #if defined(COMPILER_VERSION) || defined(COMPILER_VERSION_MAJOR) /* info_version exists in both the string and the character-array form */
847
+ require += info_version[argc];
848
+ #endif
849
+ #if defined(COMPILER_VERSION_INTERNAL) || defined(COMPILER_VERSION_INTERNAL_STR) /* likewise for the internal version marker */
850
+ require += info_version_internal[argc];
851
+ #endif
852
+ #ifdef SIMULATE_ID
853
+ require += info_simulate[argc];
854
+ #endif
855
+ #ifdef SIMULATE_VERSION_MAJOR
856
+ require += info_simulate_version[argc];
857
+ #endif
858
+ #if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
859
+ require += info_cray[argc];
860
+ #endif
861
+ require += info_language_standard_default[argc];
862
+ require += info_language_extensions_default[argc];
863
+ (void)argv; /* argv is intentionally unused; the cast silences the warning */
864
+ return require;
865
+ }
866
+ #endif
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdC/a.out ADDED
Binary file (16.1 kB). View file
 
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/CMakeCXXCompilerId.cpp ADDED
@@ -0,0 +1,855 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* This source file must have a .cpp extension so that all C++ compilers
2
+ recognize the extension without flags. Borland, for example, does not
3
+ know .cxx. */
4
+ #ifndef __cplusplus /* fail fast when this file is compiled as C */
5
+ # error "A C compiler has been selected for C++."
6
+ #endif
7
+
8
+ #if !defined(__has_include)
9
+ /* If the compiler does not provide __has_include, define a fallback
10
+ that always answers no. */
11
+ # define __has_include(x) 0
12
+ #endif
13
+
14
+
15
+ /* Version number components: V=Version, R=Revision, P=Patch
16
+ Version date components: YYYY=Year, MM=Month, DD=Day */
17
+
18
+ #if defined(__COMO__)
19
+ # define COMPILER_ID "Comeau"
20
+ /* __COMO_VERSION__ = VRR */
21
+ # define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100)
22
+ # define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100)
23
+
24
+ #elif defined(__INTEL_COMPILER) || defined(__ICC)
25
+ # define COMPILER_ID "Intel"
26
+ # if defined(_MSC_VER)
27
+ # define SIMULATE_ID "MSVC"
28
+ # endif
29
+ # if defined(__GNUC__)
30
+ # define SIMULATE_ID "GNU"
31
+ # endif
32
+ /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
33
+ except that a few beta releases use the old format with V=2021. */
34
+ # if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
35
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
36
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
37
+ # if defined(__INTEL_COMPILER_UPDATE)
38
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
39
+ # else
40
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
41
+ # endif
42
+ # else
43
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
44
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
45
+ /* The third version component from --version is an update index,
46
+ but no macro is provided for it. */
47
+ # define COMPILER_VERSION_PATCH DEC(0)
48
+ # endif
49
+ # if defined(__INTEL_COMPILER_BUILD_DATE)
50
+ /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
51
+ # define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
52
+ # endif
53
+ # if defined(_MSC_VER)
54
+ /* _MSC_VER = VVRR */
55
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
56
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
57
+ # endif
58
+ # if defined(__GNUC__)
59
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
60
+ # elif defined(__GNUG__)
61
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
62
+ # endif
63
+ # if defined(__GNUC_MINOR__)
64
+ # define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
65
+ # endif
66
+ # if defined(__GNUC_PATCHLEVEL__)
67
+ # define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
68
+ # endif
69
+
70
+ #elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
71
+ # define COMPILER_ID "IntelLLVM"
72
+ #if defined(_MSC_VER)
73
+ # define SIMULATE_ID "MSVC"
74
+ #endif
75
+ #if defined(__GNUC__)
76
+ # define SIMULATE_ID "GNU"
77
+ #endif
78
+ /* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
79
+ * later. Look for 6 digit vs. 8 digit version number to decide encoding.
80
+ * VVVV is no smaller than the current year when a version is released.
81
+ */
82
+ #if __INTEL_LLVM_COMPILER < 1000000L
83
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
84
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
85
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10)
86
+ #else
87
+ # define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
88
+ # define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
89
+ # define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100)
90
+ #endif
91
+ #if defined(_MSC_VER)
92
+ /* _MSC_VER = VVRR */
93
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
94
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
95
+ #endif
96
+ #if defined(__GNUC__)
97
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
98
+ #elif defined(__GNUG__)
99
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
100
+ #endif
101
+ #if defined(__GNUC_MINOR__)
102
+ # define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
103
+ #endif
104
+ #if defined(__GNUC_PATCHLEVEL__)
105
+ # define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
106
+ #endif
107
+
108
+ #elif defined(__PATHCC__)
109
+ # define COMPILER_ID "PathScale"
110
+ # define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
111
+ # define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
112
+ # if defined(__PATHCC_PATCHLEVEL__)
113
+ # define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
114
+ # endif
115
+
116
+ #elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
117
+ # define COMPILER_ID "Embarcadero"
118
+ # define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
119
+ # define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
120
+ # define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)
121
+
122
+ #elif defined(__BORLANDC__)
123
+ # define COMPILER_ID "Borland"
124
+ /* __BORLANDC__ = 0xVRR */
125
+ # define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
126
+ # define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
127
+
128
+ #elif defined(__WATCOMC__) && __WATCOMC__ < 1200
129
+ # define COMPILER_ID "Watcom"
130
+ /* __WATCOMC__ = VVRR */
131
+ # define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
132
+ # define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
133
+ # if (__WATCOMC__ % 10) > 0
134
+ # define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
135
+ # endif
136
+
137
+ #elif defined(__WATCOMC__)
138
+ # define COMPILER_ID "OpenWatcom"
139
+ /* __WATCOMC__ = VVRP + 1100 */
140
+ # define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
141
+ # define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
142
+ # if (__WATCOMC__ % 10) > 0
143
+ # define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
144
+ # endif
145
+
146
+ #elif defined(__SUNPRO_CC)
147
+ # define COMPILER_ID "SunPro"
148
+ # if __SUNPRO_CC >= 0x5100
149
+ /* __SUNPRO_CC = 0xVRRP */
150
+ # define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
151
+ # define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
152
+ # define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
153
+ # else
154
+ /* __SUNPRO_CC = 0xVRP */
155
+ # define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
156
+ # define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
157
+ # define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
158
+ # endif
159
+
160
+ #elif defined(__HP_aCC)
161
+ # define COMPILER_ID "HP"
162
+ /* __HP_aCC = VVRRPP */
163
+ # define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
164
+ # define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
165
+ # define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100)
166
+
167
+ #elif defined(__DECCXX)
168
+ # define COMPILER_ID "Compaq"
169
+ /* __DECCXX_VER = VVRRTPPPP */
170
+ # define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
171
+ # define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100)
172
+ # define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000)
173
+
174
+ #elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
175
+ # define COMPILER_ID "zOS"
176
+ /* __IBMCPP__ = VRP */
177
+ # define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
178
+ # define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
179
+ # define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
180
+
181
+ #elif defined(__open_xl__) && defined(__clang__)
182
+ # define COMPILER_ID "IBMClang"
183
+ # define COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
184
+ # define COMPILER_VERSION_MINOR DEC(__open_xl_release__)
185
+ # define COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
186
+ # define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
187
+
188
+
189
+ #elif defined(__ibmxl__) && defined(__clang__)
190
+ # define COMPILER_ID "XLClang"
191
+ # define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
192
+ # define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
193
+ # define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
194
+ # define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
195
+
196
+
197
+ #elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
198
+ # define COMPILER_ID "XL"
199
+ /* __IBMCPP__ = VRP */
200
+ # define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
201
+ # define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
202
+ # define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
203
+
204
+ #elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
205
+ # define COMPILER_ID "VisualAge"
206
+ /* __IBMCPP__ = VRP */
207
+ # define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
208
+ # define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
209
+ # define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
210
+
211
+ #elif defined(__NVCOMPILER)
212
+ # define COMPILER_ID "NVHPC"
213
+ # define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
214
+ # define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
215
+ # if defined(__NVCOMPILER_PATCHLEVEL__)
216
+ # define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
217
+ # endif
218
+
219
+ #elif defined(__PGI)
220
+ # define COMPILER_ID "PGI"
221
+ # define COMPILER_VERSION_MAJOR DEC(__PGIC__)
222
+ # define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
223
+ # if defined(__PGIC_PATCHLEVEL__)
224
+ # define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
225
+ # endif
226
+
227
+ #elif defined(_CRAYC)
228
+ # define COMPILER_ID "Cray"
229
+ # define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
230
+ # define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
231
+
232
+ #elif defined(__TI_COMPILER_VERSION__)
233
+ # define COMPILER_ID "TI"
234
+ /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
235
+ # define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
236
+ # define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
237
+ # define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)
238
+
239
+ #elif defined(__CLANG_FUJITSU)
240
+ # define COMPILER_ID "FujitsuClang"
241
+ # define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
242
+ # define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
243
+ # define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
244
+ # define COMPILER_VERSION_INTERNAL_STR __clang_version__
245
+
246
+
247
+ #elif defined(__FUJITSU)
248
+ # define COMPILER_ID "Fujitsu"
249
+ # if defined(__FCC_version__)
250
+ # define COMPILER_VERSION __FCC_version__
251
+ # elif defined(__FCC_major__)
252
+ # define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
253
+ # define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
254
+ # define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
255
+ # endif
256
+ # if defined(__fcc_version)
257
+ # define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
258
+ # elif defined(__FCC_VERSION)
259
+ # define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
260
+ # endif
261
+
262
+
263
+ #elif defined(__ghs__)
264
+ # define COMPILER_ID "GHS"
265
+ /* __GHS_VERSION_NUMBER = VVVVRP */
266
+ # ifdef __GHS_VERSION_NUMBER
267
+ # define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
268
+ # define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
269
+ # define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
270
+ # endif
271
+
272
+ #elif defined(__TASKING__)
273
+ # define COMPILER_ID "Tasking"
274
+ # define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
275
+ # define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
276
+ # define COMPILER_VERSION_INTERNAL DEC(__VERSION__)
277
+
278
+ #elif defined(__SCO_VERSION__)
279
+ # define COMPILER_ID "SCO"
280
+
281
+ #elif defined(__ARMCC_VERSION) && !defined(__clang__)
282
+ # define COMPILER_ID "ARMCC"
283
+ #if __ARMCC_VERSION >= 1000000
284
+ /* __ARMCC_VERSION = VRRPPPP */
285
+ # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
286
+ # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
287
+ # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
288
+ #else
289
+ /* __ARMCC_VERSION = VRPPPP */
290
+ # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
291
+ # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
292
+ # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
293
+ #endif
294
+
295
+
296
+ #elif defined(__clang__) && defined(__apple_build_version__)
297
+ # define COMPILER_ID "AppleClang"
298
+ # if defined(_MSC_VER)
299
+ # define SIMULATE_ID "MSVC"
300
+ # endif
301
+ # define COMPILER_VERSION_MAJOR DEC(__clang_major__)
302
+ # define COMPILER_VERSION_MINOR DEC(__clang_minor__)
303
+ # define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
304
+ # if defined(_MSC_VER)
305
+ /* _MSC_VER = VVRR */
306
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
307
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
308
+ # endif
309
+ # define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
310
+
311
+ #elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
312
+ # define COMPILER_ID "ARMClang"
313
+ # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
314
+ # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
315
+ # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100 % 100)
316
+ # define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
317
+
318
+ #elif defined(__clang__)
319
+ # define COMPILER_ID "Clang"
320
+ # if defined(_MSC_VER)
321
+ # define SIMULATE_ID "MSVC"
322
+ # endif
323
+ # define COMPILER_VERSION_MAJOR DEC(__clang_major__)
324
+ # define COMPILER_VERSION_MINOR DEC(__clang_minor__)
325
+ # define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
326
+ # if defined(_MSC_VER)
327
+ /* _MSC_VER = VVRR */
328
+ # define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
329
+ # define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
330
+ # endif
331
+
332
+ #elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
333
+ # define COMPILER_ID "LCC"
334
+ # define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
335
+ # define COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
336
+ # if defined(__LCC_MINOR__)
337
+ # define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
338
+ # endif
339
+ # if defined(__GNUC__) && defined(__GNUC_MINOR__)
340
+ # define SIMULATE_ID "GNU"
341
+ # define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
342
+ # define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
343
+ # if defined(__GNUC_PATCHLEVEL__)
344
+ # define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
345
+ # endif
346
+ # endif
347
+
348
+ #elif defined(__GNUC__) || defined(__GNUG__)
349
+ # define COMPILER_ID "GNU"
350
+ # if defined(__GNUC__)
351
+ # define COMPILER_VERSION_MAJOR DEC(__GNUC__)
352
+ # else
353
+ # define COMPILER_VERSION_MAJOR DEC(__GNUG__)
354
+ # endif
355
+ # if defined(__GNUC_MINOR__)
356
+ # define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
357
+ # endif
358
+ # if defined(__GNUC_PATCHLEVEL__)
359
+ # define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
360
+ # endif
361
+
362
+ #elif defined(_MSC_VER)
363
+ # define COMPILER_ID "MSVC"
364
+ /* _MSC_VER = VVRR */
365
+ # define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
366
+ # define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
367
+ # if defined(_MSC_FULL_VER)
368
+ # if _MSC_VER >= 1400
369
+ /* _MSC_FULL_VER = VVRRPPPPP */
370
+ # define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
371
+ # else
372
+ /* _MSC_FULL_VER = VVRRPPPP */
373
+ # define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
374
+ # endif
375
+ # endif
376
+ # if defined(_MSC_BUILD)
377
+ # define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
378
+ # endif
379
+
380
+ #elif defined(_ADI_COMPILER)
381
+ # define COMPILER_ID "ADSP"
382
+ #if defined(__VERSIONNUM__)
383
+ /* __VERSIONNUM__ = 0xVVRRPPTT */
384
+ # define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
385
+ # define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
386
+ # define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
387
+ # define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
388
+ #endif
389
+
390
+ #elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
391
+ # define COMPILER_ID "IAR"
392
+ # if defined(__VER__) && defined(__ICCARM__)
393
+ # define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
394
+ # define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
395
+ # define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
396
+ # define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
397
+ # elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
398
+ # define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
399
+ # define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
400
+ # define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
401
+ # define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
402
+ # endif
403
+
404
+
405
+ /* These compilers are either not known or too old to define an
406
+ identification macro. Try to identify the platform and guess that
407
+ it is the native compiler. */
408
+ #elif defined(__hpux) || defined(__hpua)
409
+ # define COMPILER_ID "HP"
410
+
411
+ #else /* unknown compiler */
412
+ # define COMPILER_ID ""
413
+ #endif
414
+
415
+ /* Construct the string literal in pieces to prevent the source from
416
+ getting matched. Store it in a pointer rather than an array
417
+ because some compilers will just produce instructions to fill the
418
+ array rather than assigning a pointer to a static array. */
419
+ char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
420
+ #ifdef SIMULATE_ID
421
+ char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
422
+ #endif
423
+
424
+ #ifdef __QNXNTO__
425
+ char const* qnxnto = "INFO" ":" "qnxnto[]";
426
+ #endif
427
+
428
+ #if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
429
+ char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
430
+ #endif
431
+
432
+ #define STRINGIFY_HELPER(X) #X
433
+ #define STRINGIFY(X) STRINGIFY_HELPER(X)
434
+
435
+ /* Identify known platforms by name. */
436
+ #if defined(__linux) || defined(__linux__) || defined(linux)
437
+ # define PLATFORM_ID "Linux"
438
+
439
+ #elif defined(__MSYS__)
440
+ # define PLATFORM_ID "MSYS"
441
+
442
+ #elif defined(__CYGWIN__)
443
+ # define PLATFORM_ID "Cygwin"
444
+
445
+ #elif defined(__MINGW32__)
446
+ # define PLATFORM_ID "MinGW"
447
+
448
+ #elif defined(__APPLE__)
449
+ # define PLATFORM_ID "Darwin"
450
+
451
+ #elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
452
+ # define PLATFORM_ID "Windows"
453
+
454
+ #elif defined(__FreeBSD__) || defined(__FreeBSD)
455
+ # define PLATFORM_ID "FreeBSD"
456
+
457
+ #elif defined(__NetBSD__) || defined(__NetBSD)
458
+ # define PLATFORM_ID "NetBSD"
459
+
460
+ #elif defined(__OpenBSD__) || defined(__OPENBSD)
461
+ # define PLATFORM_ID "OpenBSD"
462
+
463
+ #elif defined(__sun) || defined(sun)
464
+ # define PLATFORM_ID "SunOS"
465
+
466
+ #elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
467
+ # define PLATFORM_ID "AIX"
468
+
469
+ #elif defined(__hpux) || defined(__hpux__)
470
+ # define PLATFORM_ID "HP-UX"
471
+
472
+ #elif defined(__HAIKU__)
473
+ # define PLATFORM_ID "Haiku"
474
+
475
+ #elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
476
+ # define PLATFORM_ID "BeOS"
477
+
478
+ #elif defined(__QNX__) || defined(__QNXNTO__)
479
+ # define PLATFORM_ID "QNX"
480
+
481
+ #elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
482
+ # define PLATFORM_ID "Tru64"
483
+
484
+ #elif defined(__riscos) || defined(__riscos__)
485
+ # define PLATFORM_ID "RISCos"
486
+
487
+ #elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
488
+ # define PLATFORM_ID "SINIX"
489
+
490
+ #elif defined(__UNIX_SV__)
491
+ # define PLATFORM_ID "UNIX_SV"
492
+
493
+ #elif defined(__bsdos__)
494
+ # define PLATFORM_ID "BSDOS"
495
+
496
+ #elif defined(_MPRAS) || defined(MPRAS)
497
+ # define PLATFORM_ID "MP-RAS"
498
+
499
+ #elif defined(__osf) || defined(__osf__)
500
+ # define PLATFORM_ID "OSF1"
501
+
502
+ #elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
503
+ # define PLATFORM_ID "SCO_SV"
504
+
505
+ #elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
506
+ # define PLATFORM_ID "ULTRIX"
507
+
508
+ #elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
509
+ # define PLATFORM_ID "Xenix"
510
+
511
+ #elif defined(__WATCOMC__)
512
+ # if defined(__LINUX__)
513
+ # define PLATFORM_ID "Linux"
514
+
515
+ # elif defined(__DOS__)
516
+ # define PLATFORM_ID "DOS"
517
+
518
+ # elif defined(__OS2__)
519
+ # define PLATFORM_ID "OS2"
520
+
521
+ # elif defined(__WINDOWS__)
522
+ # define PLATFORM_ID "Windows3x"
523
+
524
+ # elif defined(__VXWORKS__)
525
+ # define PLATFORM_ID "VxWorks"
526
+
527
+ # else /* unknown platform */
528
+ # define PLATFORM_ID
529
+ # endif
530
+
531
+ #elif defined(__INTEGRITY)
532
+ # if defined(INT_178B)
533
+ # define PLATFORM_ID "Integrity178"
534
+
535
+ # else /* regular Integrity */
536
+ # define PLATFORM_ID "Integrity"
537
+ # endif
538
+
539
+ # elif defined(_ADI_COMPILER)
540
+ # define PLATFORM_ID "ADSP"
541
+
542
+ #else /* unknown platform */
543
+ # define PLATFORM_ID
544
+
545
+ #endif
546
+
547
+ /* For the Windows compilers MSVC and Intel we can determine
548
+ the architecture of the compiler being used. This is because
549
+ these compilers have no flag that changes the target architecture;
550
+ the architecture depends instead on which compiler is being used.
551
+ */
552
+ #if defined(_WIN32) && defined(_MSC_VER)
553
+ # if defined(_M_IA64)
554
+ # define ARCHITECTURE_ID "IA64"
555
+
556
+ # elif defined(_M_ARM64EC)
557
+ # define ARCHITECTURE_ID "ARM64EC"
558
+
559
+ # elif defined(_M_X64) || defined(_M_AMD64)
560
+ # define ARCHITECTURE_ID "x64"
561
+
562
+ # elif defined(_M_IX86)
563
+ # define ARCHITECTURE_ID "X86"
564
+
565
+ # elif defined(_M_ARM64)
566
+ # define ARCHITECTURE_ID "ARM64"
567
+
568
+ # elif defined(_M_ARM)
569
+ # if _M_ARM == 4
570
+ # define ARCHITECTURE_ID "ARMV4I"
571
+ # elif _M_ARM == 5
572
+ # define ARCHITECTURE_ID "ARMV5I"
573
+ # else
574
+ # define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
575
+ # endif
576
+
577
+ # elif defined(_M_MIPS)
578
+ # define ARCHITECTURE_ID "MIPS"
579
+
580
+ # elif defined(_M_SH)
581
+ # define ARCHITECTURE_ID "SHx"
582
+
583
+ # else /* unknown architecture */
584
+ # define ARCHITECTURE_ID ""
585
+ # endif
586
+
587
+ #elif defined(__WATCOMC__)
588
+ # if defined(_M_I86)
589
+ # define ARCHITECTURE_ID "I86"
590
+
591
+ # elif defined(_M_IX86)
592
+ # define ARCHITECTURE_ID "X86"
593
+
594
+ # else /* unknown architecture */
595
+ # define ARCHITECTURE_ID ""
596
+ # endif
597
+
598
+ #elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
599
+ # if defined(__ICCARM__)
600
+ # define ARCHITECTURE_ID "ARM"
601
+
602
+ # elif defined(__ICCRX__)
603
+ # define ARCHITECTURE_ID "RX"
604
+
605
+ # elif defined(__ICCRH850__)
606
+ # define ARCHITECTURE_ID "RH850"
607
+
608
+ # elif defined(__ICCRL78__)
609
+ # define ARCHITECTURE_ID "RL78"
610
+
611
+ # elif defined(__ICCRISCV__)
612
+ # define ARCHITECTURE_ID "RISCV"
613
+
614
+ # elif defined(__ICCAVR__)
615
+ # define ARCHITECTURE_ID "AVR"
616
+
617
+ # elif defined(__ICC430__)
618
+ # define ARCHITECTURE_ID "MSP430"
619
+
620
+ # elif defined(__ICCV850__)
621
+ # define ARCHITECTURE_ID "V850"
622
+
623
+ # elif defined(__ICC8051__)
624
+ # define ARCHITECTURE_ID "8051"
625
+
626
+ # elif defined(__ICCSTM8__)
627
+ # define ARCHITECTURE_ID "STM8"
628
+
629
+ # else /* unknown architecture */
630
+ # define ARCHITECTURE_ID ""
631
+ # endif
632
+
633
+ #elif defined(__ghs__)
634
+ # if defined(__PPC64__)
635
+ # define ARCHITECTURE_ID "PPC64"
636
+
637
+ # elif defined(__ppc__)
638
+ # define ARCHITECTURE_ID "PPC"
639
+
640
+ # elif defined(__ARM__)
641
+ # define ARCHITECTURE_ID "ARM"
642
+
643
+ # elif defined(__x86_64__)
644
+ # define ARCHITECTURE_ID "x64"
645
+
646
+ # elif defined(__i386__)
647
+ # define ARCHITECTURE_ID "X86"
648
+
649
+ # else /* unknown architecture */
650
+ # define ARCHITECTURE_ID ""
651
+ # endif
652
+
653
+ #elif defined(__TI_COMPILER_VERSION__)
654
+ # if defined(__TI_ARM__)
655
+ # define ARCHITECTURE_ID "ARM"
656
+
657
+ # elif defined(__MSP430__)
658
+ # define ARCHITECTURE_ID "MSP430"
659
+
660
+ # elif defined(__TMS320C28XX__)
661
+ # define ARCHITECTURE_ID "TMS320C28x"
662
+
663
+ # elif defined(__TMS320C6X__) || defined(_TMS320C6X)
664
+ # define ARCHITECTURE_ID "TMS320C6x"
665
+
666
+ # else /* unknown architecture */
667
+ # define ARCHITECTURE_ID ""
668
+ # endif
669
+
670
+ # elif defined(__ADSPSHARC__)
671
+ # define ARCHITECTURE_ID "SHARC"
672
+
673
+ # elif defined(__ADSPBLACKFIN__)
674
+ # define ARCHITECTURE_ID "Blackfin"
675
+
676
+ #elif defined(__TASKING__)
677
+
678
+ # if defined(__CTC__) || defined(__CPTC__)
679
+ # define ARCHITECTURE_ID "TriCore"
680
+
681
+ # elif defined(__CMCS__)
682
+ # define ARCHITECTURE_ID "MCS"
683
+
684
+ # elif defined(__CARM__)
685
+ # define ARCHITECTURE_ID "ARM"
686
+
687
+ # elif defined(__CARC__)
688
+ # define ARCHITECTURE_ID "ARC"
689
+
690
+ # elif defined(__C51__)
691
+ # define ARCHITECTURE_ID "8051"
692
+
693
+ # elif defined(__CPCP__)
694
+ # define ARCHITECTURE_ID "PCP"
695
+
696
+ # else
697
+ # define ARCHITECTURE_ID ""
698
+ # endif
699
+
700
+ #else
701
+ # define ARCHITECTURE_ID
702
+ #endif
703
+
704
+ /* Convert integer to decimal digit literals. */
705
+ #define DEC(n) \
706
+ ('0' + (((n) / 10000000)%10)), \
707
+ ('0' + (((n) / 1000000)%10)), \
708
+ ('0' + (((n) / 100000)%10)), \
709
+ ('0' + (((n) / 10000)%10)), \
710
+ ('0' + (((n) / 1000)%10)), \
711
+ ('0' + (((n) / 100)%10)), \
712
+ ('0' + (((n) / 10)%10)), \
713
+ ('0' + ((n) % 10))
714
+
715
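+ /* Convert integer to hex digit literals. Each nibble is added to '0', so in ASCII the values 10-15 yield ':' through '?' rather than 'A'-'F'. */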
716
+ #define HEX(n) \
717
+ ('0' + ((n)>>28 & 0xF)), \
718
+ ('0' + ((n)>>24 & 0xF)), \
719
+ ('0' + ((n)>>20 & 0xF)), \
720
+ ('0' + ((n)>>16 & 0xF)), \
721
+ ('0' + ((n)>>12 & 0xF)), \
722
+ ('0' + ((n)>>8 & 0xF)), \
723
+ ('0' + ((n)>>4 & 0xF)), \
724
+ ('0' + ((n) & 0xF))
725
+
726
+ /* Construct a string literal encoding the version number. */
727
+ #ifdef COMPILER_VERSION
728
+ char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
729
+
730
+ /* Construct a string literal encoding the version number components. */
731
+ #elif defined(COMPILER_VERSION_MAJOR)
732
+ char const info_version[] = {
733
+ 'I', 'N', 'F', 'O', ':',
734
+ 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
735
+ COMPILER_VERSION_MAJOR,
736
+ # ifdef COMPILER_VERSION_MINOR
737
+ '.', COMPILER_VERSION_MINOR,
738
+ # ifdef COMPILER_VERSION_PATCH
739
+ '.', COMPILER_VERSION_PATCH,
740
+ # ifdef COMPILER_VERSION_TWEAK
741
+ '.', COMPILER_VERSION_TWEAK,
742
+ # endif
743
+ # endif
744
+ # endif
745
+ ']','\0'};
746
+ #endif
747
+
748
+ /* Construct a string literal encoding the internal version number. */
749
+ #ifdef COMPILER_VERSION_INTERNAL
750
+ char const info_version_internal[] = {
751
+ 'I', 'N', 'F', 'O', ':',
752
+ 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
753
+ 'i','n','t','e','r','n','a','l','[',
754
+ COMPILER_VERSION_INTERNAL,']','\0'};
755
+ #elif defined(COMPILER_VERSION_INTERNAL_STR)
756
+ char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
757
+ #endif
758
+
759
+ /* Construct a character array encoding the simulated compiler's version number components. */
760
+ #ifdef SIMULATE_VERSION_MAJOR
761
+ char const info_simulate_version[] = {
762
+ 'I', 'N', 'F', 'O', ':',
763
+ 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
764
+ SIMULATE_VERSION_MAJOR,
765
+ # ifdef SIMULATE_VERSION_MINOR
766
+ '.', SIMULATE_VERSION_MINOR,
767
+ # ifdef SIMULATE_VERSION_PATCH
768
+ '.', SIMULATE_VERSION_PATCH,
769
+ # ifdef SIMULATE_VERSION_TWEAK
770
+ '.', SIMULATE_VERSION_TWEAK,
771
+ # endif
772
+ # endif
773
+ # endif
774
+ ']','\0'};
775
+ #endif
776
+
777
+ /* Construct the string literal in pieces to prevent the source from
778
+ getting matched. Store it in a pointer rather than an array
779
+ because some compilers will just produce instructions to fill the
780
+ array rather than assigning a pointer to a static array. */
781
+ char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
782
+ char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
783
+
784
+
785
+
786
+ #if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L
787
+ # if defined(__INTEL_CXX11_MODE__)
788
+ # if defined(__cpp_aggregate_nsdmi)
789
+ # define CXX_STD 201402L
790
+ # else
791
+ # define CXX_STD 201103L
792
+ # endif
793
+ # else
794
+ # define CXX_STD 199711L
795
+ # endif
796
+ #elif defined(_MSC_VER) && defined(_MSVC_LANG)
797
+ # define CXX_STD _MSVC_LANG
798
+ #else
799
+ # define CXX_STD __cplusplus
800
+ #endif
801
+
802
+ const char* info_language_standard_default = "INFO" ":" "standard_default["
803
+ #if CXX_STD > 202002L
804
+ "23"
805
+ #elif CXX_STD > 201703L
806
+ "20"
807
+ #elif CXX_STD >= 201703L
808
+ "17"
809
+ #elif CXX_STD >= 201402L
810
+ "14"
811
+ #elif CXX_STD >= 201103L
812
+ "11"
813
+ #else
814
+ "98"
815
+ #endif
816
+ "]";
817
+
818
+ const char* info_language_extensions_default = "INFO" ":" "extensions_default["
819
+ #if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) || \
820
+ defined(__TI_COMPILER_VERSION__)) && \
821
+ !defined(__STRICT_ANSI__)
822
+ "ON"
823
+ #else
824
+ "OFF"
825
+ #endif
826
+ "]";
827
+
828
+ /*--------------------------------------------------------------------------*/
829
+
830
+ int main(int argc, char* argv[])
831
+ {
832
+ int require = 0; /* accumulates one byte of every info string, indexed by argc; presumably this keeps each string referenced so it is not dropped from the binary */
833
+ require += info_compiler[argc];
834
+ require += info_platform[argc];
835
+ require += info_arch[argc];
836
+ #if defined(COMPILER_VERSION) || defined(COMPILER_VERSION_MAJOR) /* info_version is defined for either macro, so reference it for both */
837
+ require += info_version[argc];
838
+ #endif
839
+ #if defined(COMPILER_VERSION_INTERNAL) || defined(COMPILER_VERSION_INTERNAL_STR) /* info_version_internal is defined for either macro, so reference it for both */
840
+ require += info_version_internal[argc];
841
+ #endif
842
+ #ifdef SIMULATE_ID
843
+ require += info_simulate[argc];
844
+ #endif
845
+ #ifdef SIMULATE_VERSION_MAJOR
846
+ require += info_simulate_version[argc];
847
+ #endif
848
+ #if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
849
+ require += info_cray[argc];
850
+ #endif
851
+ require += info_language_standard_default[argc];
852
+ require += info_language_extensions_default[argc];
853
+ (void)argv; /* argv is intentionally unused; the cast silences unused-parameter warnings */
854
+ return require;
855
+ }
gemma.cpp/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out ADDED
Binary file (16.1 kB). View file
 
gemma.cpp/build/CMakeFiles/CMakeConfigureLog.yaml ADDED
The diff for this file is too large to render. See raw diff