cahlen commited on
Commit
bd2d7d7
·
verified ·
1 Parent(s): 29fe620

Add CFD chaotic advection standard map Lyapunov kernel (52nd kernel)

Browse files
README.md CHANGED
@@ -17,6 +17,9 @@ tags:
17
  - erdos-straus
18
  - prime-convergents
19
  - flint-hills
 
 
 
20
  - spectral-methods
21
  - bigcompute
22
  library_name: other
@@ -29,11 +32,12 @@ datasets:
29
  - cahlen/hausdorff-dimension-spectrum
30
  - cahlen/continued-fraction-spectra
31
  - cahlen/ramanujan-machine-results
 
32
  ---
33
 
34
  # bigcompute.science CUDA Kernels
35
 
36
- 51 custom CUDA kernels for GPU-accelerated computational mathematics research. These kernels power the experiments at [bigcompute.science](https://bigcompute.science).
37
 
38
  All kernels are standalone — compile with `nvcc`, run from the command line. No PyTorch dependency.
39
 
@@ -136,6 +140,16 @@ nvcc -O3 -arch=sm_XX -o kernel kernel.cu -lm
136
  `flint-hills/` — Flint Hills series partial sums:
137
  - `flint_hills.cu` — high-precision partial sum to 10B terms
138
 
 
 
 
 
 
 
 
 
 
 
139
  ## Results
140
 
141
  All computation results are open:
 
17
  - erdos-straus
18
  - prime-convergents
19
  - flint-hills
20
+ - fluid-dynamics
21
+ - chaotic-advection
22
+ - cfd
23
  - spectral-methods
24
  - bigcompute
25
  library_name: other
 
32
  - cahlen/hausdorff-dimension-spectrum
33
  - cahlen/continued-fraction-spectra
34
  - cahlen/ramanujan-machine-results
35
+ - cahlen/cfd-chaotic-advection
36
  ---
37
 
38
  # bigcompute.science CUDA Kernels
39
 
40
+ 52 custom CUDA kernels for GPU-accelerated computational mathematics research. These kernels power the experiments at [bigcompute.science](https://bigcompute.science).
41
 
42
  All kernels are standalone — compile with `nvcc`, run from the command line. No PyTorch dependency.
43
 
 
140
  `flint-hills/` — Flint Hills series partial sums:
141
  - `flint_hills.cu` — high-precision partial sum to 10B terms
142
 
143
+ ### CFD / Chaotic Advection (1 kernel)
144
+
145
+ `cfd-chaotic-advection/` — Chirikov standard map Lyapunov spectrum (Benettin):
146
+ - `standard_map_lyapunov.cu` — 16.8M trajectories in 116.6s on RTX 5090 (sm_120)
147
+
148
+ ```bash
149
+ nvcc -O3 -arch=sm_120 -o standard_map_lyapunov cfd-chaotic-advection/standard_map_lyapunov.cu -lm
150
+ ./standard_map_lyapunov 2048 8192 50000 5.0
151
+ ```
152
+
153
  ## Results
154
 
155
  All computation results are open:
cfd-chaotic-advection/standard_map_lyapunov.cu ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Chaotic advection: Lyapunov spectrum of the Chirikov standard map
3
+ *
4
+ * Models area-preserving mixing on T^2 — the same phase-space structure as
5
+ * laminar 2D flows with periodic driving (chaotic advection conjectures).
6
+ *
7
+ * Map (mod 2π):
8
+ * p' = p + K sin(θ)
9
+ * θ' = θ + p'
10
+ *
11
+ * For each K, estimate the largest Lyapunov exponent Λ(K) by averaging
12
+ * Benettin tangent-vector growth over many initial conditions.
13
+ *
14
+ * Hardware: RTX 5090 (32 GB, compute capability 12.0)
15
+ * Compile: nvcc -O3 -arch=sm_120 -o standard_map_lyapunov \
16
+ * scripts/experiments/cfd-chaotic-advection/standard_map_lyapunov.cu -lm
17
+ * Run: ./standard_map_lyapunov [n_k] [n_ic] [n_iters] [k_max]
18
+ * ./standard_map_lyapunov 512 4096 20000 5.0
19
+ */
20
+
21
+ #include <cuda_runtime.h>
22
+ #include <math.h>
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+ #include <string.h>
26
+ #include <time.h>
27
+
28
+ #define PI 3.14159265358979323846
29
+ #define TWO_PI (2.0 * PI)
30
+ #define BLOCK 256
31
+
32
/*
 * Wrap an angle by one period of 2*pi.
 *
 * fmod() keeps the sign of its first argument, so a negative remainder is
 * shifted up by TWO_PI; non-negative remainders are returned unchanged.
 */
__device__ double d_mod2pi(double x) {
    const double wrapped = fmod(x, TWO_PI);
    return (wrapped < 0.0) ? wrapped + TWO_PI : wrapped;
}
37
+
38
/*
 * One step of the SplitMix64 generator.
 *
 * Advances *state by the fixed increment, then scrambles the new state with
 * two xor-shift/multiply rounds and a final xor-shift. The caller's state is
 * updated in place; the scrambled 64-bit value is returned.
 */
__device__ unsigned long long d_splitmix64(unsigned long long *state) {
    *state += 0x9E3779B97F4A7C15ULL;

    unsigned long long mixed = *state;
    mixed ^= mixed >> 30;
    mixed *= 0xBF58476D1CE4E5B9ULL;
    mixed ^= mixed >> 27;
    mixed *= 0x94D049BB133111EBULL;
    mixed ^= mixed >> 31;
    return mixed;
}
44
+
45
/*
 * Draw a double in [0, 1) from the generator state.
 *
 * Keeps the top 53 bits of one d_splitmix64() output and scales them by
 * 2^-53 (9007199254740992 == 2^53).
 */
__device__ double d_uniform01(unsigned long long *state) {
    const double inv_two_pow_53 = 1.0 / 9007199254740992.0;
    const unsigned long long top53 = d_splitmix64(state) >> 11;
    return top53 * inv_two_pow_53;
}
48
+
49
/*
 * Benettin-style estimate of the largest Lyapunov exponent along a single
 * standard-map trajectory.
 *
 *   K        kick strength of the map
 *   theta0   initial angle (wrapped with d_mod2pi before use)
 *   p0       initial momentum (wrapped with d_mod2pi before use)
 *   n_iters  number of map iterations
 *   seed     accepted for interface compatibility; not read by this routine
 *
 * Each iteration applies the map's Jacobian
 *     [ 1 + K cos(theta)   1 ]
 *     [     K cos(theta)   1 ]
 * to a unit tangent vector, accumulates the log of the resulting length,
 * renormalises the vector, and then advances (theta, p) by one map step.
 *
 * Returns the mean log growth per iteration, or NAN as soon as the stretched
 * tangent vector has a length that is zero, NaN or infinite.
 */
__device__ double d_benettin_lyapunov(double K, double theta0, double p0,
                                      int n_iters, unsigned long long seed) {
    double ang = d_mod2pi(theta0);
    double mom = d_mod2pi(p0);
    double tan_a = 1.0;   /* tangent vector starts as (1, 0) */
    double tan_b = 0.0;
    double log_growth = 0.0;
    int steps = 0;

    while (steps < n_iters) {
        const double c = cos(ang);

        /* Jacobian times tangent vector; the second column is all ones. */
        const double str_a = (1.0 + K * c) * tan_a + tan_b;
        const double str_b = (K * c) * tan_a + tan_b;
        const double norm = sqrt(str_a * str_a + str_b * str_b);

        /* !(norm > 0.0) already rejects NaN as well as zero. */
        if (!(norm > 0.0) || isinf(norm)) return NAN;

        log_growth += log(norm);
        steps++;
        tan_a = str_a / norm;
        tan_b = str_b / norm;

        /* Advance the orbit: kick the momentum, then rotate the angle. */
        const double mom_next = d_mod2pi(mom + K * sin(ang));
        ang = d_mod2pi(ang + mom_next);
        mom = mom_next;
    }
    return log_growth / (double)steps;
}
80
+
81
/*
 * One thread per (K value, initial condition) pair.
 *
 * Launch layout read by this kernel: blockIdx.x selects the K index, and
 * blockIdx.y * blockDim.x + threadIdx.x selects the initial-condition index.
 * Threads whose indices fall outside n_k x n_ic exit without writing.
 *
 * K is spaced linearly from 0 to k_max over n_k points (0 when n_k <= 1).
 * Each thread derives its own generator state from seed and its two indices,
 * draws theta0 and p0 in [0, 2*pi), and stores the trajectory's exponent at
 * per_ic[k_idx * n_ic + ic_idx].
 */
__global__ void lyapunov_kernel(int n_k, int n_ic, int n_iters,
                                double k_max, unsigned long long seed,
                                double *per_ic) {
    const int row = blockIdx.x;
    const int col = blockIdx.y * blockDim.x + threadIdx.x;
    if (row >= n_k) return;
    if (col >= n_ic) return;

    double kick = 0.0;
    if (n_k > 1) {
        kick = k_max * (double)row / (double)(n_k - 1);
    }

    /* Per-thread generator state: seed mixed with both indices. */
    unsigned long long stream = seed;
    stream ^= 0x9E3779B97F4A7C15ULL * (unsigned long long)row;
    stream ^= 0xD1B54A32D192ED03ULL * (unsigned long long)col;

    /* Draw order matters: angle first, then momentum. */
    const double theta_init = d_uniform01(&stream) * TWO_PI;
    const double p_init = d_uniform01(&stream) * TWO_PI;

    const size_t slot = (size_t)row * (size_t)n_ic + (size_t)col;
    per_ic[slot] = d_benettin_lyapunov(kick, theta_init, p_init, n_iters, stream);
}
98
+
99
/*
 * Abort the program on a CUDA runtime failure.
 *
 * Does nothing when err is cudaSuccess. Otherwise prints msg together with
 * the runtime's description of err to stderr and exits with status 2.
 */
static void check_cuda(cudaError_t err, const char *msg) {
    if (err == cudaSuccess) return;

    fprintf(stderr, "CERTIFICATE_ERROR: %s: %s\n", msg, cudaGetErrorString(err));
    exit(2);
}
105
+
106
/*
 * Current CLOCK_MONOTONIC reading expressed in seconds as a double
 * (whole seconds plus nanoseconds scaled by 1e-9).
 */
static double now_seconds(void) {
    struct timespec stamp;
    clock_gettime(CLOCK_MONOTONIC, &stamp);

    const double whole = (double)stamp.tv_sec;
    const double frac = (double)stamp.tv_nsec * 1e-9;
    return whole + frac;
}
111
+
112
/*
 * Command-line driver.
 *
 * Usage: standard_map_lyapunov [n_k] [n_ic] [n_iters] [k_max]
 *   n_k      number of K grid points in [0, k_max]   (default 512, >= 2)
 *   n_ic     initial conditions per K value           (default 4096, >= 1)
 *   n_iters  map iterations per trajectory            (default 20000, >= 100)
 *   k_max    largest kick strength, finite and > 0    (default 5.0)
 *
 * Runs lyapunov_kernel over the n_k x n_ic grid, copies the per-trajectory
 * exponents back, writes per-K statistics (mean, std, min, max, fraction
 * positive) to a CSV file, and prints a short validation summary.
 *
 * Exit status: 0 on success, 1 on bad arguments, 2 on any CUDA, I/O or
 * numerical failure (including any NaN/Inf trajectory result).
 */
int main(int argc, char **argv) {
    int n_k = argc > 1 ? atoi(argv[1]) : 512;
    int n_ic = argc > 2 ? atoi(argv[2]) : 4096;
    int n_iters = argc > 3 ? atoi(argv[3]) : 20000;
    double k_max = argc > 4 ? atof(argv[4]) : 5.0;
    unsigned long long seed = 0xC0FFEEULL;

    /* !(k_max > 0.0) also rejects NaN, which a plain `<= 0.0` test lets through. */
    if (n_k < 2 || n_ic < 1 || n_iters < 100 || !(k_max > 0.0) || isinf(k_max)) {
        fprintf(stderr, "Usage: %s [n_k>=2] [n_ic] [n_iters] [k_max]\n", argv[0]);
        return 1;
    }

    cudaDeviceProp prop;
    check_cuda(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties");
    printf("==========================================\n");
    printf("  CFD Chaotic Advection — Standard Map\n");
    printf("  Device: %s (cc %d.%d)\n", prop.name, prop.major, prop.minor);
    printf("  K grid: %d points in [0, %.6f]\n", n_k, k_max);
    printf("  ICs per K: %d\n", n_ic);
    printf("  Iterations: %d\n", n_iters);
    printf("  Total trajectories: %lld\n", (long long)n_k * (long long)n_ic);
    printf("==========================================\n\n");

    /* Ceil-divide in 64-bit so n_ic close to INT_MAX cannot overflow, and
       reject grids the device cannot launch instead of failing silently. */
    const long long grid_y = ((long long)n_ic + BLOCK - 1) / BLOCK;
    if ((long long)n_k > (long long)prop.maxGridSize[0] ||
        grid_y > (long long)prop.maxGridSize[1]) {
        fprintf(stderr,
                "CERTIFICATE_ERROR: launch grid %d x %lld exceeds device limit %d x %d\n",
                n_k, grid_y, prop.maxGridSize[0], prop.maxGridSize[1]);
        return 2;
    }

    size_t n_total = (size_t)n_k * (size_t)n_ic;
    size_t bytes = n_total * sizeof(double);
    double *h_per_ic = (double *)malloc(bytes);
    double *d_per_ic = NULL;
    if (!h_per_ic) {
        fprintf(stderr, "CERTIFICATE_ERROR: host alloc failed (%zu bytes)\n", bytes);
        return 2;
    }
    check_cuda(cudaMalloc(&d_per_ic, bytes), "cudaMalloc");

    dim3 grid((unsigned int)n_k, (unsigned int)grid_y);
    dim3 block(BLOCK);

    double t0 = now_seconds();
    lyapunov_kernel<<<grid, block>>>(n_k, n_ic, n_iters, k_max, seed, d_per_ic);
    /* Launch-configuration errors are reported here, not by the kernel call. */
    check_cuda(cudaGetLastError(), "kernel launch");
    check_cuda(cudaDeviceSynchronize(), "kernel sync");
    check_cuda(cudaMemcpy(h_per_ic, d_per_ic, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy");

    char csv_path[512];
    snprintf(csv_path, sizeof(csv_path),
             "scripts/experiments/cfd-chaotic-advection/results/lyapunov_k%d_ic%d_iter%d.csv",
             n_k, n_ic, n_iters);

    FILE *csv = fopen(csv_path, "w");
    if (!csv) {
        fprintf(stderr, "CERTIFICATE_ERROR: cannot open %s\n", csv_path);
        cudaFree(d_per_ic);
        free(h_per_ic);
        return 2;
    }
    fprintf(csv, "k_index,K,mean_lyapunov,std_lyapunov,min_lyapunov,max_lyapunov,fraction_positive\n");

    long long nan_count = 0;      /* may reach n_k * n_ic, so wider than int */
    double k_crit_scan = -1.0;
    int found_transition = 0;
    double k0_mean = 0.0;         /* mean over valid samples of the K=0 row */

    for (int k_idx = 0; k_idx < n_k; k_idx++) {
        double K = k_max * (double)k_idx / (double)(n_k - 1);
        double sum = 0.0, sum2 = 0.0;
        double mn = INFINITY, mx = -INFINITY;
        int pos = 0, valid = 0;

        const double *row = h_per_ic + (size_t)k_idx * (size_t)n_ic;
        for (int ic = 0; ic < n_ic; ic++) {
            double v = row[ic];
            if (isnan(v) || isinf(v)) {
                nan_count++;
                continue;
            }
            valid++;
            sum += v;
            sum2 += v * v;
            if (v < mn) mn = v;
            if (v > mx) mx = v;
            if (v > 0.0) pos++;
        }
        if (valid == 0) {
            fprintf(stderr, "CERTIFICATE_ERROR: no valid samples at K=%.6f\n", K);
            fclose(csv);
            cudaFree(d_per_ic);
            free(h_per_ic);
            return 2;
        }
        double mean = sum / (double)valid;
        double var = sum2 / (double)valid - mean * mean;
        if (var < 0.0) var = 0.0;   /* rounding can push a tiny variance below zero */
        double stddev = sqrt(var);
        double frac = (double)pos / (double)valid;

        /* k_idx == 0 is exactly K = 0; keep its mean for the validation below. */
        if (k_idx == 0) k0_mean = mean;

        fprintf(csv, "%d,%.10f,%.10f,%.10f,%.10f,%.10f,%.6f\n",
                k_idx, K, mean, stddev, mn, mx, frac);

        /* First K past 0.5 where the mean is clearly positive and nearly all
           initial conditions have a positive exponent. */
        if (!found_transition && K > 0.5 && mean > 0.01 && frac > 0.95) {
            k_crit_scan = K;
            found_transition = 1;
        }
    }

    /* Surface buffered-write failures (e.g. disk full) instead of reporting success. */
    int write_failed = ferror(csv);
    if (fclose(csv) != 0) write_failed = 1;
    if (write_failed) {
        fprintf(stderr, "CERTIFICATE_ERROR: failed writing %s\n", csv_path);
        cudaFree(d_per_ic);
        free(h_per_ic);
        return 2;
    }

    /* Elapsed time spans kernel launch, device-to-host copy and CSV generation. */
    double elapsed = now_seconds() - t0;
    printf("Wrote %s\n", csv_path);
    printf("Elapsed: %.2f s (%.1f trajectories/s)\n", elapsed,
           (double)n_total / elapsed);
    printf("NaN/Inf samples: %lld / %zu\n", nan_count, n_total);

    /* Validation: K=0 should be near-integrable (Λ ≈ 0) */
    printf("Validation K=0 mean Λ = %.6e (expect ~0)\n", k0_mean);
    if (fabs(k0_mean) > 0.05) {
        fprintf(stderr, "CERTIFICATE_WARN: K=0 Lyapunov unexpectedly large\n");
    }

    if (found_transition) {
        printf("Empirical bulk-chaos onset (mean>0.01, >95%% ICs positive): K ≈ %.4f\n",
               k_crit_scan);
        printf("Literature K_crit (standard map): ≈ 0.971635406\n");
    }

    int status = 0;
    if (nan_count > 0) {
        fprintf(stderr, "CERTIFICATE_ERROR: numerical failures detected\n");
        status = 2;
    }

    cudaFree(d_per_ic);
    free(h_per_ic);
    return status;
}