Add CFD chaotic advection standard map Lyapunov kernel (52nd kernel)
Browse files- README.md +15 -1
- cfd-chaotic-advection/standard_map_lyapunov.cu +239 -0
README.md
CHANGED
|
@@ -17,6 +17,9 @@ tags:
|
|
| 17 |
- erdos-straus
|
| 18 |
- prime-convergents
|
| 19 |
- flint-hills
|
|
|
|
|
|
|
|
|
|
| 20 |
- spectral-methods
|
| 21 |
- bigcompute
|
| 22 |
library_name: other
|
|
@@ -29,11 +32,12 @@ datasets:
|
|
| 29 |
- cahlen/hausdorff-dimension-spectrum
|
| 30 |
- cahlen/continued-fraction-spectra
|
| 31 |
- cahlen/ramanujan-machine-results
|
|
|
|
| 32 |
---
|
| 33 |
|
| 34 |
# bigcompute.science CUDA Kernels
|
| 35 |
|
| 36 |
-
|
| 37 |
|
| 38 |
All kernels are standalone — compile with `nvcc`, run from the command line. No PyTorch dependency.
|
| 39 |
|
|
@@ -136,6 +140,16 @@ nvcc -O3 -arch=sm_XX -o kernel kernel.cu -lm
|
|
| 136 |
`flint-hills/` — Flint Hills series partial sums:
|
| 137 |
- `flint_hills.cu` — high-precision partial sum to 10B terms
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
## Results
|
| 140 |
|
| 141 |
All computation results are open:
|
|
|
|
| 17 |
- erdos-straus
|
| 18 |
- prime-convergents
|
| 19 |
- flint-hills
|
| 20 |
+
- fluid-dynamics
|
| 21 |
+
- chaotic-advection
|
| 22 |
+
- cfd
|
| 23 |
- spectral-methods
|
| 24 |
- bigcompute
|
| 25 |
library_name: other
|
|
|
|
| 32 |
- cahlen/hausdorff-dimension-spectrum
|
| 33 |
- cahlen/continued-fraction-spectra
|
| 34 |
- cahlen/ramanujan-machine-results
|
| 35 |
+
- cahlen/cfd-chaotic-advection
|
| 36 |
---
|
| 37 |
|
| 38 |
# bigcompute.science CUDA Kernels
|
| 39 |
|
| 40 |
+
52 custom CUDA kernels for GPU-accelerated computational mathematics research. These kernels power the experiments at [bigcompute.science](https://bigcompute.science).
|
| 41 |
|
| 42 |
All kernels are standalone — compile with `nvcc`, run from the command line. No PyTorch dependency.
|
| 43 |
|
|
|
|
| 140 |
`flint-hills/` — Flint Hills series partial sums:
|
| 141 |
- `flint_hills.cu` — high-precision partial sum to 10B terms
|
| 142 |
|
| 143 |
+
### CFD / Chaotic Advection (1 kernel)
|
| 144 |
+
|
| 145 |
+
`cfd-chaotic-advection/` — Chirikov standard map: largest Lyapunov exponent Λ(K) as a function of kick strength K (Benettin method):
|
| 146 |
+
- `standard_map_lyapunov.cu` — 16.8M trajectories in 116.6s on RTX 5090 (sm_120)
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
nvcc -O3 -arch=sm_120 -o standard_map_lyapunov cfd-chaotic-advection/standard_map_lyapunov.cu -lm
|
| 150 |
+
./standard_map_lyapunov 2048 8192 50000 5.0
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
## Results
|
| 154 |
|
| 155 |
All computation results are open:
|
cfd-chaotic-advection/standard_map_lyapunov.cu
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Chaotic advection: Lyapunov spectrum of the Chirikov standard map
|
| 3 |
+
*
|
| 4 |
+
* Models area-preserving mixing on T^2 — the same phase-space structure as
|
| 5 |
+
* laminar 2D flows with periodic driving (chaotic advection conjectures).
|
| 6 |
+
*
|
| 7 |
+
* Map (mod 2π):
|
| 8 |
+
* p' = p + K sin(θ)
|
| 9 |
+
* θ' = θ + p'
|
| 10 |
+
*
|
| 11 |
+
* For each K, estimate the largest Lyapunov exponent Λ(K) by averaging
|
| 12 |
+
* Benettin tangent-vector growth over many initial conditions.
|
| 13 |
+
*
|
| 14 |
+
* Hardware: RTX 5090 (32 GB, compute capability 12.0)
|
| 15 |
+
* Compile: nvcc -O3 -arch=sm_120 -o standard_map_lyapunov \
|
| 16 |
+
* scripts/experiments/cfd-chaotic-advection/standard_map_lyapunov.cu -lm
|
| 17 |
+
* Run: ./standard_map_lyapunov [n_k] [n_ic] [n_iters] [k_max]
|
| 18 |
+
* ./standard_map_lyapunov 512 4096 20000 5.0
|
| 19 |
+
*/
|
| 20 |
+
|
| 21 |
+
#include <cuda_runtime.h>
|
| 22 |
+
#include <math.h>
|
| 23 |
+
#include <stdio.h>
|
| 24 |
+
#include <stdlib.h>
|
| 25 |
+
#include <string.h>
|
| 26 |
+
#include <time.h>
|
| 27 |
+
|
| 28 |
+
#define PI 3.14159265358979323846
|
| 29 |
+
#define TWO_PI (2.0 * PI)
|
| 30 |
+
#define BLOCK 256
|
| 31 |
+
|
| 32 |
+
/*
 * Reduce an angle to the half-open interval [0, 2π).
 *
 * fmod() keeps the sign of its first argument, so a negative remainder is
 * shifted up by one period. When that remainder is tiny (smaller than half an
 * ulp of 2π) the addition rounds to exactly 2π, which would break the
 * half-open contract; that case is folded back to 0.
 *
 * x: angle in radians. NaN or infinite input yields NaN (from fmod).
 * Returns the equivalent angle in [0, 2π).
 */
__device__ double d_mod2pi(double x) {
    x = fmod(x, TWO_PI);
    if (x < 0.0) {
        x += TWO_PI;
        /* Rounding can land exactly on 2π for remainders just below zero. */
        if (x >= TWO_PI) x = 0.0;
    }
    return x;
}
|
| 37 |
+
|
| 38 |
+
/*
 * One step of the SplitMix64 generator.
 *
 * The state is advanced in place by a fixed 64-bit increment, and the new
 * state is then scrambled with two xor-shift/multiply rounds and a final
 * xor-shift.
 *
 * state: in/out generator state, updated on every call.
 * Returns the next 64-bit output of the stream.
 */
__device__ unsigned long long d_splitmix64(unsigned long long *state) {
    const unsigned long long kIncrement = 0x9E3779B97F4A7C15ULL;
    const unsigned long long kMulFirst  = 0xBF58476D1CE4E5B9ULL;
    const unsigned long long kMulSecond = 0x94D049BB133111EBULL;

    *state += kIncrement;

    unsigned long long mixed = *state;
    mixed ^= mixed >> 30;
    mixed *= kMulFirst;
    mixed ^= mixed >> 27;
    mixed *= kMulSecond;
    mixed ^= mixed >> 31;
    return mixed;
}
|
| 44 |
+
|
| 45 |
+
/*
 * Draw a double uniformly from [0, 1).
 *
 * One 64-bit value is taken from d_splitmix64, its top 53 bits are kept
 * (shift right by 11) and scaled by 2^-53, so the largest possible result is
 * (2^53 - 1) / 2^53.
 *
 * state: in/out generator state, advanced by exactly one draw.
 */
__device__ double d_uniform01(unsigned long long *state) {
    const double kInvTwoPow53 = 1.0 / 9007199254740992.0;
    const unsigned long long top53 = d_splitmix64(state) >> 11;
    return top53 * kInvTwoPow53;
}
|
| 48 |
+
|
| 49 |
+
/*
 * Finite-time estimate of the largest Lyapunov exponent of the standard map
 * for one initial condition, using a tangent vector that is renormalised
 * after every iteration.
 *
 * K:        kick strength of the map.
 * theta0:   initial angle; reduced with d_mod2pi before use.
 * p0:       initial momentum; reduced with d_mod2pi before use.
 * n_iters:  number of map iterations to average over.
 * seed:     kept for interface compatibility; this function does not read it.
 *
 * Returns the average of log(stretch factor) over the iterations performed.
 * Returns NAN as soon as a step yields a tangent norm that is not strictly
 * positive and finite. If n_iters <= 0 the loop body never runs and the
 * 0.0 / 0 average is NaN as well.
 */
__device__ double d_benettin_lyapunov(double K, double theta0, double p0,
                                      int n_iters, unsigned long long seed) {
    (void)seed;

    double angle = d_mod2pi(theta0);
    double mom = d_mod2pi(p0);

    /* Tangent vector (dθ, dp), kept at unit length between steps. */
    double tan_th = 1.0;
    double tan_p = 0.0;

    double log_total = 0.0;
    int steps = 0;

    for (int step = 0; step < n_iters; ++step) {
        /* Jacobian of (θ, p) -> (θ', p') evaluated at the current angle. */
        double cos_th = cos(angle);
        double d_th_th = 1.0 + K * cos_th;
        double d_th_p = 1.0;
        double d_p_th = K * cos_th;
        double d_p_p = 1.0;

        double grown_th = d_th_th * tan_th + d_th_p * tan_p;
        double grown_p = d_p_th * tan_th + d_p_p * tan_p;
        double stretch = sqrt(grown_th * grown_th + grown_p * grown_p);

        bool usable = (stretch > 0.0) && !isnan(stretch) && !isinf(stretch);
        if (!usable) {
            return NAN;
        }

        log_total += log(stretch);
        steps++;
        tan_th = grown_th / stretch;
        tan_p = grown_p / stretch;

        /* Advance the orbit: kick the momentum, then rotate by the new momentum. */
        double mom_next = d_mod2pi(mom + K * sin(angle));
        angle = d_mod2pi(angle + mom_next);
        mom = mom_next;
    }

    return log_total / (double)steps;
}
|
| 80 |
+
|
| 81 |
+
/*
 * Compute one Lyapunov estimate per (K grid point, initial condition) pair.
 *
 * Indexing: blockIdx.x is the K index; blockIdx.y * blockDim.x + threadIdx.x
 * is the initial-condition index. Threads whose indices fall outside
 * n_k x n_ic return without writing.
 *
 * n_k, n_ic:  grid extents (K points, initial conditions per K).
 * n_iters:    map iterations per trajectory, forwarded to d_benettin_lyapunov.
 * k_max:      K is spaced linearly over [0, k_max]; K is 0 when n_k <= 1.
 * seed:       base seed, mixed with both indices to form the per-thread
 *             generator state.
 * per_ic:     output array indexed as [k_idx * n_ic + ic_idx].
 */
__global__ void lyapunov_kernel(int n_k, int n_ic, int n_iters,
                                double k_max, unsigned long long seed,
                                double *per_ic) {
    const int k_idx = blockIdx.x;
    const int ic_idx = blockIdx.y * blockDim.x + threadIdx.x;

    const bool inside = (k_idx < n_k) && (ic_idx < n_ic);
    if (!inside) {
        return;
    }

    double K = 0.0;
    if (n_k > 1) {
        K = k_max * (double)k_idx / (double)(n_k - 1);
    }

    /* Per-thread generator state: the seed xor-ed with a salt per index. */
    const unsigned long long k_salt =
        0x9E3779B97F4A7C15ULL * (unsigned long long)k_idx;
    const unsigned long long ic_salt =
        0xD1B54A32D192ED03ULL * (unsigned long long)ic_idx;
    unsigned long long rng = seed ^ k_salt ^ ic_salt;

    /* Draw order matters: the angle comes first, then the momentum. */
    const double theta0 = d_uniform01(&rng) * TWO_PI;
    const double p0 = d_uniform01(&rng) * TWO_PI;

    const size_t slot = (size_t)k_idx * (size_t)n_ic + (size_t)ic_idx;
    per_ic[slot] = d_benettin_lyapunov(K, theta0, p0, n_iters, rng);
}
|
| 98 |
+
|
| 99 |
+
/*
 * Abort the program when a CUDA runtime call did not succeed.
 *
 * err: status returned by the CUDA call being checked.
 * msg: short label for the failing call, included in the diagnostic.
 *
 * On failure a CERTIFICATE_ERROR line is written to stderr and the process
 * exits with status 2; on cudaSuccess the function simply returns.
 */
static void check_cuda(cudaError_t err, const char *msg) {
    if (err == cudaSuccess) {
        return;
    }
    fprintf(stderr, "CERTIFICATE_ERROR: %s: %s\n", msg, cudaGetErrorString(err));
    exit(2);
}
|
| 105 |
+
|
| 106 |
+
/*
 * Read CLOCK_MONOTONIC and return it as seconds in a double
 * (whole seconds plus nanoseconds scaled by 1e-9).
 */
static double now_seconds(void) {
    struct timespec stamp;
    clock_gettime(CLOCK_MONOTONIC, &stamp);
    const double seconds = (double)stamp.tv_sec;
    const double nanos = (double)stamp.tv_nsec;
    return seconds + nanos * 1e-9;
}
|
| 111 |
+
|
| 112 |
+
/* Release the host and device result buffers; NULL is accepted for either. */
static void release_buffers(double *h_buf, double *d_buf) {
    cudaFree(d_buf);
    free(h_buf);
}

/*
 * Driver: scan K over [0, k_max], run the Lyapunov kernel for every
 * (K, initial condition) pair, and write per-K statistics to a CSV file.
 *
 * Arguments (all optional, positional): n_k n_ic n_iters k_max.
 * Defaults: 512 4096 20000 5.0. Requires n_k >= 2, n_ic >= 1,
 * n_iters >= 100 and a finite k_max > 0.
 *
 * The CSV path is relative to the current working directory; the results
 * directory must already exist.
 *
 * Exit status: 0 on success, 1 on bad arguments, 2 on any runtime failure
 * (CUDA error, launch grid too large for the device, allocation or file
 * failure, or any NaN/Inf sample).
 */
int main(int argc, char **argv) {
    int n_k = argc > 1 ? atoi(argv[1]) : 512;
    int n_ic = argc > 2 ? atoi(argv[2]) : 4096;
    int n_iters = argc > 3 ? atoi(argv[3]) : 20000;
    double k_max = argc > 4 ? atof(argv[4]) : 5.0;
    unsigned long long seed = 0xC0FFEEULL;

    /* !(k_max > 0.0) also rejects NaN, which `k_max <= 0.0` would let through. */
    if (n_k < 2 || n_ic < 1 || n_iters < 100 || !(k_max > 0.0) || isinf(k_max)) {
        fprintf(stderr, "Usage: %s [n_k>=2] [n_ic] [n_iters] [k_max]\n", argv[0]);
        return 1;
    }

    cudaDeviceProp prop;
    check_cuda(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties");
    printf("==========================================\n");
    printf(" CFD Chaotic Advection — Standard Map\n");
    printf(" Device: %s (cc %d.%d)\n", prop.name, prop.major, prop.minor);
    printf(" K grid: %d points in [0, %.6f]\n", n_k, k_max);
    printf(" ICs per K: %d\n", n_ic);
    printf(" Iterations: %d\n", n_iters);
    printf(" Total trajectories: %lld\n", (long long)n_k * (long long)n_ic);
    printf("==========================================\n\n");

    /*
     * The kernel uses grid.x for the K index and grid.y for blocks of initial
     * conditions. Reject sizes the device cannot launch instead of letting the
     * launch fail. The ceil-div is done in size_t so n_ic near INT_MAX cannot
     * overflow.
     */
    size_t ic_blocks = ((size_t)n_ic + (size_t)BLOCK - 1) / (size_t)BLOCK;
    if (n_k > prop.maxGridSize[0] || ic_blocks > (size_t)prop.maxGridSize[1]) {
        fprintf(stderr,
                "CERTIFICATE_ERROR: launch grid %d x %zu exceeds device limit %d x %d\n",
                n_k, ic_blocks, prop.maxGridSize[0], prop.maxGridSize[1]);
        return 2;
    }

    size_t n_total = (size_t)n_k * (size_t)n_ic;
    size_t bytes = n_total * sizeof(double);
    double *h_per_ic = (double *)malloc(bytes);
    double *d_per_ic = NULL;
    if (!h_per_ic) {
        fprintf(stderr, "CERTIFICATE_ERROR: host alloc failed (%zu bytes)\n", bytes);
        return 2;
    }
    check_cuda(cudaMalloc(&d_per_ic, bytes), "cudaMalloc");

    dim3 grid((unsigned int)n_k, (unsigned int)ic_blocks);
    dim3 block(BLOCK);

    double t0 = now_seconds();
    lyapunov_kernel<<<grid, block>>>(n_k, n_ic, n_iters, k_max, seed, d_per_ic);
    /* Launch-configuration errors are only reported through cudaGetLastError. */
    check_cuda(cudaGetLastError(), "kernel launch");
    check_cuda(cudaDeviceSynchronize(), "kernel sync");
    check_cuda(cudaMemcpy(h_per_ic, d_per_ic, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy");

    char csv_path[512];
    snprintf(csv_path, sizeof(csv_path),
             "scripts/experiments/cfd-chaotic-advection/results/lyapunov_k%d_ic%d_iter%d.csv",
             n_k, n_ic, n_iters);

    FILE *csv = fopen(csv_path, "w");
    if (!csv) {
        fprintf(stderr, "CERTIFICATE_ERROR: cannot open %s\n", csv_path);
        release_buffers(h_per_ic, d_per_ic);
        return 2;
    }
    fprintf(csv, "k_index,K,mean_lyapunov,std_lyapunov,min_lyapunov,max_lyapunov,fraction_positive\n");

    size_t nan_count = 0;       /* may reach n_total, so not an int */
    double k_crit_scan = -1.0;
    int found_transition = 0;
    double k0_mean = 0.0;       /* mean over finite samples of the K=0 row */

    for (int k_idx = 0; k_idx < n_k; k_idx++) {
        double K = k_max * (double)k_idx / (double)(n_k - 1);
        double sum = 0.0, sum2 = 0.0;
        double mn = INFINITY, mx = -INFINITY;
        int pos = 0, valid = 0;

        for (int ic = 0; ic < n_ic; ic++) {
            double v = h_per_ic[(size_t)k_idx * (size_t)n_ic + (size_t)ic];
            if (isnan(v) || isinf(v)) {
                nan_count++;
                continue;
            }
            valid++;
            sum += v;
            sum2 += v * v;
            if (v < mn) mn = v;
            if (v > mx) mx = v;
            if (v > 0.0) pos++;
        }
        if (valid == 0) {
            fprintf(stderr, "CERTIFICATE_ERROR: no valid samples at K=%.6f\n", K);
            fclose(csv);
            release_buffers(h_per_ic, d_per_ic);
            return 2;
        }
        double mean = sum / (double)valid;
        double var = sum2 / (double)valid - mean * mean;
        if (var < 0.0) var = 0.0;   /* guard against cancellation error */
        double stddev = sqrt(var);
        double frac = (double)pos / (double)valid;

        if (k_idx == 0) k0_mean = mean;

        fprintf(csv, "%d,%.10f,%.10f,%.10f,%.10f,%.10f,%.6f\n",
                k_idx, K, mean, stddev, mn, mx, frac);

        /* First K past 0.5 where the mean is clearly positive for >95% of ICs. */
        if (!found_transition && K > 0.5 && mean > 0.01 && frac > 0.95) {
            k_crit_scan = K;
            found_transition = 1;
        }
    }
    /* fclose flushes buffered rows; a failure here means the CSV is incomplete. */
    if (fclose(csv) != 0) {
        fprintf(stderr, "CERTIFICATE_ERROR: failed writing %s\n", csv_path);
        release_buffers(h_per_ic, d_per_ic);
        return 2;
    }

    double elapsed = now_seconds() - t0;
    printf("Wrote %s\n", csv_path);
    printf("Elapsed: %.2f s (%.1f trajectories/s)\n", elapsed,
           (double)n_total / elapsed);
    printf("NaN/Inf samples: %zu / %zu\n", nan_count, n_total);

    /* Validation: K=0 should be near-integrable (Λ ≈ 0) */
    printf("Validation K=0 mean Λ = %.6e (expect ~0)\n", k0_mean);
    if (fabs(k0_mean) > 0.05) {
        fprintf(stderr, "CERTIFICATE_WARN: K=0 Lyapunov unexpectedly large\n");
    }

    if (found_transition) {
        printf("Empirical bulk-chaos onset (mean>0.01, >95%% ICs positive): K ≈ %.4f\n",
               k_crit_scan);
        printf("Literature K_crit (standard map): ≈ 0.971635406\n");
    }

    if (nan_count > 0) {
        fprintf(stderr, "CERTIFICATE_ERROR: numerical failures detected\n");
        release_buffers(h_per_ic, d_per_ic);
        return 2;
    }

    release_buffers(h_per_ic, d_per_ic);
    return 0;
}
|