Update README.md
Browse files
README.md
CHANGED
|
@@ -136,4 +136,89 @@ done.
|
|
| 136 |
All pass: True
|
| 137 |
Compiled: 0.72× vs cuSOLVER
|
| 138 |
========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
```
|
|
|
|
| 136 |
All pass: True
|
| 137 |
Compiled: 0.72× vs cuSOLVER
|
| 138 |
========================================================================
|
| 139 |
+
```
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
Testing the barrage setup reveals much information about parallel processing capacity.
|
| 144 |
+
|
| 145 |
+
```
|
| 146 |
+
==============================================================================
|
| 147 |
+
Diagnostic: Parallel Root-Finding
|
| 148 |
+
==============================================================================
|
| 149 |
+
B=512 N=6
|
| 150 |
+
True eigenvalue range: [-2.106, 2.099]
|
| 151 |
+
Diagonal init range: [-1.561, 1.722]
|
| 152 |
+
|
| 153 |
+
--- Test 1: Pure Laguerre (no Aberth) ---
|
| 154 |
+
PurL it= 0 max_err=1.75e+00 min_gap=1.73e-06 |p(z)|=3.93e+00
|
| 155 |
+
PurL it= 1 max_err=1.84e+00 min_gap=1.67e-16 |p(z)|=5.39e-01
|
| 156 |
+
PurL it= 2 max_err=1.89e+00 min_gap=0.00e+00 |p(z)|=2.01e-02
|
| 157 |
+
PurL it= 3 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=1.73e-04
|
| 158 |
+
PurL it= 4 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=7.12e-07
|
| 159 |
+
PurL it= 9 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=2.88e-15
|
| 160 |
+
PurL it=14 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=2.88e-15
|
| 161 |
+
PurL it=19 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=2.88e-15
|
| 162 |
+
|
| 163 |
+
--- Test 2: Laguerre + Aberth (full) ---
|
| 164 |
+
LA-F it= 0 max_err=1.03e+02 min_gap=5.69e-07 |p(z)|=1.33e+12
|
| 165 |
+
LA-F it= 1 max_err=3.70e+02 min_gap=1.71e-06 |p(z)|=2.62e+15
|
| 166 |
+
LA-F it= 2 max_err=4.63e+02 min_gap=5.12e-06 |p(z)|=1.00e+16
|
| 167 |
+
LA-F it= 3 max_err=1.58e+03 min_gap=1.54e-05 |p(z)|=1.59e+19
|
| 168 |
+
LA-F it= 4 max_err=1.98e+03 min_gap=4.61e-05 |p(z)|=6.05e+19
|
| 169 |
+
LA-F it= 9 max_err=6.05e+03 min_gap=5.26e-04 |p(z)|=4.89e+22
|
| 170 |
+
LA-F it=14 max_err=1.85e+04 min_gap=8.50e-04 |p(z)|=3.95e+25
|
| 171 |
+
LA-F it=19 max_err=5.63e+04 min_gap=1.92e-02 |p(z)|=3.19e+28
|
| 172 |
+
|
| 173 |
+
--- Test 3: Laguerre + weak Aberth (0.1x) ---
|
| 174 |
+
LA.1 it= 0 max_err=2.89e+01 min_gap=5.69e-05 |p(z)|=7.50e+08
|
| 175 |
+
LA.1 it= 1 max_err=2.09e+01 min_gap=2.84e-06 |p(z)|=1.23e+08
|
| 176 |
+
LA.1 it= 2 max_err=1.35e+01 min_gap=6.74e-07 |p(z)|=1.06e+07
|
| 177 |
+
LA.1 it= 3 max_err=6.44e+00 min_gap=4.80e-08 |p(z)|=2.25e+05
|
| 178 |
+
LA.1 it= 4 max_err=1.89e+00 min_gap=4.16e-09 |p(z)|=1.99e+01
|
| 179 |
+
LA.1 it= 9 max_err=1.90e+00 min_gap=1.45e-14 |p(z)|=4.90e-03
|
| 180 |
+
LA.1 it=14 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=6.48e-04
|
| 181 |
+
LA.1 it=19 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=8.53e-05
|
| 182 |
+
|
| 183 |
+
--- Test 4: Pure Laguerre + re-sort ---
|
| 184 |
+
PL+S it= 0 max_err=1.75e+00 min_gap=1.73e-06 |p(z)|=3.93e+00
|
| 185 |
+
PL+S it= 1 max_err=1.84e+00 min_gap=1.67e-16 |p(z)|=5.39e-01
|
| 186 |
+
PL+S it= 2 max_err=1.89e+00 min_gap=0.00e+00 |p(z)|=2.01e-02
|
| 187 |
+
PL+S it= 3 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=1.73e-04
|
| 188 |
+
PL+S it= 4 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=7.12e-07
|
| 189 |
+
PL+S it= 9 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=2.88e-15
|
| 190 |
+
PL+S it=14 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=2.88e-15
|
| 191 |
+
PL+S it=19 max_err=1.90e+00 min_gap=0.00e+00 |p(z)|=2.88e-15
|
| 192 |
+
|
| 193 |
+
--- Test 5: Laguerre + Aberth damped (0.1 → 1.0) ---
|
| 194 |
+
LADa it= 0 max_err=2.89e+01 min_gap=5.69e-05 |p(z)|=7.50e+08
|
| 195 |
+
LADa it= 1 max_err=3.72e+02 min_gap=3.82e-06 |p(z)|=2.69e+15
|
| 196 |
+
LADa it= 2 max_err=1.13e+03 min_gap=4.82e-06 |p(z)|=2.08e+18
|
| 197 |
+
LADa it= 3 max_err=2.26e+03 min_gap=1.90e-06 |p(z)|=1.34e+20
|
| 198 |
+
LADa it= 4 max_err=3.77e+03 min_gap=9.02e-07 |p(z)|=2.88e+21
|
| 199 |
+
LADa it= 9 max_err=1.70e+04 min_gap=2.34e-05 |p(z)|=2.38e+25
|
| 200 |
+
LADa it=14 max_err=5.17e+04 min_gap=2.25e-03 |p(z)|=1.91e+28
|
| 201 |
+
LADa it=19 max_err=1.58e+05 min_gap=8.72e-03 |p(z)|=1.54e+31
|
| 202 |
+
|
| 203 |
+
--- Test 6: Newton + Aberth ---
|
| 204 |
+
NwAb it= 0 max_err=4.35e+02 min_gap=3.29e-05 |p(z)|=6.91e+15
|
| 205 |
+
NwAb it= 1 max_err=1.57e+01 min_gap=9.86e-05 |p(z)|=2.43e+07
|
| 206 |
+
NwAb it= 2 max_err=5.28e+01 min_gap=1.70e-05 |p(z)|=2.54e+10
|
| 207 |
+
NwAb it= 3 max_err=5.37e+01 min_gap=5.22e-05 |p(z)|=2.75e+10
|
| 208 |
+
NwAb it= 4 max_err=3.34e+02 min_gap=1.91e-04 |p(z)|=1.41e+15
|
| 209 |
+
NwAb it= 9 max_err=2.02e+00 min_gap=1.92e-02 |p(z)|=6.78e+02
|
| 210 |
+
NwAb it=14 max_err=1.05e-06 min_gap=1.92e-02 |p(z)|=1.24e-14
|
| 211 |
+
NwAb it=19 max_err=1.05e-06 min_gap=1.92e-02 |p(z)|=1.24e-14
|
| 212 |
+
|
| 213 |
+
--- Test 7: Pure Newton ---
|
| 214 |
+
PurN it= 0 max_err=3.51e+02 min_gap=6.24e-06 |p(z)|=1.93e+15
|
| 215 |
+
PurN it= 1 max_err=2.93e+02 min_gap=1.69e-09 |p(z)|=6.46e+14
|
| 216 |
+
PurN it= 2 max_err=2.44e+02 min_gap=0.00e+00 |p(z)|=2.16e+14
|
| 217 |
+
PurN it= 3 max_err=2.03e+02 min_gap=0.00e+00 |p(z)|=7.25e+13
|
| 218 |
+
PurN it= 4 max_err=1.69e+02 min_gap=0.00e+00 |p(z)|=2.43e+13
|
| 219 |
+
PurN it= 9 max_err=6.69e+01 min_gap=0.00e+00 |p(z)|=1.02e+11
|
| 220 |
+
PurN it=14 max_err=2.60e+01 min_gap=0.00e+00 |p(z)|=4.31e+08
|
| 221 |
+
PurN it=19 max_err=9.63e+00 min_gap=0.00e+00 |p(z)|=1.81e+06
|
| 222 |
+
==============================================================================
|
| 223 |
+
|
| 224 |
```
|