tmp
/
pip-install-ghxuqwgs
/numpy_78e94bf2b6094bf9a1f3d92042f9bf46
/numpy
/linalg
/umath_linalg.c.src
| /* -*- c -*- */ | |
| /* | |
| ***************************************************************************** | |
| ** INCLUDES ** | |
| ***************************************************************************** | |
| */ | |
| #define NPY_NO_DEPRECATED_API NPY_API_VERSION | |
| #include "Python.h" | |
| #include "numpy/arrayobject.h" | |
| #include "numpy/ufuncobject.h" | |
| #include "npy_pycompat.h" | |
| #include "npy_config.h" | |
| #include <stddef.h> | |
| #include <stdio.h> | |
| #include <assert.h> | |
| #include <math.h> | |
| static const char* umath_linalg_version_string = "0.1.4"; | |
| /* | |
| **************************************************************************** | |
| * Debugging support * | |
| **************************************************************************** | |
| */ | |
| #define TRACE_TXT(...) do { fprintf (stderr, __VA_ARGS__); } while (0) | |
| #define STACK_TRACE do {} while (0) | |
| #define TRACE\ | |
| do { \ | |
| fprintf (stderr, \ | |
| "%s:%d:%s\n", \ | |
| __FILE__, \ | |
| __LINE__, \ | |
| __FUNCTION__); \ | |
| STACK_TRACE; \ | |
| } while (0) | |
| #if 0 | |
| #include <execinfo.h> | |
/*
 * Debug helper: capture up to 32 frames of the current call stack with
 * backtrace() and hand them to backtrace_symbols_fd() on file descriptor 1.
 */
void
dbg_stack_trace()
{
    void *frames[32];
    size_t depth = backtrace(frames, sizeof(frames)/sizeof(frames[0]));

    backtrace_symbols_fd(frames, depth, 1);
}
| #undef STACK_TRACE | |
| #define STACK_TRACE do { dbg_stack_trace(); } while (0) | |
| #endif | |
| /* | |
| ***************************************************************************** | |
| * BLAS/LAPACK calling macros * | |
| ***************************************************************************** | |
| */ | |
| #ifdef NO_APPEND_FORTRAN | |
| # define FNAME(x) x | |
| #else | |
| # define FNAME(x) x##_ | |
| #endif | |
| typedef struct { float r, i; } f2c_complex; | |
| typedef struct { double r, i; } f2c_doublecomplex; | |
| /* typedef long int (*L_fp)(); */ | |
| extern int | |
| FNAME(sgeev)(char *jobvl, char *jobvr, int *n, | |
| float a[], int *lda, float wr[], float wi[], | |
| float vl[], int *ldvl, float vr[], int *ldvr, | |
| float work[], int lwork[], | |
| int *info); | |
| extern int | |
| FNAME(dgeev)(char *jobvl, char *jobvr, int *n, | |
| double a[], int *lda, double wr[], double wi[], | |
| double vl[], int *ldvl, double vr[], int *ldvr, | |
| double work[], int lwork[], | |
| int *info); | |
| extern int | |
| FNAME(cgeev)(char *jobvl, char *jobvr, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| f2c_doublecomplex w[], | |
| f2c_doublecomplex vl[], int *ldvl, | |
| f2c_doublecomplex vr[], int *ldvr, | |
| f2c_doublecomplex work[], int *lwork, | |
| double rwork[], | |
| int *info); | |
| extern int | |
| FNAME(zgeev)(char *jobvl, char *jobvr, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| f2c_doublecomplex w[], | |
| f2c_doublecomplex vl[], int *ldvl, | |
| f2c_doublecomplex vr[], int *ldvr, | |
| f2c_doublecomplex work[], int *lwork, | |
| double rwork[], | |
| int *info); | |
| extern int | |
| FNAME(ssyevd)(char *jobz, char *uplo, int *n, | |
| float a[], int *lda, float w[], float work[], | |
| int *lwork, int iwork[], int *liwork, | |
| int *info); | |
| extern int | |
| FNAME(dsyevd)(char *jobz, char *uplo, int *n, | |
| double a[], int *lda, double w[], double work[], | |
| int *lwork, int iwork[], int *liwork, | |
| int *info); | |
| extern int | |
| FNAME(cheevd)(char *jobz, char *uplo, int *n, | |
| f2c_complex a[], int *lda, | |
| float w[], f2c_complex work[], | |
| int *lwork, float rwork[], int *lrwork, int iwork[], | |
| int *liwork, | |
| int *info); | |
| extern int | |
| FNAME(zheevd)(char *jobz, char *uplo, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| double w[], f2c_doublecomplex work[], | |
| int *lwork, double rwork[], int *lrwork, int iwork[], | |
| int *liwork, | |
| int *info); | |
| extern int | |
| FNAME(dgelsd)(int *m, int *n, int *nrhs, | |
| double a[], int *lda, double b[], int *ldb, | |
| double s[], double *rcond, int *rank, | |
| double work[], int *lwork, int iwork[], | |
| int *info); | |
| extern int | |
| FNAME(zgelsd)(int *m, int *n, int *nrhs, | |
| f2c_doublecomplex a[], int *lda, | |
| f2c_doublecomplex b[], int *ldb, | |
| double s[], double *rcond, int *rank, | |
| f2c_doublecomplex work[], int *lwork, | |
| double rwork[], int iwork[], | |
| int *info); | |
| extern int | |
| FNAME(sgesv)(int *n, int *nrhs, | |
| float a[], int *lda, | |
| int ipiv[], | |
| float b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(dgesv)(int *n, int *nrhs, | |
| double a[], int *lda, | |
| int ipiv[], | |
| double b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(cgesv)(int *n, int *nrhs, | |
| f2c_complex a[], int *lda, | |
| int ipiv[], | |
| f2c_complex b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(zgesv)(int *n, int *nrhs, | |
| f2c_doublecomplex a[], int *lda, | |
| int ipiv[], | |
| f2c_doublecomplex b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(sgetrf)(int *m, int *n, | |
| float a[], int *lda, | |
| int ipiv[], | |
| int *info); | |
| extern int | |
| FNAME(dgetrf)(int *m, int *n, | |
| double a[], int *lda, | |
| int ipiv[], | |
| int *info); | |
| extern int | |
| FNAME(cgetrf)(int *m, int *n, | |
| f2c_complex a[], int *lda, | |
| int ipiv[], | |
| int *info); | |
| extern int | |
| FNAME(zgetrf)(int *m, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| int ipiv[], | |
| int *info); | |
| extern int | |
| FNAME(spotrf)(char *uplo, int *n, | |
| float a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(dpotrf)(char *uplo, int *n, | |
| double a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(cpotrf)(char *uplo, int *n, | |
| f2c_complex a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(zpotrf)(char *uplo, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(sgesdd)(char *jobz, int *m, int *n, | |
| float a[], int *lda, float s[], float u[], | |
| int *ldu, float vt[], int *ldvt, float work[], | |
| int *lwork, int iwork[], int *info); | |
| extern int | |
| FNAME(dgesdd)(char *jobz, int *m, int *n, | |
| double a[], int *lda, double s[], double u[], | |
| int *ldu, double vt[], int *ldvt, double work[], | |
| int *lwork, int iwork[], int *info); | |
| extern int | |
| FNAME(cgesdd)(char *jobz, int *m, int *n, | |
| f2c_complex a[], int *lda, | |
| float s[], f2c_complex u[], int *ldu, | |
| f2c_complex vt[], int *ldvt, | |
| f2c_complex work[], int *lwork, | |
| float rwork[], int iwork[], int *info); | |
| extern int | |
| FNAME(zgesdd)(char *jobz, int *m, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| double s[], f2c_doublecomplex u[], int *ldu, | |
| f2c_doublecomplex vt[], int *ldvt, | |
| f2c_doublecomplex work[], int *lwork, | |
| double rwork[], int iwork[], int *info); | |
| extern int | |
| FNAME(spotrs)(char *uplo, int *n, int *nrhs, | |
| float a[], int *lda, | |
| float b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(dpotrs)(char *uplo, int *n, int *nrhs, | |
| double a[], int *lda, | |
| double b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(cpotrs)(char *uplo, int *n, int *nrhs, | |
| f2c_complex a[], int *lda, | |
| f2c_complex b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(zpotrs)(char *uplo, int *n, int *nrhs, | |
| f2c_doublecomplex a[], int *lda, | |
| f2c_doublecomplex b[], int *ldb, | |
| int *info); | |
| extern int | |
| FNAME(spotri)(char *uplo, int *n, | |
| float a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(dpotri)(char *uplo, int *n, | |
| double a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(cpotri)(char *uplo, int *n, | |
| f2c_complex a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(zpotri)(char *uplo, int *n, | |
| f2c_doublecomplex a[], int *lda, | |
| int *info); | |
| extern int | |
| FNAME(scopy)(int *n, | |
| float *sx, int *incx, | |
| float *sy, int *incy); | |
| extern int | |
| FNAME(dcopy)(int *n, | |
| double *sx, int *incx, | |
| double *sy, int *incy); | |
| extern int | |
| FNAME(ccopy)(int *n, | |
| f2c_complex *sx, int *incx, | |
| f2c_complex *sy, int *incy); | |
| extern int | |
| FNAME(zcopy)(int *n, | |
| f2c_doublecomplex *sx, int *incx, | |
| f2c_doublecomplex *sy, int *incy); | |
| extern float | |
| FNAME(sdot)(int *n, | |
| float *sx, int *incx, | |
| float *sy, int *incy); | |
| extern double | |
| FNAME(ddot)(int *n, | |
| double *sx, int *incx, | |
| double *sy, int *incy); | |
| extern f2c_complex | |
| FNAME(cdotu)(int *n, | |
| f2c_complex *sx, int *incx, | |
| f2c_complex *sy, int *incy); | |
| extern f2c_doublecomplex | |
| FNAME(zdotu)(int *n, | |
| f2c_doublecomplex *sx, int *incx, | |
| f2c_doublecomplex *sy, int *incy); | |
| extern f2c_complex | |
| FNAME(cdotc)(int *n, | |
| f2c_complex *sx, int *incx, | |
| f2c_complex *sy, int *incy); | |
| extern f2c_doublecomplex | |
| FNAME(zdotc)(int *n, | |
| f2c_doublecomplex *sx, int *incx, | |
| f2c_doublecomplex *sy, int *incy); | |
| extern int | |
| FNAME(sgemm)(char *transa, char *transb, | |
| int *m, int *n, int *k, | |
| float *alpha, | |
| float *a, int *lda, | |
| float *b, int *ldb, | |
| float *beta, | |
| float *c, int *ldc); | |
| extern int | |
| FNAME(dgemm)(char *transa, char *transb, | |
| int *m, int *n, int *k, | |
| double *alpha, | |
| double *a, int *lda, | |
| double *b, int *ldb, | |
| double *beta, | |
| double *c, int *ldc); | |
| extern int | |
| FNAME(cgemm)(char *transa, char *transb, | |
| int *m, int *n, int *k, | |
| f2c_complex *alpha, | |
| f2c_complex *a, int *lda, | |
| f2c_complex *b, int *ldb, | |
| f2c_complex *beta, | |
| f2c_complex *c, int *ldc); | |
| extern int | |
| FNAME(zgemm)(char *transa, char *transb, | |
| int *m, int *n, int *k, | |
| f2c_doublecomplex *alpha, | |
| f2c_doublecomplex *a, int *lda, | |
| f2c_doublecomplex *b, int *ldb, | |
| f2c_doublecomplex *beta, | |
| f2c_doublecomplex *c, int *ldc); | |
| #define LAPACK_T(FUNC) \ | |
| TRACE_TXT("Calling LAPACK ( " # FUNC " )\n"); \ | |
| FNAME(FUNC) | |
| #define BLAS(FUNC) \ | |
| FNAME(FUNC) | |
| #define LAPACK(FUNC) \ | |
| FNAME(FUNC) | |
| typedef int fortran_int; | |
| typedef float fortran_real; | |
| typedef double fortran_doublereal; | |
| typedef f2c_complex fortran_complex; | |
| typedef f2c_doublecomplex fortran_doublecomplex; | |
| /* | |
| ***************************************************************************** | |
| ** Some handy functions ** | |
| ***************************************************************************** | |
| */ | |
| static inline void * | |
| offset_ptr(void* ptr, ptrdiff_t offset) | |
| { | |
| return (void*)((npy_uint8*)ptr + offset); | |
| } | |
| static inline int | |
| get_fp_invalid_and_clear(void) | |
| { | |
| int status; | |
| status = npy_clear_floatstatus(); | |
| return !!(status & NPY_FPE_INVALID); | |
| } | |
/*
 * Raise the floating point "invalid" status when error_occurred is non-zero;
 * otherwise clear the floating point status.
 */
static inline void
set_fp_invalid_or_clear(int error_occurred)
{
    if (!error_occurred) {
        npy_clear_floatstatus();
        return;
    }
    npy_set_floatstatus_invalid();
}
| /* | |
| ***************************************************************************** | |
| ** Some handy constants ** | |
| ***************************************************************************** | |
| */ | |
| #define UMATH_LINALG_MODULE_NAME "_umath_linalg" | |
| typedef union { | |
| fortran_complex f; | |
| npy_cfloat npy; | |
| float array[2]; | |
| } COMPLEX_t; | |
| typedef union { | |
| fortran_doublecomplex f; | |
| npy_cdouble npy; | |
| double array[2]; | |
| } DOUBLECOMPLEX_t; | |
| static float s_one; | |
| static float s_zero; | |
| static float s_minus_one; | |
| static float s_ninf; | |
| static float s_nan; | |
| static double d_one; | |
| static double d_zero; | |
| static double d_minus_one; | |
| static double d_ninf; | |
| static double d_nan; | |
| static COMPLEX_t c_one; | |
| static COMPLEX_t c_zero; | |
| static COMPLEX_t c_minus_one; | |
| static COMPLEX_t c_ninf; | |
| static COMPLEX_t c_nan; | |
| static DOUBLECOMPLEX_t z_one; | |
| static DOUBLECOMPLEX_t z_zero; | |
| static DOUBLECOMPLEX_t z_minus_one; | |
| static DOUBLECOMPLEX_t z_ninf; | |
| static DOUBLECOMPLEX_t z_nan; | |
| static void init_constants(void) | |
| { | |
| /* | |
| this is needed as NPY_INFINITY and NPY_NAN macros | |
| can't be used as initializers. I prefer to just set | |
| all the constants the same way. | |
| */ | |
| s_one = 1.0f; | |
| s_zero = 0.0f; | |
| s_minus_one = -1.0f; | |
| s_ninf = -NPY_INFINITYF; | |
| s_nan = NPY_NANF; | |
| d_one = 1.0; | |
| d_zero = 0.0; | |
| d_minus_one = -1.0; | |
| d_ninf = -NPY_INFINITY; | |
| d_nan = NPY_NAN; | |
| c_one.array[0] = 1.0f; | |
| c_one.array[1] = 0.0f; | |
| c_zero.array[0] = 0.0f; | |
| c_zero.array[1] = 0.0f; | |
| c_minus_one.array[0] = -1.0f; | |
| c_minus_one.array[1] = 0.0f; | |
| c_ninf.array[0] = -NPY_INFINITYF; | |
| c_ninf.array[1] = 0.0f; | |
| c_nan.array[0] = NPY_NANF; | |
| c_nan.array[1] = NPY_NANF; | |
| z_one.array[0] = 1.0; | |
| z_one.array[1] = 0.0; | |
| z_zero.array[0] = 0.0; | |
| z_zero.array[1] = 0.0; | |
| z_minus_one.array[0] = -1.0; | |
| z_minus_one.array[1] = 0.0; | |
| z_ninf.array[0] = -NPY_INFINITY; | |
| z_ninf.array[1] = 0.0; | |
| z_nan.array[0] = NPY_NAN; | |
| z_nan.array[1] = NPY_NAN; | |
| } | |
| /* | |
| ***************************************************************************** | |
| ** Structs used for data rearrangement ** | |
| ***************************************************************************** | |
| */ | |
/*
 * Describes a strided 2-D matrix so that it can be copied to/from a
 * contiguous local buffer usable by BLAS/LAPACK.
 *
 * rows:           number of rows of the matrix
 * columns:        number of columns of the matrix
 * row_strides:    distance in BYTES between consecutive rows of the strided
 *                 matrix
 * column_strides: distance in BYTES between consecutive columns of the
 *                 strided matrix
 *
 * The strides are byte counts (they may be negative): the helpers that
 * consume this struct divide them by the element size to obtain the element
 * strides that BLAS expects. The contiguous buffer holds `columns` elements
 * per row, one row after the other.
 */
typedef struct linearize_data_struct
{
    size_t rows;
    size_t columns;
    ptrdiff_t row_strides;
    ptrdiff_t column_strides;
} LINEARIZE_DATA_t;
| static inline void | |
| init_linearize_data(LINEARIZE_DATA_t *lin_data, | |
| int rows, | |
| int columns, | |
| ptrdiff_t row_strides, | |
| ptrdiff_t column_strides) | |
| { | |
| lin_data->rows = rows; | |
| lin_data->columns = columns; | |
| lin_data->row_strides = row_strides; | |
| lin_data->column_strides = column_strides; | |
| } | |
| static inline void | |
| dump_ufunc_object(PyUFuncObject* ufunc) | |
| { | |
| TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n", | |
| ufunc->core_enabled? "generalized ufunc" : "scalar ufunc", | |
| ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes); | |
| if (ufunc->core_enabled) { | |
| int arg; | |
| int dim; | |
| TRACE_TXT("\t%s (%d dimension(s) detected).\n", | |
| ufunc->core_signature, ufunc->core_num_dim_ix); | |
| for (arg = 0; arg < ufunc->nargs; arg++){ | |
| int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg]; | |
| TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (", | |
| arg, arg < ufunc->nin? "INPUT" : "OUTPUT", | |
| ufunc->core_num_dims[arg]); | |
| for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) { | |
| TRACE_TXT(" %d", arg_dim_ix[dim]); | |
| } | |
| TRACE_TXT(" )\n"); | |
| } | |
| } | |
| } | |
| static inline void | |
| dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) | |
| { | |
| TRACE_TXT("\n\t%s rows: %zd columns: %zd"\ | |
| "\n\t\trow_strides: %td column_strides: %td"\ | |
| "\n", name, params->rows, params->columns, | |
| params->row_strides, params->column_strides); | |
| } | |
/* Sum of two single-precision reals. */
static inline float
FLOAT_add(float op1, float op2)
{
    const float sum = op1 + op2;

    return sum;
}
/* Sum of two double-precision reals. */
static inline double
DOUBLE_add(double op1, double op2)
{
    const double sum = op1 + op2;

    return sum;
}
| static inline COMPLEX_t | |
| CFLOAT_add(COMPLEX_t op1, COMPLEX_t op2) | |
| { | |
| COMPLEX_t result; | |
| result.array[0] = op1.array[0] + op2.array[0]; | |
| result.array[1] = op1.array[1] + op2.array[1]; | |
| return result; | |
| } | |
| static inline DOUBLECOMPLEX_t | |
| CDOUBLE_add(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
| { | |
| DOUBLECOMPLEX_t result; | |
| result.array[0] = op1.array[0] + op2.array[0]; | |
| result.array[1] = op1.array[1] + op2.array[1]; | |
| return result; | |
| } | |
/* Product of two single-precision reals. */
static inline float
FLOAT_mul(float op1, float op2)
{
    const float product = op1*op2;

    return product;
}
/* Product of two double-precision reals. */
static inline double
DOUBLE_mul(double op1, double op2)
{
    const double product = op1*op2;

    return product;
}
| static inline COMPLEX_t | |
| CFLOAT_mul(COMPLEX_t op1, COMPLEX_t op2) | |
| { | |
| COMPLEX_t result; | |
| result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; | |
| result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; | |
| return result; | |
| } | |
| static inline DOUBLECOMPLEX_t | |
| CDOUBLE_mul(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
| { | |
| DOUBLECOMPLEX_t result; | |
| result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; | |
| result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; | |
| return result; | |
| } | |
/*
 * Real counterpart of the complex *_mulc helpers: for real operands it is
 * the plain product.
 */
static inline float
FLOAT_mulc(float op1, float op2)
{
    const float product = op1*op2;

    return product;
}
/*
 * Real counterpart of the complex *_mulc helpers: for real operands it is
 * the plain product.
 *
 * The operands are taken as double so that the arguments are not narrowed
 * to single precision before the multiplication.
 */
static inline double
DOUBLE_mulc(double op1, double op2)
{
    return op1*op2;
}
| static inline COMPLEX_t | |
| CFLOAT_mulc(COMPLEX_t op1, COMPLEX_t op2) | |
| { | |
| COMPLEX_t result; | |
| result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; | |
| result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; | |
| return result; | |
| } | |
| static inline DOUBLECOMPLEX_t | |
| CDOUBLE_mulc(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
| { | |
| DOUBLECOMPLEX_t result; | |
| result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; | |
| result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; | |
| return result; | |
| } | |
| static inline void | |
| print_FLOAT(npy_float s) | |
| { | |
| TRACE_TXT(" %8.4f", s); | |
| } | |
| static inline void | |
| print_DOUBLE(npy_double d) | |
| { | |
| TRACE_TXT(" %10.6f", d); | |
| } | |
| static inline void | |
| print_CFLOAT(npy_cfloat c) | |
| { | |
| float* c_parts = (float*)&c; | |
| TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]); | |
| } | |
| static inline void | |
| print_CDOUBLE(npy_cdouble z) | |
| { | |
| double* z_parts = (double*)&z; | |
| TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]); | |
| } | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
| */ | |
| static inline void | |
| dump_@TYPE@_matrix(const char* name, | |
| size_t rows, size_t columns, | |
| const @typ@* ptr) | |
| { | |
| size_t i,j; | |
| TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); | |
| for (i=0; i<rows; i++) | |
| { | |
| TRACE_TXT("| "); | |
| for (j=0; j<columns; j++) | |
| { | |
| print_@TYPE@(ptr[j*rows + i]); | |
| TRACE_TXT(", "); | |
| } | |
| TRACE_TXT(" |\n"); | |
| } | |
| } | |
| /**end repeat**/ | |
| /* | |
| ***************************************************************************** | |
| ** Basics ** | |
| ***************************************************************************** | |
| */ | |
/*
 * Outer (broadcast) loop helpers for the ufunc inner-loop functions. They
 * expect variables named `args`, `dimensions` and `steps` to be in scope.
 *
 * INIT_OUTER_LOOP_<k> declares:
 *   dN       - outer iteration count, read from *dimensions
 *   N_       - outer loop counter
 *   s0..s<k-1> - per-argument outer steps, read from *steps
 * Both `dimensions` and `steps` are advanced past the values consumed, so
 * after the macro dimensions[0] / steps[0] refer to the entries that follow.
 *
 * BEGIN_OUTER_LOOP_<k> opens a for loop that runs dN times and, after each
 * iteration, advances args[0]..args[k-1] by s0..s<k-1>.
 * END_OUTER_LOOP closes the block opened by BEGIN_OUTER_LOOP_<k>.
 */
| #define INIT_OUTER_LOOP_1 \ | |
| npy_intp dN = *dimensions++;\ | |
| npy_intp N_;\ | |
| npy_intp s0 = *steps++; | |
| #define INIT_OUTER_LOOP_2 \ | |
| INIT_OUTER_LOOP_1\ | |
| npy_intp s1 = *steps++; | |
| #define INIT_OUTER_LOOP_3 \ | |
| INIT_OUTER_LOOP_2\ | |
| npy_intp s2 = *steps++; | |
| #define INIT_OUTER_LOOP_4 \ | |
| INIT_OUTER_LOOP_3\ | |
| npy_intp s3 = *steps++; | |
| #define INIT_OUTER_LOOP_5 \ | |
| INIT_OUTER_LOOP_4\ | |
| npy_intp s4 = *steps++; | |
| #define INIT_OUTER_LOOP_6 \ | |
| INIT_OUTER_LOOP_5\ | |
| npy_intp s5 = *steps++; | |
| #define BEGIN_OUTER_LOOP_2 \ | |
| for (N_ = 0;\ | |
| N_ < dN;\ | |
| N_++, args[0] += s0,\ | |
| args[1] += s1) { | |
| #define BEGIN_OUTER_LOOP_3 \ | |
| for (N_ = 0;\ | |
| N_ < dN;\ | |
| N_++, args[0] += s0,\ | |
| args[1] += s1,\ | |
| args[2] += s2) { | |
| #define BEGIN_OUTER_LOOP_4 \ | |
| for (N_ = 0;\ | |
| N_ < dN;\ | |
| N_++, args[0] += s0,\ | |
| args[1] += s1,\ | |
| args[2] += s2,\ | |
| args[3] += s3) { | |
| #define BEGIN_OUTER_LOOP_5 \ | |
| for (N_ = 0;\ | |
| N_ < dN;\ | |
| N_++, args[0] += s0,\ | |
| args[1] += s1,\ | |
| args[2] += s2,\ | |
| args[3] += s3,\ | |
| args[4] += s4) { | |
| #define BEGIN_OUTER_LOOP_6 \ | |
| for (N_ = 0;\ | |
| N_ < dN;\ | |
| N_++, args[0] += s0,\ | |
| args[1] += s1,\ | |
| args[2] += s2,\ | |
| args[3] += s3,\ | |
| args[4] += s4,\ | |
| args[5] += s5) { | |
| #define END_OUTER_LOOP } | |
| static inline void | |
| update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count) | |
| { | |
| size_t i; | |
| for (i=0; i < count; ++i) { | |
| bases[i] += offsets[i]; | |
| } | |
| } | |
| /* Disable -Wmaybe-uninitialized, as some of this code generates false | |
| positives with this warning. | |
| */ | |
| #pragma GCC diagnostic push | |
| #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" | |
| /* | |
| ***************************************************************************** | |
| ** HELPER FUNCS ** | |
| ***************************************************************************** | |
| */ | |
| /* rearranging of 2D matrices using blas */ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# | |
| #copy=scopy,dcopy,ccopy,zcopy# | |
| #nan=s_nan, d_nan, c_nan, z_nan# | |
| */ | |
| static inline void * | |
| linearize_@TYPE@_matrix(void *dst_in, | |
| void *src_in, | |
| const LINEARIZE_DATA_t* data) | |
| { | |
| @typ@ *src = (@typ@ *) src_in; | |
| @typ@ *dst = (@typ@ *) dst_in; | |
| if (dst) { | |
| int i, j; | |
| @typ@* rv = dst; | |
| fortran_int columns = (fortran_int)data->columns; | |
| fortran_int column_strides = | |
| (fortran_int)(data->column_strides/sizeof(@typ@)); | |
| fortran_int one = 1; | |
| for (i=0; i< data->rows; i++) { | |
| if (column_strides > 0) { | |
| FNAME(@copy@)(&columns, | |
| (void*)src, &column_strides, | |
| (void*)dst, &one); | |
| } | |
| else if (column_strides < 0) { | |
| FNAME(@copy@)(&columns, | |
| (void*)((@typ@*)src + (columns-1)*column_strides), | |
| &column_strides, | |
| (void*)dst, &one); | |
| } | |
| else { | |
| /* | |
| * Zero stride has undefined behavior in some BLAS | |
| * implementations (e.g. OSX Accelerate), so do it | |
| * manually | |
| */ | |
| for (j = 0; j < columns; ++j) { | |
| memcpy((@typ@*)dst + j, (@typ@*)src, sizeof(@typ@)); | |
| } | |
| } | |
| src += data->row_strides/sizeof(@typ@); | |
| dst += data->columns; | |
| } | |
| return rv; | |
| } else { | |
| return src; | |
| } | |
| } | |
| static inline void * | |
| delinearize_@TYPE@_matrix(void *dst_in, | |
| void *src_in, | |
| const LINEARIZE_DATA_t* data) | |
| { | |
| @typ@ *src = (@typ@ *) src_in; | |
| @typ@ *dst = (@typ@ *) dst_in; | |
| if (src) { | |
| int i; | |
| @typ@ *rv = src; | |
| fortran_int columns = (fortran_int)data->columns; | |
| fortran_int column_strides = | |
| (fortran_int)(data->column_strides/sizeof(@typ@)); | |
| fortran_int one = 1; | |
| for (i=0; i < data->rows; i++) { | |
| if (column_strides > 0) { | |
| FNAME(@copy@)(&columns, | |
| (void*)src, &one, | |
| (void*)dst, &column_strides); | |
| } | |
| else if (column_strides < 0) { | |
| FNAME(@copy@)(&columns, | |
| (void*)src, &one, | |
| (void*)((@typ@*)dst + (columns-1)*column_strides), | |
| &column_strides); | |
| } | |
| else { | |
| /* | |
| * Zero stride has undefined behavior in some BLAS | |
| * implementations (e.g. OSX Accelerate), so do it | |
| * manually | |
| */ | |
| if (columns > 0) { | |
| memcpy((@typ@*)dst, (@typ@*)src + (columns-1), sizeof(@typ@)); | |
| } | |
| } | |
| src += data->columns; | |
| dst += data->row_strides/sizeof(@typ@); | |
| } | |
| return rv; | |
| } else { | |
| return src; | |
| } | |
| } | |
| static inline void | |
| nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) | |
| { | |
| @typ@ *dst = (@typ@ *) dst_in; | |
| int i,j; | |
| for (i=0; i < data->rows; i++) { | |
| @typ@ *cp = dst; | |
| ptrdiff_t cs = data->column_strides/sizeof(@typ@); | |
| for (j=0; j< data->columns; ++j) { | |
| *cp = @nan@; | |
| cp += cs; | |
| } | |
| dst += data->row_strides/sizeof(@typ@); | |
| } | |
| } | |
| /**end repeat**/ | |
| /* identity square matrix generation */ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# | |
| #cblas_type=s,d,c,z# | |
| */ | |
| static inline void | |
| identity_@TYPE@_matrix(void *ptr, size_t n) | |
| { | |
| size_t i; | |
| @typ@ *matrix = (@typ@*) ptr; | |
| /* in IEEE floating point, zeroes are represented as bitwise 0 */ | |
| memset(matrix, 0, n*n*sizeof(@typ@)); | |
| for (i = 0; i < n; ++i) | |
| { | |
| *matrix = @cblas_type@_one; | |
| matrix += n+1; | |
| } | |
| } | |
| /**end repeat**/ | |
| /* lower/upper triangular matrix using blas (in place) */ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# | |
| #cblas_type=s,d,c,z# | |
| */ | |
| static inline void | |
| triu_@TYPE@_matrix(void *ptr, size_t n) | |
| { | |
| size_t i,j; | |
| @typ@ *matrix = (@typ@*)ptr; | |
| matrix += n; | |
| for (i=1; i < n; ++i) { | |
| for (j=0; j<i; ++j) { | |
| matrix[j] = @cblas_type@_zero; | |
| } | |
| matrix += n; | |
| } | |
| } | |
| /**end repeat**/ | |
| /* -------------------------------------------------------------------------- */ | |
| /* Determinants */ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE# | |
| #typ=npy_float, npy_double# | |
| #log_func=npy_logf,npy_log# | |
| #exp_func=npy_expf,npy_exp# | |
| #zero=0.0f,0.0# | |
| */ | |
| static inline void | |
| @TYPE@_slogdet_from_factored_diagonal(@typ@* src, | |
| fortran_int m, | |
| @typ@ *sign, | |
| @typ@ *logdet) | |
| { | |
| @typ@ acc_sign = *sign; | |
| @typ@ acc_logdet = @zero@; | |
| int i; | |
| for (i = 0; i < m; i++) { | |
| @typ@ abs_element = *src; | |
| if (abs_element < @zero@) { | |
| acc_sign = -acc_sign; | |
| abs_element = -abs_element; | |
| } | |
| acc_logdet += @log_func@(abs_element); | |
| src += m+1; | |
| } | |
| *sign = acc_sign; | |
| *logdet = acc_logdet; | |
| } | |
| static inline @typ@ | |
| @TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet) | |
| { | |
| @typ@ result = sign * @exp_func@(logdet); | |
| return result; | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=CFLOAT,CDOUBLE# | |
| #typ=npy_cfloat, npy_cdouble# | |
| #basetyp=npy_float, npy_double# | |
| #abs_func=npy_cabsf, npy_cabs# | |
| #log_func=npy_logf, npy_log# | |
| #exp_func=npy_expf, npy_exp# | |
| #zero=0.0f,0.0# | |
| */ | |
| #define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0]) | |
| #define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1]) | |
| static inline @typ@ | |
| @TYPE@_mult(@typ@ op1, @typ@ op2) | |
| { | |
| @typ@ rv; | |
| RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2); | |
| IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2); | |
| return rv; | |
| } | |
| static inline void | |
| @TYPE@_slogdet_from_factored_diagonal(@typ@* src, | |
| fortran_int m, | |
| @typ@ *sign, | |
| @basetyp@ *logdet) | |
| { | |
| int i; | |
| @typ@ sign_acc = *sign; | |
| @basetyp@ logdet_acc = @zero@; | |
| for (i = 0; i < m; i++) | |
| { | |
| @basetyp@ abs_element = @abs_func@(*src); | |
| @typ@ sign_element; | |
| RE(sign_element) = RE(*src) / abs_element; | |
| IM(sign_element) = IM(*src) / abs_element; | |
| sign_acc = @TYPE@_mult(sign_acc, sign_element); | |
| logdet_acc += @log_func@(abs_element); | |
| src += m + 1; | |
| } | |
| *sign = sign_acc; | |
| *logdet = logdet_acc; | |
| } | |
| static inline @typ@ | |
| @TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet) | |
| { | |
| @typ@ tmp; | |
| RE(tmp) = @exp_func@(logdet); | |
| IM(tmp) = @zero@; | |
| return @TYPE@_mult(sign, tmp); | |
| } | |
| #undef RE | |
| #undef IM | |
| /**end repeat**/ | |
| /* As in the linalg package, the determinant is computed via LU factorization | |
| * using LAPACK. | |
| * slogdet computes the sign and log(abs(determinant)). | |
| * det computes sign * exp(logdet). | |
| */ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
| #basetyp=npy_float,npy_double,npy_float,npy_double# | |
| #cblas_type=s,d,c,z# | |
| */ | |
| static inline void | |
| @TYPE@_slogdet_single_element(fortran_int m, | |
| void* src, | |
| fortran_int* pivots, | |
| @typ@ *sign, | |
| @basetyp@ *logdet) | |
| { | |
| fortran_int info = 0; | |
| int i; | |
| /* note: done in place */ | |
| LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &m, pivots, &info); | |
| if (info == 0) | |
| { | |
| int change_sign = 0; | |
| /* note: fortran uses 1 based indexing */ | |
| for (i=0; i < m; i++) | |
| { | |
| change_sign += (pivots[i] != (i+1)); | |
| } | |
| memcpy(sign, | |
| (change_sign % 2)? | |
| &@cblas_type@_minus_one : | |
| &@cblas_type@_one | |
| , sizeof(*sign)); | |
| @TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet); | |
| } else { | |
| /* | |
| if getrf fails, use 0 as sign and -inf as logdet | |
| */ | |
| memcpy(sign, &@cblas_type@_zero, sizeof(*sign)); | |
| memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet)); | |
| } | |
| } | |
| static void | |
| @TYPE@_slogdet(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| fortran_int m; | |
| npy_uint8 *tmp_buff = NULL; | |
| size_t matrix_size; | |
| size_t pivot_size; | |
| /* notes: | |
| * matrix will need to be copied always, as factorization in lapack is | |
| * made inplace | |
| * matrix will need to be in column-major order, as expected by lapack | |
| * code (fortran) | |
| * always a square matrix | |
| * need to allocate memory for both, matrix_buffer and pivot buffer | |
| */ | |
| INIT_OUTER_LOOP_3 | |
| m = (fortran_int) dimensions[0]; | |
| matrix_size = m*m*sizeof(@typ@); | |
| pivot_size = m*sizeof(fortran_int); | |
| tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
| if (tmp_buff) | |
| { | |
| LINEARIZE_DATA_t lin_data; | |
| /* swapped steps to get matrix in FORTRAN order */ | |
| init_linearize_data(&lin_data, m, m, | |
| (ptrdiff_t)steps[1], | |
| (ptrdiff_t)steps[0]); | |
| BEGIN_OUTER_LOOP_3 | |
| linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); | |
| @TYPE@_slogdet_single_element(m, | |
| (void*)tmp_buff, | |
| (fortran_int*)(tmp_buff+matrix_size), | |
| (@typ@*)args[1], | |
| (@basetyp@*)args[2]); | |
| END_OUTER_LOOP | |
| free(tmp_buff); | |
| } | |
| } | |
| static void | |
| @TYPE@_det(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| fortran_int m; | |
| npy_uint8 *tmp_buff; | |
| size_t matrix_size; | |
| size_t pivot_size; | |
| /* notes: | |
| * matrix will need to be copied always, as factorization in lapack is | |
| * made inplace | |
| * matrix will need to be in column-major order, as expected by lapack | |
| * code (fortran) | |
| * always a square matrix | |
| * need to allocate memory for both, matrix_buffer and pivot buffer | |
| */ | |
| INIT_OUTER_LOOP_2 | |
| m = (fortran_int) dimensions[0]; | |
| matrix_size = m*m*sizeof(@typ@); | |
| pivot_size = m*sizeof(fortran_int); | |
| tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
| if (tmp_buff) | |
| { | |
| LINEARIZE_DATA_t lin_data; | |
| @typ@ sign; | |
| @basetyp@ logdet; | |
| /* swapped steps to get matrix in FORTRAN order */ | |
| init_linearize_data(&lin_data, m, m, | |
| (ptrdiff_t)steps[1], | |
| (ptrdiff_t)steps[0]); | |
| BEGIN_OUTER_LOOP_2 | |
| linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); | |
| @TYPE@_slogdet_single_element(m, | |
| (void*)tmp_buff, | |
| (fortran_int*)(tmp_buff+matrix_size), | |
| &sign, | |
| &logdet); | |
| *(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet); | |
| END_OUTER_LOOP | |
| free(tmp_buff); | |
| } | |
| } | |
| /**end repeat**/ | |
| /* -------------------------------------------------------------------------- */ | |
| /* Eigh family */ | |
| typedef struct eigh_params_struct { | |
| void *A; /* matrix */ | |
| void *W; /* eigenvalue vector */ | |
| void *WORK; /* main work buffer */ | |
| void *RWORK; /* secondary work buffer (for complex versions) */ | |
| void *IWORK; | |
| fortran_int N; | |
| fortran_int LWORK; | |
| fortran_int LRWORK; | |
| fortran_int LIWORK; | |
| char JOBZ; | |
| char UPLO; | |
| } EIGH_PARAMS_t; | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE# | |
| #typ=npy_float,npy_double# | |
| #ftyp=fortran_real,fortran_doublereal# | |
| #lapack_func=ssyevd,dsyevd# | |
| */ | |
| /* | |
| * Initialize the parameters to use in for the lapack function _syevd | |
| * Handles buffer allocation | |
| */ | |
| static inline int | |
| init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, | |
| fortran_int N) | |
| { | |
| npy_uint8 *mem_buff = NULL; | |
| npy_uint8 *mem_buff2 = NULL; | |
| @typ@ query_work_size; | |
| fortran_int query_iwork_size; | |
| fortran_int lwork = -1; | |
| fortran_int liwork = -1; | |
| fortran_int info; | |
| npy_uint8 *a, *w, *work, *iwork; | |
| size_t alloc_size = N*(N+1)*sizeof(@typ@); | |
| mem_buff = malloc(alloc_size); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| w = mem_buff + N*N*sizeof(@typ@); | |
| LAPACK(@lapack_func@)(&JOBZ, &UPLO, &N, | |
| (@ftyp@*)a, &N, (@ftyp@*)w, | |
| &query_work_size, &lwork, | |
| &query_iwork_size, &liwork, | |
| &info); | |
| if (info != 0) | |
| goto error; | |
| work = mem_buff; | |
| lwork = (fortran_int)query_work_size; | |
| liwork = query_iwork_size; | |
| mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int)); | |
| if (!mem_buff2) | |
| goto error; | |
| work = mem_buff2; | |
| iwork = mem_buff2 + lwork*sizeof(@typ@); | |
| params->A = a; | |
| params->W = w; | |
| params->WORK = work; | |
| params->RWORK = NULL; /* unused */ | |
| params->IWORK = iwork; | |
| params->N = N; | |
| params->LWORK = lwork; | |
| params->LRWORK = 0; /* unused */ | |
| params->LIWORK = liwork; | |
| params->JOBZ = JOBZ; | |
| params->UPLO = UPLO; | |
| return 1; | |
| error: | |
| /* something failed */ | |
| memset(params, 0, sizeof(*params)); | |
| free(mem_buff2); | |
| free(mem_buff); | |
| return 0; | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(EIGH_PARAMS_t *params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
| params->A, ¶ms->N, params->W, | |
| params->WORK, ¶ms->LWORK, | |
| params->IWORK, ¶ms->LIWORK, | |
| &rv); | |
| return rv; | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=CFLOAT,CDOUBLE# | |
| #typ=npy_cfloat,npy_cdouble# | |
| #basetyp=npy_float,npy_double# | |
| #ftyp=fortran_complex,fortran_doublecomplex# | |
| #fbasetyp=fortran_real,fortran_doublereal# | |
| #lapack_func=cheevd,zheevd# | |
| */ | |
| /* | |
| * Initialize the parameters to use in for the lapack function _heev | |
| * Handles buffer allocation | |
| */ | |
| static inline int | |
| init_@lapack_func@(EIGH_PARAMS_t *params, | |
| char JOBZ, | |
| char UPLO, | |
| fortran_int N) | |
| { | |
| npy_uint8 *mem_buff = NULL; | |
| npy_uint8 *mem_buff2 = NULL; | |
| @ftyp@ query_work_size; | |
| @fbasetyp@ query_rwork_size; | |
| fortran_int query_iwork_size; | |
| fortran_int lwork = -1; | |
| fortran_int lrwork = -1; | |
| fortran_int liwork = -1; | |
| npy_uint8 *a, *w, *work, *rwork, *iwork; | |
| fortran_int info; | |
| mem_buff = malloc(N*N*sizeof(@typ@)+N*sizeof(@basetyp@)); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| w = mem_buff+N*N*sizeof(@typ@); | |
| LAPACK(@lapack_func@)(&JOBZ, &UPLO, &N, | |
| (@ftyp@*)a, &N, (@fbasetyp@*)w, | |
| &query_work_size, &lwork, | |
| &query_rwork_size, &lrwork, | |
| &query_iwork_size, &liwork, | |
| &info); | |
| if (info != 0) | |
| goto error; | |
| lwork = (fortran_int)*(@fbasetyp@*)&query_work_size; | |
| lrwork = (fortran_int)query_rwork_size; | |
| liwork = query_iwork_size; | |
| mem_buff2 = malloc(lwork*sizeof(@typ@) + | |
| lrwork*sizeof(@basetyp@) + | |
| liwork*sizeof(fortran_int)); | |
| if (!mem_buff2) | |
| goto error; | |
| work = mem_buff2; | |
| rwork = work + lwork*sizeof(@typ@); | |
| iwork = rwork + lrwork*sizeof(@basetyp@); | |
| params->A = a; | |
| params->W = w; | |
| params->WORK = work; | |
| params->RWORK = rwork; | |
| params->IWORK = iwork; | |
| params->N = N; | |
| params->LWORK = lwork; | |
| params->LRWORK = lrwork; | |
| params->LIWORK = liwork; | |
| params->JOBZ = JOBZ; | |
| params->UPLO = UPLO; | |
| return 1; | |
| /* something failed */ | |
| error: | |
| memset(params, 0, sizeof(*params)); | |
| free(mem_buff2); | |
| free(mem_buff); | |
| return 0; | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(EIGH_PARAMS_t *params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
| params->A, ¶ms->N, params->W, | |
| params->WORK, ¶ms->LWORK, | |
| params->RWORK, ¶ms->LRWORK, | |
| params->IWORK, ¶ms->LIWORK, | |
| &rv); | |
| return rv; | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #BASETYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# | |
| #typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
| #basetyp=npy_float,npy_double,npy_float,npy_double# | |
| #lapack_func=ssyevd,dsyevd,cheevd,zheevd# | |
| **/ | |
| /* | |
| * (M,M)->(M,)(M,M) | |
| * dimensions[1] -> M | |
| * args[0] -> A[in] | |
| * args[1] -> W | |
| * args[2] -> A[out] | |
| */ | |
| static inline void | |
| release_@lapack_func@(EIGH_PARAMS_t *params) | |
| { | |
| /* allocated memory in A and WORK */ | |
| free(params->A); | |
| free(params->WORK); | |
| memset(params, 0, sizeof(*params)); | |
| } | |
| static inline void | |
| @TYPE@_eigh_wrapper(char JOBZ, | |
| char UPLO, | |
| char**args, | |
| npy_intp* dimensions, | |
| npy_intp* steps) | |
| { | |
| ptrdiff_t outer_steps[3]; | |
| size_t iter; | |
| size_t outer_dim = *dimensions++; | |
| size_t op_count = (JOBZ=='N')?2:3; | |
| EIGH_PARAMS_t eigh_params; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| for (iter=0; iter < op_count; ++iter) { | |
| outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
| } | |
| steps += op_count; | |
| if (init_@lapack_func@(&eigh_params, | |
| JOBZ, | |
| UPLO, | |
| (fortran_int)dimensions[0])) { | |
| LINEARIZE_DATA_t matrix_in_ld; | |
| LINEARIZE_DATA_t eigenvectors_out_ld; | |
| LINEARIZE_DATA_t eigenvalues_out_ld; | |
| init_linearize_data(&matrix_in_ld, | |
| eigh_params.N, eigh_params.N, | |
| steps[1], steps[0]); | |
| init_linearize_data(&eigenvalues_out_ld, | |
| 1, eigh_params.N, | |
| 0, steps[2]); | |
| if ('V' == eigh_params.JOBZ) { | |
| init_linearize_data(&eigenvectors_out_ld, | |
| eigh_params.N, eigh_params.N, | |
| steps[4], steps[3]); | |
| } | |
| for (iter = 0; iter < outer_dim; ++iter) { | |
| int not_ok; | |
| /* copy the matrix in */ | |
| linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld); | |
| not_ok = call_@lapack_func@(&eigh_params); | |
| if (!not_ok) { | |
| /* lapack ok, copy result out */ | |
| delinearize_@BASETYPE@_matrix(args[1], | |
| eigh_params.W, | |
| &eigenvalues_out_ld); | |
| if ('V' == eigh_params.JOBZ) { | |
| delinearize_@TYPE@_matrix(args[2], | |
| eigh_params.A, | |
| &eigenvectors_out_ld); | |
| } | |
| } else { | |
| /* lapack fail, set result to nan */ | |
| error_occurred = 1; | |
| nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld); | |
| if ('V' == eigh_params.JOBZ) { | |
| nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld); | |
| } | |
| } | |
| update_pointers((npy_uint8**)args, outer_steps, op_count); | |
| } | |
| release_@lapack_func@(&eigh_params); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| */ | |
| static void | |
| @TYPE@_eighlo(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps); | |
| } | |
| static void | |
| @TYPE@_eighup(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void* NPY_UNUSED(func)) | |
| { | |
| @TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps); | |
| } | |
| static void | |
| @TYPE@_eigvalshlo(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void* NPY_UNUSED(func)) | |
| { | |
| @TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps); | |
| } | |
| static void | |
| @TYPE@_eigvalshup(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void* NPY_UNUSED(func)) | |
| { | |
| @TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps); | |
| } | |
| /**end repeat**/ | |
| /* -------------------------------------------------------------------------- */ | |
| /* Solve family (includes inv) */ | |
| typedef struct gesv_params_struct | |
| { | |
| void *A; /* A is (N,N) of base type */ | |
| void *B; /* B is (N,NRHS) of base type */ | |
| fortran_int * IPIV; /* IPIV is (N) */ | |
| fortran_int N; | |
| fortran_int NRHS; | |
| fortran_int LDA; | |
| fortran_int LDB; | |
| } GESV_PARAMS_t; | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
| #ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex# | |
| #lapack_func=sgesv,dgesv,cgesv,zgesv# | |
| */ | |
| /* | |
| * Initialize the parameters to use in for the lapack function _heev | |
| * Handles buffer allocation | |
| */ | |
| static inline int | |
| init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) | |
| { | |
| npy_uint8 *mem_buff = NULL; | |
| npy_uint8 *a, *b, *ipiv; | |
| mem_buff = malloc(N*N*sizeof(@ftyp@) + | |
| N*NRHS*sizeof(@ftyp@) + | |
| N*sizeof(fortran_int)); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| b = a + N*N*sizeof(@ftyp@); | |
| ipiv = b + N*NRHS*sizeof(@ftyp@); | |
| params->A = a; | |
| params->B = b; | |
| params->IPIV = (fortran_int*)ipiv; | |
| params->N = N; | |
| params->NRHS = NRHS; | |
| params->LDA = N; | |
| params->LDB = N; | |
| return 1; | |
| error: | |
| free(mem_buff); | |
| memset(params, 0, sizeof(*params)); | |
| return 0; | |
| } | |
| static inline void | |
| release_@lapack_func@(GESV_PARAMS_t *params) | |
| { | |
| /* memory block base is in A */ | |
| free(params->A); | |
| memset(params, 0, sizeof(*params)); | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(GESV_PARAMS_t *params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->N, ¶ms->NRHS, | |
| params->A, ¶ms->LDA, | |
| params->IPIV, | |
| params->B, ¶ms->LDB, | |
| &rv); | |
| return rv; | |
| } | |
| static void | |
| @TYPE@_solve(char **args, npy_intp *dimensions, npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| GESV_PARAMS_t params; | |
| fortran_int n, nrhs; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| INIT_OUTER_LOOP_3 | |
| n = (fortran_int)dimensions[0]; | |
| nrhs = (fortran_int)dimensions[1]; | |
| if (init_@lapack_func@(¶ms, n, nrhs)) { | |
| LINEARIZE_DATA_t a_in, b_in, r_out; | |
| init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
| init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); | |
| init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); | |
| BEGIN_OUTER_LOOP_3 | |
| int not_ok; | |
| linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
| linearize_@TYPE@_matrix(params.B, args[1], &b_in); | |
| not_ok =call_@lapack_func@(¶ms); | |
| if (!not_ok) { | |
| delinearize_@TYPE@_matrix(args[2], params.B, &r_out); | |
| } else { | |
| error_occurred = 1; | |
| nan_@TYPE@_matrix(args[2], &r_out); | |
| } | |
| END_OUTER_LOOP | |
| release_@lapack_func@(¶ms); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| static void | |
| @TYPE@_solve1(char **args, npy_intp *dimensions, npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| GESV_PARAMS_t params; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| fortran_int n; | |
| INIT_OUTER_LOOP_3 | |
| n = (fortran_int)dimensions[0]; | |
| if (init_@lapack_func@(¶ms, n, 1)) { | |
| LINEARIZE_DATA_t a_in, b_in, r_out; | |
| init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
| init_linearize_data(&b_in, 1, n, 1, steps[2]); | |
| init_linearize_data(&r_out, 1, n, 1, steps[3]); | |
| BEGIN_OUTER_LOOP_3 | |
| int not_ok; | |
| linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
| linearize_@TYPE@_matrix(params.B, args[1], &b_in); | |
| not_ok = call_@lapack_func@(¶ms); | |
| if (!not_ok) { | |
| delinearize_@TYPE@_matrix(args[2], params.B, &r_out); | |
| } else { | |
| error_occurred = 1; | |
| nan_@TYPE@_matrix(args[2], &r_out); | |
| } | |
| END_OUTER_LOOP | |
| release_@lapack_func@(¶ms); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| static void | |
| @TYPE@_inv(char **args, npy_intp *dimensions, npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| GESV_PARAMS_t params; | |
| fortran_int n; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| INIT_OUTER_LOOP_2 | |
| n = (fortran_int)dimensions[0]; | |
| if (init_@lapack_func@(¶ms, n, n)) { | |
| LINEARIZE_DATA_t a_in, r_out; | |
| init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
| init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
| BEGIN_OUTER_LOOP_2 | |
| int not_ok; | |
| linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
| identity_@TYPE@_matrix(params.B, n); | |
| not_ok = call_@lapack_func@(¶ms); | |
| if (!not_ok) { | |
| delinearize_@TYPE@_matrix(args[1], params.B, &r_out); | |
| } else { | |
| error_occurred = 1; | |
| nan_@TYPE@_matrix(args[1], &r_out); | |
| } | |
| END_OUTER_LOOP | |
| release_@lapack_func@(¶ms); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| /**end repeat**/ | |
| /* -------------------------------------------------------------------------- */ | |
| /* Cholesky decomposition */ | |
| typedef struct potr_params_struct | |
| { | |
| void *A; | |
| fortran_int N; | |
| fortran_int LDA; | |
| char UPLO; | |
| } POTR_PARAMS_t; | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #ftyp=fortran_real, fortran_doublereal, | |
| fortran_complex, fortran_doublecomplex# | |
| #lapack_func=spotrf,dpotrf,cpotrf,zpotrf# | |
| */ | |
| static inline int | |
| init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N) | |
| { | |
| npy_uint8 *mem_buff = NULL; | |
| npy_uint8 *a; | |
| mem_buff = malloc(N*N*sizeof(@ftyp@)); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| params->A = a; | |
| params->N = N; | |
| params->LDA = N; | |
| params->UPLO = UPLO; | |
| return 1; | |
| error: | |
| free(mem_buff); | |
| memset(params, 0, sizeof(*params)); | |
| return 0; | |
| } | |
| static inline void | |
| release_@lapack_func@(POTR_PARAMS_t *params) | |
| { | |
| /* memory block base in A */ | |
| free(params->A); | |
| memset(params, 0, sizeof(*params)); | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(POTR_PARAMS_t *params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->UPLO, | |
| ¶ms->N, params->A, ¶ms->LDA, | |
| &rv); | |
| return rv; | |
| } | |
| static void | |
| @TYPE@_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) | |
| { | |
| POTR_PARAMS_t params; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| fortran_int n; | |
| INIT_OUTER_LOOP_2 | |
| assert(uplo == 'L'); | |
| n = (fortran_int)dimensions[0]; | |
| if (init_@lapack_func@(¶ms, uplo, n)) | |
| { | |
| LINEARIZE_DATA_t a_in, r_out; | |
| init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
| init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
| BEGIN_OUTER_LOOP_2 | |
| int not_ok; | |
| linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
| not_ok = call_@lapack_func@(¶ms); | |
| if (!not_ok) { | |
| triu_@TYPE@_matrix(params.A, params.N); | |
| delinearize_@TYPE@_matrix(args[1], params.A, &r_out); | |
| } else { | |
| error_occurred = 1; | |
| nan_@TYPE@_matrix(args[1], &r_out); | |
| } | |
| END_OUTER_LOOP | |
| release_@lapack_func@(¶ms); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| static void | |
| @TYPE@_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_cholesky('L', args, dimensions, steps); | |
| } | |
| /**end repeat**/ | |
| /* -------------------------------------------------------------------------- */ | |
| /* eig family */ | |
| typedef struct geev_params_struct { | |
| void *A; | |
| void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ | |
| void *WI; | |
| void *VLR; /* REAL VL buffers for _geev where _ is s, d */ | |
| void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */ | |
| void *WORK; | |
| void *W; /* final w */ | |
| void *VL; /* final vl */ | |
| void *VR; /* final vr */ | |
| fortran_int N; | |
| fortran_int LDA; | |
| fortran_int LDVL; | |
| fortran_int LDVR; | |
| fortran_int LWORK; | |
| char JOBVL; | |
| char JOBVR; | |
| } GEEV_PARAMS_t; | |
| static inline void | |
| dump_geev_params(const char *name, GEEV_PARAMS_t* params) | |
| { | |
| TRACE_TXT("\n%s\n" | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %p\n"\ | |
| "\t%10s: %d\n"\ | |
| "\t%10s: %d\n"\ | |
| "\t%10s: %d\n"\ | |
| "\t%10s: %d\n"\ | |
| "\t%10s: %d\n"\ | |
| "\t%10s: %c\n"\ | |
| "\t%10s: %c\n", | |
| name, | |
| "A", params->A, | |
| "WR", params->WR, | |
| "WI", params->WI, | |
| "VLR", params->VLR, | |
| "VRR", params->VRR, | |
| "WORK", params->WORK, | |
| "W", params->W, | |
| "VL", params->VL, | |
| "VR", params->VR, | |
| "N", (int)params->N, | |
| "LDA", (int)params->LDA, | |
| "LDVL", (int)params->LDVL, | |
| "LDVR", (int)params->LDVR, | |
| "LWORK", (int)params->LWORK, | |
| "JOBVL", params->JOBVL, | |
| "JOBVR", params->JOBVR); | |
| } | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE# | |
| #CTYPE=CFLOAT,CDOUBLE# | |
| #typ=float,double# | |
| #complextyp=COMPLEX_t,DOUBLECOMPLEX_t# | |
| #lapack_func=sgeev,dgeev# | |
| #zero=0.0f,0.0# | |
| */ | |
| static inline int | |
| init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) | |
| { | |
| npy_uint8 *mem_buff=NULL; | |
| npy_uint8 *mem_buff2=NULL; | |
| npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; | |
| size_t a_size = n*n*sizeof(@typ@); | |
| size_t wr_size = n*sizeof(@typ@); | |
| size_t wi_size = n*sizeof(@typ@); | |
| size_t vlr_size = jobvl=='V' ? n*n*sizeof(@typ@) : 0; | |
| size_t vrr_size = jobvr=='V' ? n*n*sizeof(@typ@) : 0; | |
| size_t w_size = wr_size*2; | |
| size_t vl_size = vlr_size*2; | |
| size_t vr_size = vrr_size*2; | |
| size_t work_count = 0; | |
| @typ@ work_size_query; | |
| fortran_int do_size_query = -1; | |
| fortran_int rv; | |
| /* allocate data for known sizes (all but work) */ | |
| mem_buff = malloc(a_size + wr_size + wi_size + | |
| vlr_size + vrr_size + | |
| w_size + vl_size + vr_size); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| wr = a + a_size; | |
| wi = wr + wr_size; | |
| vlr = wi + wi_size; | |
| vrr = vlr + vlr_size; | |
| w = vrr + vrr_size; | |
| vl = w + w_size; | |
| vr = vl + vl_size; | |
| LAPACK(@lapack_func@)(&jobvl, &jobvr, &n, | |
| (void *)a, &n, (void *)wr, (void *)wi, | |
| (void *)vl, &n, (void *)vr, &n, | |
| &work_size_query, &do_size_query, | |
| &rv); | |
| if (0 != rv) | |
| goto error; | |
| work_count = (size_t)work_size_query; | |
| mem_buff2 = malloc(work_count*sizeof(@typ@)); | |
| if (!mem_buff2) | |
| goto error; | |
| work = mem_buff2; | |
| params->A = a; | |
| params->WR = wr; | |
| params->WI = wi; | |
| params->VLR = vlr; | |
| params->VRR = vrr; | |
| params->WORK = work; | |
| params->W = w; | |
| params->VL = vl; | |
| params->VR = vr; | |
| params->N = n; | |
| params->LDA = n; | |
| params->LDVL = n; | |
| params->LDVR = n; | |
| params->LWORK = (fortran_int)work_count; | |
| params->JOBVL = jobvl; | |
| params->JOBVR = jobvr; | |
| return 1; | |
| error: | |
| free(mem_buff2); | |
| free(mem_buff); | |
| memset(params, 0, sizeof(*params)); | |
| return 0; | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(GEEV_PARAMS_t* params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, | |
| ¶ms->N, params->A, ¶ms->LDA, | |
| params->WR, params->WI, | |
| params->VLR, ¶ms->LDVL, | |
| params->VRR, ¶ms->LDVR, | |
| params->WORK, ¶ms->LWORK, | |
| &rv); | |
| return rv; | |
| } | |
| static inline void | |
| mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n) | |
| { | |
| size_t iter; | |
| for (iter = 0; iter < n; ++iter) { | |
| c[iter].array[0] = re[iter]; | |
| c[iter].array[1] = @zero@; | |
| } | |
| } | |
| static inline void | |
| mk_@TYPE@_complex_array(@complextyp@ *c, | |
| const @typ@ *re, | |
| const @typ@ *im, | |
| size_t n) | |
| { | |
| size_t iter; | |
| for (iter = 0; iter < n; ++iter) { | |
| c[iter].array[0] = re[iter]; | |
| c[iter].array[1] = im[iter]; | |
| } | |
| } | |
| static inline void | |
| mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c, | |
| const @typ@ *r, | |
| size_t n) | |
| { | |
| size_t iter; | |
| for (iter = 0; iter < n; ++iter) { | |
| @typ@ re = r[iter]; | |
| @typ@ im = r[iter+n]; | |
| c[iter].array[0] = re; | |
| c[iter].array[1] = im; | |
| c[iter+n].array[0] = re; | |
| c[iter+n].array[1] = -im; | |
| } | |
| } | |
| /* | |
| * make the complex eigenvectors from the real array produced by sgeev/zgeev. | |
| * c is the array where the results will be left. | |
| * r is the source array of reals produced by sgeev/zgeev | |
| * i is the eigenvalue imaginary part produced by sgeev/zgeev | |
| * n is so that the order of the matrix is n by n | |
| */ | |
| static inline void | |
| mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c, | |
| const @typ@ *r, | |
| const @typ@ *i, | |
| size_t n) | |
| { | |
| size_t iter = 0; | |
| while (iter < n) | |
| { | |
| if (i[iter] == @zero@) { | |
| /* eigenvalue was real, eigenvectors as well... */ | |
| mk_@TYPE@_complex_array_from_real(c, r, n); | |
| c += n; | |
| r += n; | |
| iter ++; | |
| } else { | |
| /* eigenvalue was complex, generate a pair of eigenvectors */ | |
| mk_@TYPE@_complex_array_conjugate_pair(c, r, n); | |
| c += 2*n; | |
| r += 2*n; | |
| iter += 2; | |
| } | |
| } | |
| } | |
| static inline void | |
| process_@lapack_func@_results(GEEV_PARAMS_t *params) | |
| { | |
| /* REAL versions of geev need the results to be translated | |
| * into complex versions. This is the way to deal with imaginary | |
| * results. In our gufuncs we will always return complex arrays! | |
| */ | |
| mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N); | |
| /* handle the eigenvectors */ | |
| if ('V' == params->JOBVL) { | |
| mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR, | |
| params->WI, params->N); | |
| } | |
| if ('V' == params->JOBVR) { | |
| mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR, | |
| params->WI, params->N); | |
| } | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=CFLOAT,CDOUBLE# | |
| #typ=COMPLEX_t,DOUBLECOMPLEX_t# | |
| #ftyp=fortran_complex,fortran_doublecomplex# | |
| #realtyp=float,double# | |
| #lapack_func=cgeev,zgeev# | |
| */ | |
| static inline int | |
| init_@lapack_func@(GEEV_PARAMS_t* params, | |
| char jobvl, | |
| char jobvr, | |
| fortran_int n) | |
| { | |
| npy_uint8 *mem_buff = NULL; | |
| npy_uint8 *mem_buff2 = NULL; | |
| npy_uint8 *a, *w, *vl, *vr, *work, *rwork; | |
| size_t a_size = n*n*sizeof(@ftyp@); | |
| size_t w_size = n*sizeof(@ftyp@); | |
| size_t vl_size = jobvl=='V'? n*n*sizeof(@ftyp@) : 0; | |
| size_t vr_size = jobvr=='V'? n*n*sizeof(@ftyp@) : 0; | |
| size_t rwork_size = 2*n*sizeof(@realtyp@); | |
| size_t work_count = 0; | |
| @typ@ work_size_query; | |
| fortran_int do_size_query = -1; | |
| fortran_int rv; | |
| size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; | |
| mem_buff = malloc(total_size); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| w = a + a_size; | |
| vl = w + w_size; | |
| vr = vl + vl_size; | |
| rwork = vr + vr_size; | |
| LAPACK(@lapack_func@)(&jobvl, &jobvr, &n, | |
| (void *)a, &n, (void *)w, | |
| (void *)vl, &n, (void *)vr, &n, | |
| (void *)&work_size_query, &do_size_query, | |
| (void *)rwork, | |
| &rv); | |
| if (0 != rv) | |
| goto error; | |
| work_count = (size_t) work_size_query.array[0]; | |
| mem_buff2 = malloc(work_count*sizeof(@ftyp@)); | |
| if (!mem_buff2) | |
| goto error; | |
| work = mem_buff2; | |
| params->A = a; | |
| params->WR = rwork; | |
| params->WI = NULL; | |
| params->VLR = NULL; | |
| params->VRR = NULL; | |
| params->VL = vl; | |
| params->VR = vr; | |
| params->WORK = work; | |
| params->W = w; | |
| params->N = n; | |
| params->LDA = n; | |
| params->LDVL = n; | |
| params->LDVR = n; | |
| params->LWORK = (fortran_int)work_count; | |
| params->JOBVL = jobvl; | |
| params->JOBVR = jobvr; | |
| return 1; | |
| error: | |
| free(mem_buff2); | |
| free(mem_buff); | |
| memset(params, 0, sizeof(*params)); | |
| return 0; | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(GEEV_PARAMS_t* params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, | |
| ¶ms->N, params->A, ¶ms->LDA, | |
| params->W, | |
| params->VL, ¶ms->LDVL, | |
| params->VR, ¶ms->LDVR, | |
| params->WORK, ¶ms->LWORK, | |
| params->WR, /* actually RWORK */ | |
| &rv); | |
| return rv; | |
| } | |
| static inline void | |
| process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params)) | |
| { | |
| /* nothing to do here, complex versions are ready to copy out */ | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CDOUBLE# | |
| #COMPLEXTYPE=CFLOAT,CDOUBLE,CDOUBLE# | |
| #ftype=fortran_real,fortran_doublereal,fortran_doublecomplex# | |
| #lapack_func=sgeev,dgeev,zgeev# | |
| */ | |
| static inline void | |
| release_@lapack_func@(GEEV_PARAMS_t *params) | |
| { | |
| free(params->WORK); | |
| free(params->A); | |
| memset(params, 0, sizeof(*params)); | |
| } | |
| static inline void | |
| @TYPE@_eig_wrapper(char JOBVL, | |
| char JOBVR, | |
| char**args, | |
| npy_intp* dimensions, | |
| npy_intp* steps) | |
| { | |
| ptrdiff_t outer_steps[4]; | |
| size_t iter; | |
| size_t outer_dim = *dimensions++; | |
| size_t op_count = 2; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| GEEV_PARAMS_t geev_params; | |
| assert(JOBVL == 'N'); | |
| STACK_TRACE; | |
| op_count += 'V'==JOBVL?1:0; | |
| op_count += 'V'==JOBVR?1:0; | |
| for (iter=0; iter < op_count; ++iter) { | |
| outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
| } | |
| steps += op_count; | |
| if (init_@lapack_func@(&geev_params, | |
| JOBVL, JOBVR, | |
| (fortran_int)dimensions[0])) { | |
| LINEARIZE_DATA_t a_in; | |
| LINEARIZE_DATA_t w_out; | |
| LINEARIZE_DATA_t vl_out; | |
| LINEARIZE_DATA_t vr_out; | |
| init_linearize_data(&a_in, | |
| geev_params.N, geev_params.N, | |
| steps[1], steps[0]); | |
| steps += 2; | |
| init_linearize_data(&w_out, | |
| 1, geev_params.N, | |
| 0, steps[0]); | |
| steps += 1; | |
| if ('V' == geev_params.JOBVL) { | |
| init_linearize_data(&vl_out, | |
| geev_params.N, geev_params.N, | |
| steps[1], steps[0]); | |
| steps += 2; | |
| } | |
| if ('V' == geev_params.JOBVR) { | |
| init_linearize_data(&vr_out, | |
| geev_params.N, geev_params.N, | |
| steps[1], steps[0]); | |
| } | |
| for (iter = 0; iter < outer_dim; ++iter) { | |
| int not_ok; | |
| char **arg_iter = args; | |
| /* copy the matrix in */ | |
| linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in); | |
| not_ok = call_@lapack_func@(&geev_params); | |
| if (!not_ok) { | |
| process_@lapack_func@_results(&geev_params); | |
| delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, | |
| geev_params.W, | |
| &w_out); | |
| if ('V' == geev_params.JOBVL) | |
| delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, | |
| geev_params.VL, | |
| &vl_out); | |
| if ('V' == geev_params.JOBVR) | |
| delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, | |
| geev_params.VR, | |
| &vr_out); | |
| } else { | |
| /* geev failed */ | |
| error_occurred = 1; | |
| nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out); | |
| if ('V' == geev_params.JOBVL) | |
| nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out); | |
| if ('V' == geev_params.JOBVR) | |
| nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out); | |
| } | |
| update_pointers((npy_uint8**)args, outer_steps, op_count); | |
| } | |
| release_@lapack_func@(&geev_params); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| static void | |
| @TYPE@_eig(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_eig_wrapper('N', 'V', args, dimensions, steps); | |
| } | |
| static void | |
| @TYPE@_eigvals(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_eig_wrapper('N', 'N', args, dimensions, steps); | |
| } | |
| /**end repeat**/ | |
| /* -------------------------------------------------------------------------- */ | |
| /* singular value decomposition */ | |
| typedef struct gessd_params_struct | |
| { | |
| void *A; | |
| void *S; | |
| void *U; | |
| void *VT; | |
| void *WORK; | |
| void *RWORK; | |
| void *IWORK; | |
| fortran_int M; | |
| fortran_int N; | |
| fortran_int LDA; | |
| fortran_int LDU; | |
| fortran_int LDVT; | |
| fortran_int LWORK; | |
| char JOBZ; | |
| } GESDD_PARAMS_t; | |
| static inline void | |
| dump_gesdd_params(const char *name, | |
| GESDD_PARAMS_t *params) | |
| { | |
| TRACE_TXT("\n%s:\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18p\n"\ | |
| "%14s: %18d\n"\ | |
| "%14s: %18d\n"\ | |
| "%14s: %18d\n"\ | |
| "%14s: %18d\n"\ | |
| "%14s: %18d\n"\ | |
| "%14s: %18d\n"\ | |
| "%14s: %15c'%c'\n", | |
| name, | |
| "A", params->A, | |
| "S", params->S, | |
| "U", params->U, | |
| "VT", params->VT, | |
| "WORK", params->WORK, | |
| "RWORK", params->RWORK, | |
| "IWORK", params->IWORK, | |
| "M", (int)params->M, | |
| "N", (int)params->N, | |
| "LDA", (int)params->LDA, | |
| "LDU", (int)params->LDU, | |
| "LDVT", (int)params->LDVT, | |
| "LWORK", (int)params->LWORK, | |
| "JOBZ", ' ',params->JOBZ); | |
| } | |
| static inline int | |
| compute_urows_vtcolumns(char jobz, | |
| fortran_int m, fortran_int n, | |
| fortran_int *urows, fortran_int *vtcolumns) | |
| { | |
| fortran_int min_m_n = m<n?m:n; | |
| switch(jobz) | |
| { | |
| case 'N': | |
| *urows = 0; | |
| *vtcolumns = 0; | |
| break; | |
| case 'A': | |
| *urows = m; | |
| *vtcolumns = n; | |
| break; | |
| case 'S': | |
| { | |
| *urows = min_m_n; | |
| *vtcolumns = min_m_n; | |
| } | |
| break; | |
| default: | |
| return 0; | |
| } | |
| return 1; | |
| } | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE# | |
| #lapack_func=sgesdd,dgesdd# | |
| #ftyp=fortran_real,fortran_doublereal# | |
| */ | |
| static inline int | |
| init_@lapack_func@(GESDD_PARAMS_t *params, | |
| char jobz, | |
| fortran_int m, | |
| fortran_int n) | |
| { | |
| npy_uint8 *mem_buff = NULL; | |
| npy_uint8 *mem_buff2 = NULL; | |
| npy_uint8 *a, *s, *u, *vt, *work, *iwork; | |
| size_t a_size = (size_t)m*(size_t)n*sizeof(@ftyp@); | |
| fortran_int min_m_n = m<n?m:n; | |
| size_t s_size = ((size_t)min_m_n)*sizeof(@ftyp@); | |
| fortran_int u_row_count, vt_column_count; | |
| size_t u_size, vt_size; | |
| fortran_int work_count; | |
| size_t work_size; | |
| size_t iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
| if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
| goto error; | |
| u_size = ((size_t)u_row_count)*m*sizeof(@ftyp@); | |
| vt_size = n*((size_t)vt_column_count)*sizeof(@ftyp@); | |
| mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| s = a + a_size; | |
| u = s + s_size; | |
| vt = u + u_size; | |
| iwork = vt + vt_size; | |
| /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
| vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
| { | |
| /* compute optimal work size */ | |
| @ftyp@ work_size_query; | |
| fortran_int do_query = -1; | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(&jobz, &m, &n, | |
| (void*)a, &m, (void*)s, (void*)u, &m, | |
| (void*)vt, &vt_column_count, | |
| &work_size_query, &do_query, | |
| (void*)iwork, &rv); | |
| if (0!=rv) | |
| goto error; | |
| work_count = (fortran_int)work_size_query; | |
| work_size = (size_t)work_count * sizeof(@ftyp@); | |
| } | |
| mem_buff2 = malloc(work_size); | |
| if (!mem_buff2) | |
| goto error; | |
| work = mem_buff2; | |
| params->M = m; | |
| params->N = n; | |
| params->A = a; | |
| params->S = s; | |
| params->U = u; | |
| params->VT = vt; | |
| params->WORK = work; | |
| params->RWORK = NULL; | |
| params->IWORK = iwork; | |
| params->M = m; | |
| params->N = n; | |
| params->LDA = m; | |
| params->LDU = m; | |
| params->LDVT = vt_column_count; | |
| params->LWORK = work_count; | |
| params->JOBZ = jobz; | |
| return 1; | |
| error: | |
| TRACE_TXT("%s failed init\n", __FUNCTION__); | |
| free(mem_buff); | |
| free(mem_buff2); | |
| memset(params, 0, sizeof(*params)); | |
| return 0; | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(GESDD_PARAMS_t *params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
| params->A, ¶ms->LDA, | |
| params->S, | |
| params->U, ¶ms->LDU, | |
| params->VT, ¶ms->LDVT, | |
| params->WORK, ¶ms->LWORK, | |
| params->IWORK, | |
| &rv); | |
| return rv; | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=CFLOAT,CDOUBLE# | |
| #ftyp=fortran_complex,fortran_doublecomplex# | |
| #frealtyp=fortran_real,fortran_doublereal# | |
| #typ=COMPLEX_t,DOUBLECOMPLEX_t# | |
| #lapack_func=cgesdd,zgesdd# | |
| */ | |
| static inline int | |
| init_@lapack_func@(GESDD_PARAMS_t *params, | |
| char jobz, | |
| fortran_int m, | |
| fortran_int n) | |
| { | |
| npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; | |
| npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; | |
| size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; | |
| fortran_int u_row_count, vt_column_count, work_count; | |
| fortran_int min_m_n = m<n?m:n; | |
| if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
| goto error; | |
| a_size = ((size_t)m)*((size_t)n)*sizeof(@ftyp@); | |
| s_size = ((size_t)min_m_n)*sizeof(@frealtyp@); | |
| u_size = ((size_t)u_row_count)*m*sizeof(@ftyp@); | |
| vt_size = n*((size_t)vt_column_count)*sizeof(@ftyp@); | |
| rwork_size = 'N'==jobz? | |
| 7*((size_t)min_m_n) : | |
| (5*(size_t)min_m_n*(size_t)min_m_n + 5*(size_t)min_m_n); | |
| rwork_size *= sizeof(@ftyp@); | |
| iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
| mem_buff = malloc(a_size + | |
| s_size + | |
| u_size + | |
| vt_size + | |
| rwork_size + | |
| iwork_size); | |
| if (!mem_buff) | |
| goto error; | |
| a = mem_buff; | |
| s = a + a_size; | |
| u = s + s_size; | |
| vt = u + u_size; | |
| rwork = vt + vt_size; | |
| iwork = rwork + rwork_size; | |
| /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
| vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
| { | |
| /* compute optimal work size */ | |
| @ftyp@ work_size_query; | |
| fortran_int do_query = -1; | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(&jobz, &m, &n, | |
| (void*)a, &m, (void*)s, (void*)u, &m, | |
| (void*)vt, &vt_column_count, | |
| &work_size_query, &do_query, | |
| (void*)rwork, | |
| (void*)iwork, &rv); | |
| if (0!=rv) | |
| goto error; | |
| work_count = (fortran_int)((@typ@*)&work_size_query)->array[0]; | |
| work_size = (size_t)work_count * sizeof(@ftyp@); | |
| } | |
| mem_buff2 = malloc(work_size); | |
| if (!mem_buff2) | |
| goto error; | |
| work = mem_buff2; | |
| params->A = a; | |
| params->S = s; | |
| params->U = u; | |
| params->VT = vt; | |
| params->WORK = work; | |
| params->RWORK = rwork; | |
| params->IWORK = iwork; | |
| params->M = m; | |
| params->N = n; | |
| params->LDA = m; | |
| params->LDU = m; | |
| params->LDVT = vt_column_count; | |
| params->LWORK = work_count; | |
| params->JOBZ = jobz; | |
| return 1; | |
| error: | |
| TRACE_TXT("%s failed init\n", __FUNCTION__); | |
| free(mem_buff2); | |
| free(mem_buff); | |
| memset(params, 0, sizeof(*params)); | |
| return 0; | |
| } | |
| static inline fortran_int | |
| call_@lapack_func@(GESDD_PARAMS_t *params) | |
| { | |
| fortran_int rv; | |
| LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
| params->A, ¶ms->LDA, | |
| params->S, | |
| params->U, ¶ms->LDU, | |
| params->VT, ¶ms->LDVT, | |
| params->WORK, ¶ms->LWORK, | |
| params->RWORK, | |
| params->IWORK, | |
| &rv); | |
| return rv; | |
| } | |
| /**end repeat**/ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# | |
| #lapack_func=sgesdd,dgesdd,cgesdd,zgesdd# | |
| */ | |
| static inline void | |
| release_@lapack_func@(GESDD_PARAMS_t* params) | |
| { | |
| /* A and WORK contain allocated blocks */ | |
| free(params->A); | |
| free(params->WORK); | |
| memset(params, 0, sizeof(*params)); | |
| } | |
| static inline void | |
| @TYPE@_svd_wrapper(char JOBZ, | |
| char **args, | |
| npy_intp* dimensions, | |
| npy_intp* steps) | |
| { | |
| ptrdiff_t outer_steps[4]; | |
| int error_occurred = get_fp_invalid_and_clear(); | |
| size_t iter; | |
| size_t outer_dim = *dimensions++; | |
| size_t op_count = (JOBZ=='N')?2:4; | |
| GESDD_PARAMS_t params; | |
| for (iter=0; iter < op_count; ++iter) { | |
| outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
| } | |
| steps += op_count; | |
| if (init_@lapack_func@(¶ms, | |
| JOBZ, | |
| (fortran_int)dimensions[0], | |
| (fortran_int)dimensions[1])) { | |
| LINEARIZE_DATA_t a_in, u_out, s_out, v_out; | |
| init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); | |
| if ('N' == params.JOBZ) { | |
| /* only the singular values are wanted */ | |
| fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
| init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); | |
| } else { | |
| fortran_int u_columns, v_rows; | |
| fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
| if ('S' == params.JOBZ) { | |
| u_columns = min_m_n; | |
| v_rows = min_m_n; | |
| } else { | |
| u_columns = params.M; | |
| v_rows = params.N; | |
| } | |
| init_linearize_data(&u_out, | |
| u_columns, params.M, | |
| steps[3], steps[2]); | |
| init_linearize_data(&s_out, | |
| 1, min_m_n, | |
| 0, steps[4]); | |
| init_linearize_data(&v_out, | |
| params.N, v_rows, | |
| steps[6], steps[5]); | |
| } | |
| for (iter = 0; iter < outer_dim; ++iter) { | |
| int not_ok; | |
| /* copy the matrix in */ | |
| linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
| not_ok = call_@lapack_func@(¶ms); | |
| if (!not_ok) { | |
| if ('N' == params.JOBZ) { | |
| delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out); | |
| } else { | |
| delinearize_@TYPE@_matrix(args[1], params.U, &u_out); | |
| delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out); | |
| delinearize_@TYPE@_matrix(args[3], params.VT, &v_out); | |
| } | |
| } else { | |
| error_occurred = 1; | |
| if ('N' == params.JOBZ) { | |
| nan_@REALTYPE@_matrix(args[1], &s_out); | |
| } else { | |
| nan_@TYPE@_matrix(args[1], &u_out); | |
| nan_@REALTYPE@_matrix(args[2], &s_out); | |
| nan_@TYPE@_matrix(args[3], &v_out); | |
| } | |
| } | |
| update_pointers((npy_uint8**)args, outer_steps, op_count); | |
| } | |
| release_@lapack_func@(¶ms); | |
| } | |
| set_fp_invalid_or_clear(error_occurred); | |
| } | |
| /**end repeat*/ | |
| /* svd gufunc entry points */ | |
| /**begin repeat | |
| #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
| */ | |
| static void | |
| @TYPE@_svd_N(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_svd_wrapper('N', args, dimensions, steps); | |
| } | |
| static void | |
| @TYPE@_svd_S(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_svd_wrapper('S', args, dimensions, steps); | |
| } | |
| static void | |
| @TYPE@_svd_A(char **args, | |
| npy_intp *dimensions, | |
| npy_intp *steps, | |
| void *NPY_UNUSED(func)) | |
| { | |
| @TYPE@_svd_wrapper('A', args, dimensions, steps); | |
| } | |
| /**end repeat**/ | |
| #pragma GCC diagnostic pop | |
| /* -------------------------------------------------------------------------- */ | |
| /* gufunc registration */ | |
/*
 * Sixteen NULL pointers, passed as the per-loop "data" array when the
 * gufuncs are registered.  Only the first entry is spelled out; the
 * remaining fifteen are zero-initialised, i.e. null pointers as well.
 */
static void *array_of_nulls[16] = { NULL };
| #define FUNC_ARRAY_NAME(NAME) NAME ## _funcs | |
| #define GUFUNC_FUNC_ARRAY_REAL(NAME) \ | |
| static PyUFuncGenericFunction \ | |
| FUNC_ARRAY_NAME(NAME)[] = { \ | |
| FLOAT_ ## NAME, \ | |
| DOUBLE_ ## NAME \ | |
| } | |
| #define GUFUNC_FUNC_ARRAY_REAL_COMPLEX(NAME) \ | |
| static PyUFuncGenericFunction \ | |
| FUNC_ARRAY_NAME(NAME)[] = { \ | |
| FLOAT_ ## NAME, \ | |
| DOUBLE_ ## NAME, \ | |
| CFLOAT_ ## NAME, \ | |
| CDOUBLE_ ## NAME \ | |
| } | |
| /* There are problems with eig in complex single precision. | |
| * That kernel is disabled | |
| */ | |
| #define GUFUNC_FUNC_ARRAY_EIG(NAME) \ | |
| static PyUFuncGenericFunction \ | |
| FUNC_ARRAY_NAME(NAME)[] = { \ | |
| FLOAT_ ## NAME, \ | |
| DOUBLE_ ## NAME, \ | |
| CDOUBLE_ ## NAME \ | |
| } | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S); | |
| GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A); | |
| GUFUNC_FUNC_ARRAY_EIG(eig); | |
| GUFUNC_FUNC_ARRAY_EIG(eigvals); | |
| static char equal_2_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_CFLOAT, | |
| NPY_CDOUBLE, NPY_CDOUBLE | |
| }; | |
| static char equal_3_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, | |
| NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE | |
| }; | |
| /* second result is logdet, that will always be a REAL */ | |
| static char slogdet_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, | |
| NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE | |
| }; | |
| static char eigh_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, | |
| NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE | |
| }; | |
| static char eighvals_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_FLOAT, | |
| NPY_CDOUBLE, NPY_DOUBLE | |
| }; | |
| static char eig_types[] = { | |
| NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT, | |
| NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, | |
| NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE | |
| }; | |
| static char eigvals_types[] = { | |
| NPY_FLOAT, NPY_CFLOAT, | |
| NPY_DOUBLE, NPY_CDOUBLE, | |
| NPY_CDOUBLE, NPY_CDOUBLE | |
| }; | |
| static char svd_1_1_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_FLOAT, | |
| NPY_CDOUBLE, NPY_DOUBLE | |
| }; | |
| static char svd_1_3_types[] = { | |
| NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
| NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
| NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, | |
| NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE | |
| }; | |
| typedef struct gufunc_descriptor_struct { | |
| char *name; | |
| char *signature; | |
| char *doc; | |
| int ntypes; | |
| int nin; | |
| int nout; | |
| PyUFuncGenericFunction *funcs; | |
| char *types; | |
| } GUFUNC_DESCRIPTOR_t; | |
| GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { | |
| { | |
| "slogdet", | |
| "(m,m)->(),()", | |
| "slogdet on the last two dimensions and broadcast on the rest. \n"\ | |
| "Results in two arrays, one with sign and the other with log of the"\ | |
| " determinants. \n"\ | |
| " \"(m,m)->(),()\" \n", | |
| 4, 1, 2, | |
| FUNC_ARRAY_NAME(slogdet), | |
| slogdet_types | |
| }, | |
| { | |
| "det", | |
| "(m,m)->()", | |
| "det of the last two dimensions and broadcast on the rest. \n"\ | |
| " \"(m,m)->()\" \n", | |
| 4, 1, 1, | |
| FUNC_ARRAY_NAME(det), | |
| equal_2_types | |
| }, | |
| { | |
| "eigh_lo", | |
| "(m,m)->(m),(m,m)", | |
| "eigh on the last two dimension and broadcast to the rest, using"\ | |
| " lower triangle \n"\ | |
| "Results in a vector of eigenvalues and a matrix with the"\ | |
| "eigenvectors. \n"\ | |
| " \"(m,m)->(m),(m,m)\" \n", | |
| 4, 1, 2, | |
| FUNC_ARRAY_NAME(eighlo), | |
| eigh_types | |
| }, | |
| { | |
| "eigh_up", | |
| "(m,m)->(m),(m,m)", | |
| "eigh on the last two dimension and broadcast to the rest, using"\ | |
| " upper triangle. \n"\ | |
| "Results in a vector of eigenvalues and a matrix with the"\ | |
| " eigenvectors. \n"\ | |
| " \"(m,m)->(m),(m,m)\" \n", | |
| 4, 1, 2, | |
| FUNC_ARRAY_NAME(eighup), | |
| eigh_types | |
| }, | |
| { | |
| "eigvalsh_lo", | |
| "(m,m)->(m)", | |
| "eigh on the last two dimension and broadcast to the rest, using"\ | |
| " lower triangle. \n"\ | |
| "Results in a vector of eigenvalues and a matrix with the"\ | |
| "eigenvectors. \n"\ | |
| " \"(m,m)->(m)\" \n", | |
| 4, 1, 1, | |
| FUNC_ARRAY_NAME(eigvalshlo), | |
| eighvals_types | |
| }, | |
| { | |
| "eigvalsh_up", | |
| "(m,m)->(m)", | |
| "eigvalsh on the last two dimension and broadcast to the rest,"\ | |
| " using upper triangle. \n"\ | |
| "Results in a vector of eigenvalues and a matrix with the"\ | |
| "eigenvectors.\n"\ | |
| " \"(m,m)->(m)\" \n", | |
| 4, 1, 1, | |
| FUNC_ARRAY_NAME(eigvalshup), | |
| eighvals_types | |
| }, | |
| { | |
| "solve", | |
| "(m,m),(m,n)->(m,n)", | |
| "solve the system a x = b, on the last two dimensions, broadcast"\ | |
| " to the rest. \n"\ | |
| "Results in a matrices with the solutions. \n"\ | |
| " \"(m,m),(m,n)->(m,n)\" \n", | |
| 4, 2, 1, | |
| FUNC_ARRAY_NAME(solve), | |
| equal_3_types | |
| }, | |
| { | |
| "solve1", | |
| "(m,m),(m)->(m)", | |
| "solve the system a x = b, for b being a vector, broadcast in"\ | |
| " the outer dimensions. \n"\ | |
| "Results in vectors with the solutions. \n"\ | |
| " \"(m,m),(m)->(m)\" \n", | |
| 4,2,1, | |
| FUNC_ARRAY_NAME(solve1), | |
| equal_3_types | |
| }, | |
| { | |
| "inv", | |
| "(m,m)->(m,m)", | |
| "compute the inverse of the last two dimensions and broadcast"\ | |
| " to the rest. \n"\ | |
| "Results in the inverse matrices. \n"\ | |
| " \"(m,m)->(m,m)\" \n", | |
| 4,1,1, | |
| FUNC_ARRAY_NAME(inv), | |
| equal_2_types | |
| }, | |
| { | |
| "cholesky_lo", | |
| "(m,m)->(m,m)", | |
| "cholesky decomposition of hermitian positive-definite matrices. \n"\ | |
| "Broadcast to all outer dimensions. \n"\ | |
| " \"(m,m)->(m,m)\" \n", | |
| 4, 1, 1, | |
| FUNC_ARRAY_NAME(cholesky_lo), | |
| equal_2_types | |
| }, | |
| { | |
| "svd_m", | |
| "(m,n)->(m)", | |
| "svd when n>=m. ", | |
| 4, 1, 1, | |
| FUNC_ARRAY_NAME(svd_N), | |
| svd_1_1_types | |
| }, | |
| { | |
| "svd_n", | |
| "(m,n)->(n)", | |
| "svd when n<=m", | |
| 4, 1, 1, | |
| FUNC_ARRAY_NAME(svd_N), | |
| svd_1_1_types | |
| }, | |
| { | |
| "svd_m_s", | |
| "(m,n)->(m,m),(m),(m,n)", | |
| "svd when m>=n", | |
| 4, 1, 3, | |
| FUNC_ARRAY_NAME(svd_S), | |
| svd_1_3_types | |
| }, | |
| { | |
| "svd_n_s", | |
| "(m,n)->(m,n),(n),(n,n)", | |
| "svd when m>=n", | |
| 4, 1, 3, | |
| FUNC_ARRAY_NAME(svd_S), | |
| svd_1_3_types | |
| }, | |
| { | |
| "svd_m_f", | |
| "(m,n)->(m,m),(m),(n,n)", | |
| "svd when m>=n", | |
| 4, 1, 3, | |
| FUNC_ARRAY_NAME(svd_A), | |
| svd_1_3_types | |
| }, | |
| { | |
| "svd_n_f", | |
| "(m,n)->(m,m),(n),(n,n)", | |
| "svd when m>=n", | |
| 4, 1, 3, | |
| FUNC_ARRAY_NAME(svd_A), | |
| svd_1_3_types | |
| }, | |
| { | |
| "eig", | |
| "(m,m)->(m),(m,m)", | |
| "eig on the last two dimension and broadcast to the rest. \n"\ | |
| "Results in a vector with the eigenvalues and a matrix with the"\ | |
| " eigenvectors. \n"\ | |
| " \"(m,m)->(m),(m,m)\" \n", | |
| 3, 1, 2, | |
| FUNC_ARRAY_NAME(eig), | |
| eig_types | |
| }, | |
| { | |
| "eigvals", | |
| "(m,m)->(m)", | |
| "eigvals on the last two dimension and broadcast to the rest. \n"\ | |
| "Results in a vector of eigenvalues. \n"\ | |
| " \"(m,m)->(m),(m,m)\" \n", | |
| 3, 1, 1, | |
| FUNC_ARRAY_NAME(eigvals), | |
| eigvals_types | |
| }, | |
| }; | |
| static void | |
| addUfuncs(PyObject *dictionary) { | |
| PyObject *f; | |
| int i; | |
| const int gufunc_count = sizeof(gufunc_descriptors)/ | |
| sizeof(gufunc_descriptors[0]); | |
| for (i=0; i < gufunc_count; i++) { | |
| GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i]; | |
| f = PyUFunc_FromFuncAndDataAndSignature(d->funcs, | |
| array_of_nulls, | |
| d->types, | |
| d->ntypes, | |
| d->nin, | |
| d->nout, | |
| PyUFunc_None, | |
| d->name, | |
| d->doc, | |
| 0, | |
| d->signature); | |
| PyDict_SetItemString(dictionary, d->name, f); | |
| #if 0 | |
| dump_ufunc_object((PyUFuncObject*) f); | |
| #endif | |
| Py_DECREF(f); | |
| } | |
| } | |
| /* -------------------------------------------------------------------------- */ | |
| /* Module initialization stuff */ | |
| static PyMethodDef UMath_LinAlgMethods[] = { | |
| {NULL, NULL, 0, NULL} /* Sentinel */ | |
| }; | |
#if defined(NPY_PY3K)
/* Module definition, used only by the Python 3 build. */
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    UMATH_LINALG_MODULE_NAME,   /* module name */
    NULL,                       /* no docstring */
    -1,                         /* module state size */
    UMath_LinAlgMethods,        /* method table */
    NULL,
    NULL,
    NULL,
    NULL
};
#endif
| #if defined(NPY_PY3K) | |
| #define RETVAL m | |
| PyObject *PyInit__umath_linalg(void) | |
| #else | |
| #define RETVAL | |
| PyMODINIT_FUNC | |
| init_umath_linalg(void) | |
| #endif | |
| { | |
| PyObject *m; | |
| PyObject *d; | |
| PyObject *version; | |
| init_constants(); | |
| #if defined(NPY_PY3K) | |
| m = PyModule_Create(&moduledef); | |
| #else | |
| m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods); | |
| #endif | |
| if (m == NULL) | |
| return RETVAL; | |
| import_array(); | |
| import_ufunc(); | |
| d = PyModule_GetDict(m); | |
| version = PyString_FromString(umath_linalg_version_string); | |
| PyDict_SetItemString(d, "__version__", version); | |
| Py_DECREF(version); | |
| /* Load the ufunc operators into the module's namespace */ | |
| addUfuncs(d); | |
| if (PyErr_Occurred()) { | |
| PyErr_SetString(PyExc_RuntimeError, | |
| "cannot load _umath_linalg module."); | |
| } | |
| return RETVAL; | |
| } | |